Class: Trie

Inherits:
Object
  • Object
show all
Defined in:
ext/trie/trie.c,
ext/trie/trie.c

Overview

A key-value data structure for string keys which is efficient memory usage and fast retrieval time.

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.read(filename_base) ⇒ Trie

Returns a new trie with data as read from disk.

Returns:


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'ext/trie/trie.c', line 33

static VALUE rb_trie_read(VALUE self, VALUE filename_base) {
  VALUE da_filename = rb_str_dup(filename_base);
  rb_str_concat(da_filename, rb_str_new2(".da"));
  StringValue(da_filename);
    
  VALUE tail_filename = rb_str_dup(filename_base);
  rb_str_concat(tail_filename, rb_str_new2(".tail"));
  StringValue(tail_filename);

  Trie *trie = trie_new();

  VALUE obj;
  obj = Data_Wrap_Struct(self, 0, trie_free, trie);

  DArray *old_da = trie->da;
  Tail *old_tail = trie->tail;

  FILE *da_file = fopen(RSTRING_PTR(da_filename), "r");
  if (da_file == NULL)
    raise_ioerror("Error reading .da file.");

  trie->da = da_read(da_file);
  fclose(da_file);

  FILE *tail_file = fopen(RSTRING_PTR(tail_filename), "r");
  if (tail_file == NULL)
    raise_ioerror("Error reading .tail file.");

  trie->tail = tail_read(tail_file);
  fclose(tail_file);

  da_free(old_da);
  tail_free(old_tail);

  return obj;
}

Instance Method Details

#add(key) ⇒ Object #add(key, value) ⇒ Object

Add a key, or a key and value to the Trie. If you add a key without a value it assumes true for the value.


119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'ext/trie/trie.c', line 119

static VALUE rb_trie_add(VALUE self, VALUE args) {
	Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    int size = RARRAY_LEN(args);
    if(size < 1 || size > 2)
		return Qnil;

    VALUE key;
    key = RARRAY_PTR(args)[0];
	StringValue(key);

    TrieData value = size == 2 ? RARRAY_PTR(args)[1] : TRIE_DATA_ERROR;
    
    if(trie_store(trie, (TrieChar*)RSTRING_PTR(key), value))
		return Qtrue;
    else
		return Qnil;
}

#children(prefix) ⇒ Array

Finds all keys in the Trie beginning with the given prefix.

Returns:

  • (Array)

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'ext/trie/trie.c', line 202

static VALUE rb_trie_children(VALUE self, VALUE prefix) {
    if(NIL_P(prefix))
		return rb_ary_new();

	StringValue(prefix);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

	int prefix_size = RSTRING_LEN(prefix);
    TrieState *state = trie_root(trie);
    VALUE children = rb_ary_new();
	TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);
    
    if(!traverse(state, char_prefix)) {
    	return children;
    }

    if(trie_state_is_terminal(state))
		rb_ary_push(children, prefix);
	
	char prefix_buffer[1024];
	memcpy(prefix_buffer, char_prefix, prefix_size);
	prefix_buffer[prefix_size] = 0;

    walk_all_paths(trie, children, state, prefix_buffer, prefix_size);

    trie_state_free(state);
    return children;
}

#children_with_values(key) ⇒ Array

Finds all keys with their respective values in the Trie beginning with the given prefix.

Returns:

  • (Array)

334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'ext/trie/trie.c', line 334

static VALUE rb_trie_children_with_values(VALUE self, VALUE prefix) {
    if(NIL_P(prefix))
		return rb_ary_new();

	StringValue(prefix);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

	int prefix_size = RSTRING_LEN(prefix);
    TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);
    
    VALUE children = rb_ary_new();

    TrieState *state = trie_root(trie);
    
    if(!traverse(state, char_prefix)) {
		return children;
	}

    if(trie_state_is_terminal(state)) {
		TrieState *end_state = trie_state_clone(state);
		trie_state_walk(end_state, '\0');

		VALUE tuple = rb_ary_new();
		rb_ary_push(tuple, prefix);
		TrieData trie_data = trie_state_get_data(end_state);
		rb_ary_push(tuple, (VALUE)trie_data);
		rb_ary_push(children, tuple);

		trie_state_free(end_state);
    }

	char prefix_buffer[1024];
	memcpy(prefix_buffer, char_prefix, prefix_size);
	prefix_buffer[prefix_size] = 0;

    walk_all_paths_with_values(trie, children, state, prefix_buffer, prefix_size);

    trie_state_free(state);
    return children;
}

#delete(key) ⇒ Object

Delete a key from the Trie. Returns true if it deleted a key, nil otherwise.


146
147
148
149
150
151
152
153
154
155
156
# File 'ext/trie/trie.c', line 146

static VALUE rb_trie_delete(VALUE self, VALUE key) {
	StringValue(key);

	Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    if(trie_delete(trie, (TrieChar*)RSTRING_PTR(key)))
		return Qtrue;
    else
		return Qnil;
}

#get(key) ⇒ Object

Retrieves the value for a particular key (or nil) from the Trie.


98
99
100
101
102
103
104
105
106
107
108
109
# File 'ext/trie/trie.c', line 98

static VALUE rb_trie_get(VALUE self, VALUE key) {
	StringValue(key);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

	TrieData data;
    if(trie_retrieve(trie, (TrieChar*)RSTRING_PTR(key), &data))
		return (VALUE)data;
    else
		return Qnil;
}

#has_children?(prefix) ⇒ Boolean

Returns:

  • (Boolean)

262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'ext/trie/trie.c', line 262

static VALUE rb_trie_has_children(VALUE self, VALUE prefix) {
    if(NIL_P(prefix))
		return rb_ary_new();

	StringValue(prefix);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

	int prefix_size = RSTRING_LEN(prefix);
    TrieState *state = trie_root(trie);
	TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);

    if(!traverse(state, char_prefix)) {
		return Qfalse;
	}

    if(trie_state_is_terminal(state))
        return Qtrue;

	char prefix_buffer[1024];
	memcpy(prefix_buffer, char_prefix, prefix_size);
	prefix_buffer[prefix_size] = 0;

    Bool ret = walk_all_paths_until_first_terminal(trie, state, prefix_buffer, prefix_size);

    trie_state_free(state);
    return ret == TRUE ? Qtrue : Qfalse;
}

#has_key?(key) ⇒ Boolean

Determines whether or not a key exists in the Trie. Use this if you don’t care about the value, as it is marginally faster than Trie#get.

Returns:

  • (Boolean)

78
79
80
81
82
83
84
85
86
87
88
# File 'ext/trie/trie.c', line 78

static VALUE rb_trie_has_key(VALUE self, VALUE key) {
	StringValue(key);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    if(trie_has_key(trie, (TrieChar*)RSTRING_PTR(key)))
		return Qtrue;
    else
		return Qnil;
}

#rootTrieNode

Returns a TrieNode representing the root of the Trie.

Returns:


386
387
388
389
390
391
392
393
394
395
396
397
398
# File 'ext/trie/trie.c', line 386

static VALUE rb_trie_root(VALUE self) {
    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    VALUE trie_node = rb_trie_node_alloc(cTrieNode);

	TrieState *state = trie_root(trie);
	RDATA(trie_node)->data = state;
    
    rb_iv_set(trie_node, "@state", Qnil);
    rb_iv_set(trie_node, "@full_state", rb_str_new2(""));
    return trie_node;
}

#save(filename_base) ⇒ true

Saves the trie data to two files, filename_base.da and filename_base.tail. Returns true if saving was successful.

Returns:

  • (true)

567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
# File 'ext/trie/trie.c', line 567

static VALUE rb_trie_save(VALUE self, VALUE filename_base) {
  VALUE da_filename = rb_str_dup(filename_base);
  rb_str_concat(da_filename, rb_str_new2(".da"));
  StringValue(da_filename);
    
  VALUE tail_filename = rb_str_dup(filename_base);
  rb_str_concat(tail_filename, rb_str_new2(".tail"));
  StringValue(tail_filename);

  Trie *trie;
  Data_Get_Struct(self, Trie, trie);

  FILE *da_file = fopen(RSTRING_PTR(da_filename), "w");
  if (da_file == NULL)
    raise_ioerror("Error opening .da file for writing.");
  if (da_write(trie->da, da_file) != 0)
    raise_ioerror("Error writing DArray data.");
  fclose(da_file);

  FILE *tail_file = fopen(RSTRING_PTR(tail_filename), "w");
  if (tail_file == NULL)
    raise_ioerror("Error opening .tail file for writing.");
  if (tail_write(trie->tail, tail_file) != 0)
    raise_ioerror("Error writing Tail data.");
  fclose(tail_file);

  return Qtrue;
}