Class: Whistlepig::Query

Inherits:
Object
  • Object
show all
Defined in:
lib/whistlepig.rb,
ext/whistlepig/whistlepig.c

Overview

A query. Queries are created from strings with Query.new. If parsing the string fails, a ParseError is raised.

At the lowest level, queries are composed of space-separated terms. Matches against that term are restricted to the default field specified at parse time.

hello                  # search for "hello" in the default field

Term matches can be restricted to another field by prefixing them with the field name and “:”, e.g. “subject:hello”.

subject:hello          # search for "hello" in the "subject" field

Multiple terms are considered conjunctive (i.e. all must match) unless the special token “OR” appears between them. The “OR” must be capitalized in this case.

word1 word2            # search for word1 and word2
word1 OR word2         # search for word1 or word2
subject:hello bob      # "hello" in the subject field and "bob" in the
                       #  default field

Parentheses can be used to group disjunctions, conjunctions or fields.

(word1 OR word2) word3 # "word3" and either "word1" or "word2"
field:(word1 OR word2) # "word1" or "word2" in field "field"

Phrases are specified by surrounding the terms with double quotes.

"bob jones"             # documents with the phrase "bob jones"

Negations can be specified with a - prefix.

-word                  # docs without "word"
-subject:(bob OR joe)  # docs with neither "bob" nor "joe" in subject

Labels are specified with a ~ prefix. Labels do not have fields.

~inbox                 # docs with the "inbox" label
-~inbox                # docs without the "inbox" label
-~inbox subject:hello  # docs with subject "hello" and without the
                       # inbox label

All of the above can be mixed and matched, of course.

-subject:"spam email" ~inbox (money OR cash)
("love you" OR "hate you") -(~deleted OR ~spam)
etc...

Existing query objects can also be altered programmatically, at least to a limited extent, by calling Query#and and Query#or.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(query) ⇒ Object



499
500
501
502
# File 'ext/whistlepig/whistlepig.c', line 499

/*
 * Query#initialize(query)
 *
 * Stores the given value in the receiver's @query instance variable and
 * returns the receiver.
 */
static VALUE query_init(VALUE self, VALUE query) {
  (void) rb_iv_set(self, "@query", query); // the stored value is not needed here

  return self;
}

Instance Attribute Details

#queryObject (readonly)

Class Method Details

.new(default_field, query_string) ⇒ Object

Creates a new query by parsing the string query_string, which must be a String. Any non-fielded terms will use the field default_field, which must also be a String. Raises a ParseError if the query cannot be parsed.



334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'ext/whistlepig/whistlepig.c', line 334

/*
 * Query.new(default_field, query_string)
 *
 * Parses `string` with wp_query_parse, passing `default_field` as the default
 * field, and wraps the resulting wp_query in a new Ruby object of `class`.
 * The original query string is handed to #initialize.
 *
 * Raises TypeError if either argument is not a String, ArgumentError if
 * either contains a NUL byte, and ParseError (c_parse_error) if
 * wp_query_parse reports an error.
 */
static VALUE query_new(VALUE class, VALUE default_field, VALUE string) {
  Check_Type(default_field, T_STRING);
  Check_Type(string, T_STRING);

  // StringValueCStr guarantees a NUL-terminated buffer and rejects strings
  // with embedded NULs, which the C parser would otherwise silently truncate.
  char* c_default_field = StringValueCStr(default_field);
  char* c_string = StringValueCStr(string);

  wp_query* query = NULL;
  wp_error* e = wp_query_parse(c_string, c_default_field, &query);
  if(e != NULL) {
    // build the exception before freeing the error, since it copies e->msg
    VALUE exc = rb_exc_new2(c_parse_error, e->msg);
    wp_error_free(e);
    rb_exc_raise(exc);
  }

  // wrap first so the query is freed via wp_query_free when the object is collected
  VALUE o_query = Data_Wrap_Struct(class, NULL, wp_query_free, query);
  VALUE argv[1] = { string };
  rb_obj_call_init(o_query, 1, argv);

  return o_query;
}

Instance Method Details

#and(other) ⇒ Object

Returns a new Query that is a conjunction of this query and other, which must also be a Query object.



414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
# File 'ext/whistlepig/whistlepig.c', line 414

/*
 * Query#and(other)
 *
 * Builds a new conjunction node, adds clones of this query and of `other`
 * to it, and returns it wrapped as a new Whistlepig::Query.
 *
 * `other` must be a Whistlepig::Query or an instance of a subclass;
 * anything else raises TypeError.
 *
 * The result's @query is copied from the receiver, so it does not describe
 * the combined query.
 */
static VALUE query_and(VALUE self, VALUE v_other) {
  // kind_of? rather than an exact class match, so subclass instances are accepted
  if(!RTEST(rb_obj_is_kind_of(v_other, c_query))) {
    rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object");
    // not reached
  }

  wp_query* query; Data_Get_Struct(self, wp_query, query);
  wp_query* other; Data_Get_Struct(v_other, wp_query, other);

  wp_query* result = wp_query_new_conjunction();
  result = wp_query_add(result, wp_query_clone(query));
  result = wp_query_add(result, wp_query_clone(other));

  VALUE o_result = Data_Wrap_Struct(c_query, NULL, wp_query_free, result);
  VALUE argv[1] = { rb_iv_get(self, "@query") }; // i guess
  rb_obj_call_init(o_result, 1, argv);

  return o_result;
}

#cloneObject

Returns a deep copy of the query, with any in-progress search state discarded. Useful for rerunning a query without interrupting any pagination state.



371
372
373
374
375
376
377
378
379
# File 'ext/whistlepig/whistlepig.c', line 371

/*
 * Query#clone
 *
 * Returns a new Whistlepig::Query wrapping the result of wp_query_clone on
 * this query. The receiver's @query value is passed to the copy's
 * #initialize, so the `query` attribute survives cloning, as it does for
 * the other methods here that build new Query objects.
 */
static VALUE query_clone(VALUE self) {
  wp_query* query; Data_Get_Struct(self, wp_query, query);
  wp_query* clone = wp_query_clone(query);

  VALUE o_query = Data_Wrap_Struct(c_query, NULL, wp_query_free, clone);

  // without this the copy's @query would be left unset (nil)
  VALUE argv[1] = { rb_iv_get(self, "@query") };
  rb_obj_call_init(o_query, 1, argv);

  return o_query;
}

#or(other) ⇒ Object

Returns a new Query that is a disjunction of this query and other, which must also be a Query object.



441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'ext/whistlepig/whistlepig.c', line 441

/*
 * Query#or(other)
 *
 * Builds a new disjunction node, adds clones of this query and of `other`
 * to it, and returns it wrapped as a new Whistlepig::Query.
 *
 * `other` must be a Whistlepig::Query or an instance of a subclass;
 * anything else raises TypeError.
 *
 * The result's @query is copied from the receiver, so it does not describe
 * the combined query.
 */
static VALUE query_or(VALUE self, VALUE v_other) {
  // kind_of? rather than an exact class match, so subclass instances are accepted
  if(!RTEST(rb_obj_is_kind_of(v_other, c_query))) {
    rb_raise(rb_eTypeError, "query must be a Whistlepig::Query object");
    // not reached
  }

  wp_query* query; Data_Get_Struct(self, wp_query, query);
  wp_query* other; Data_Get_Struct(v_other, wp_query, other);

  wp_query* result = wp_query_new_disjunction();
  result = wp_query_add(result, wp_query_clone(query));
  result = wp_query_add(result, wp_query_clone(other));

  VALUE o_result = Data_Wrap_Struct(c_query, NULL, wp_query_free, result);
  VALUE argv[1] = { rb_iv_get(self, "@query") }; // i guess
  rb_obj_call_init(o_result, 1, argv);

  return o_result;
}

#snippetize(field, string, max_num_results = 10) ⇒ Object

Returns an array of [start, end] subarrays that mark the matching positions of the query within the string. ‘field’ determines which field the string is taken to be.



469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
# File 'ext/whistlepig/whistlepig.c', line 469

/*
 * Query#snippetize(field, string, max_num_results = 10)
 *
 * Calls wp_snippetize_string for this query on `string`, treated as the
 * contents of `field`, and returns an Array of [start, end] pairs built from
 * the offsets it reports.
 *
 * Raises TypeError if `field` or `string` is not a String, ArgumentError if
 * max_num_results is negative, NoMemoryError if the offset buffers cannot be
 * allocated, and whatever RAISE_IF_NECESSARY raises when
 * wp_snippetize_string returns an error.
 */
static VALUE query_snippetize(int argc, VALUE* argv, VALUE self) {
  VALUE v_field, v_string, v_max_num_results;
  rb_scan_args(argc, argv, "21", &v_field, &v_string, &v_max_num_results);
  // both are read through RSTRING_PTR below, so both must be Strings
  Check_Type(v_field, T_STRING);
  Check_Type(v_string, T_STRING);

  wp_query* query; Data_Get_Struct(self, wp_query, query);

  uint32_t max_num_results = 10;
  if(!NIL_P(v_max_num_results)) {
    int requested = NUM2INT(v_max_num_results);
    // a negative count would wrap to a huge unsigned buffer size
    if(requested < 0) rb_raise(rb_eArgError, "max_num_results must be non-negative");
    max_num_results = (uint32_t)requested;
  }

  // Allocate at least one slot so a NULL return always means failure;
  // calloc also checks the count * size multiplication for overflow.
  size_t capacity = max_num_results > 0 ? (size_t)max_num_results : 1;
  uint32_t* start_offsets = calloc(capacity, sizeof *start_offsets);
  uint32_t* end_offsets = calloc(capacity, sizeof *end_offsets);
  if(start_offsets == NULL || end_offsets == NULL) {
    free(start_offsets);
    free(end_offsets);
    rb_memerror();
    // not reached
  }

  uint32_t num_results = 0;
  wp_error* e = wp_snippetize_string(query, RSTRING_PTR(v_field), RSTRING_PTR(v_string), max_num_results, &num_results, start_offsets, end_offsets);
  if(e != NULL) {
    // release the buffers first: raising unwinds past the free() calls below
    free(start_offsets);
    free(end_offsets);
    RAISE_IF_NECESSARY(e);
    // NOTE(review): RAISE_IF_NECESSARY is defined elsewhere and is expected
    // not to return for a non-NULL error; bail out rather than touch the
    // freed buffers if it ever does.
    return Qnil;
  }

  VALUE array = rb_ary_new2(num_results);
  for(uint32_t i = 0; i < num_results; i++) {
    VALUE subarray = rb_ary_new2(2);
    // offsets are unsigned, so convert with UINT2NUM rather than INT2NUM
    rb_ary_store(subarray, 0, UINT2NUM(start_offsets[i]));
    rb_ary_store(subarray, 1, UINT2NUM(end_offsets[i]));
    rb_ary_store(array, i, subarray);
  }

  free(start_offsets);
  free(end_offsets);

  return array;
}

#term_mapObject

Returns a new query that’s the result of applying the block to each word in the query. Useful for transforming queries programmatically after they’ve been parsed.



393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'ext/whistlepig/whistlepig.c', line 393

/*
 * Query#term_map
 *
 * Runs wp_query_substitute over this query with yielding_substituter and
 * returns the result wrapped as a new Whistlepig::Query. The receiver's
 * @query value is passed on to the new object's #initialize.
 *
 * NOTE(review): yielding_substituter is defined elsewhere in this file;
 * presumably it yields each term to the caller's block -- confirm there.
 */
static VALUE query_map_terms(VALUE self) {
  wp_query* query; Data_Get_Struct(self, wp_query, query);
  wp_query* result = wp_query_substitute(query, yielding_substituter);

  VALUE o_query = Data_Wrap_Struct(c_query, NULL, wp_query_free, result);

  VALUE argv[1] = { rb_iv_get(self, "@query") };
  rb_obj_call_init(o_query, 1, argv);

  return o_query;
}

#to_sObject

Returns a string representation of the parsed query, useful for debugging.



356
357
358
359
360
361
362
363
# File 'ext/whistlepig/whistlepig.c', line 356

/*
 * Query#to_s
 *
 * Has wp_query_to_s render this query into a fixed 1024-byte stack buffer
 * and returns the buffer's contents as a new Ruby String.
 */
static VALUE query_to_s(VALUE self) {
  wp_query* q;
  char text[1024];

  Data_Get_Struct(self, wp_query, q);
  wp_query_to_s(q, sizeof(text), text); // buffer size taken from the array itself

  return rb_str_new2(text);
}