Class: Appstats::Parser
- Inherits:
-
Object
- Object
- Appstats::Parser
- Defined in:
- lib/appstats/parser.rb
Instance Attribute Summary collapse
-
#constants ⇒ Object
readonly
Returns the value of attribute constants.
-
#constants_no_spaces ⇒ Object
readonly
Returns the value of attribute constants_no_spaces.
-
#raw_results ⇒ Object
readonly
Returns the value of attribute raw_results.
-
#raw_rules ⇒ Object
readonly
Returns the value of attribute raw_rules.
-
#raw_tokenize ⇒ Object
readonly
Returns the value of attribute raw_tokenize.
-
#repeating ⇒ Object
readonly
Returns the value of attribute repeating.
-
#results ⇒ Object
readonly
Returns the value of attribute results.
-
#rules ⇒ Object
readonly
Returns the value of attribute rules.
-
#tokenize ⇒ Object
readonly
Returns the value of attribute tokenize.
-
#tokenize_no_spaces ⇒ Object
readonly
Returns the value of attribute tokenize_no_spaces.
-
#tokenize_regex ⇒ Object
readonly
Returns the value of attribute tokenize_regex.
-
#tokenize_regex_no_spaces ⇒ Object
readonly
Returns the value of attribute tokenize_regex_no_spaces.
Class Method Summary collapse
- .alpha?(raw_input) ⇒ Boolean
- .merge_regex_filter(inputs = []) ⇒ Object
- .parse_constant(current_text, constant) ⇒ Object
Instance Method Summary collapse
-
#initialize(data = {}) ⇒ Parser
constructor
A new instance of Parser.
- #parse(input) ⇒ Object
- #parse_word(current_text, stop_on, strict = false) ⇒ Object
Constructor Details
#initialize(data = {}) ⇒ Parser
Returns a new instance of Parser.
9 10 11 12 13 14 15 16 17 |
# File 'lib/appstats/parser.rb', line 9

# Builds a parser from an options hash.
#
# @param data [Hash] options; the keys read here are :rules, :tokenize and
#   :repeating
# @return [Parser] a new instance of Parser
def initialize(data = {})
  # Start every instance with empty result containers.
  @results = {}
  @raw_results = []

  # Keep the configuration exactly as it was supplied.
  @raw_rules = data[:rules]
  @raw_tokenize = data[:tokenize]

  # Compared with `== true`, so values such as 1 or "yes" leave this false.
  @repeating = (data[:repeating] == true)

  # Helpers defined elsewhere in the class; presumably they derive the
  # token / rule state from the raw values stored above.
  update_tokens
  update_rules
end
Instance Attribute Details
#constants ⇒ Object (readonly)
Returns the value of attribute constants.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @constants
def constants
  @constants
end
#constants_no_spaces ⇒ Object (readonly)
Returns the value of attribute constants_no_spaces.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @constants_no_spaces
def constants_no_spaces
  @constants_no_spaces
end
#raw_results ⇒ Object (readonly)
Returns the value of attribute raw_results.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @raw_results
def raw_results
  @raw_results
end
#raw_rules ⇒ Object (readonly)
Returns the value of attribute raw_rules.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @raw_rules
def raw_rules
  @raw_rules
end
#raw_tokenize ⇒ Object (readonly)
Returns the value of attribute raw_tokenize.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @raw_tokenize
def raw_tokenize
  @raw_tokenize
end
#repeating ⇒ Object (readonly)
Returns the value of attribute repeating.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @repeating
def repeating
  @repeating
end
#results ⇒ Object (readonly)
Returns the value of attribute results.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @results
def results
  @results
end
#rules ⇒ Object (readonly)
Returns the value of attribute rules.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @rules
def rules
  @rules
end
#tokenize ⇒ Object (readonly)
Returns the value of attribute tokenize.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @tokenize
def tokenize
  @tokenize
end
#tokenize_no_spaces ⇒ Object (readonly)
Returns the value of attribute tokenize_no_spaces.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @tokenize_no_spaces
def tokenize_no_spaces
  @tokenize_no_spaces
end
#tokenize_regex ⇒ Object (readonly)
Returns the value of attribute tokenize_regex.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @tokenize_regex
def tokenize_regex
  @tokenize_regex
end
#tokenize_regex_no_spaces ⇒ Object (readonly)
Returns the value of attribute tokenize_regex_no_spaces.
5 6 7 |
# File 'lib/appstats/parser.rb', line 5

# Read-only accessor.
#
# @return [Object] the value currently held in @tokenize_regex_no_spaces
def tokenize_regex_no_spaces
  @tokenize_regex_no_spaces
end
Class Method Details
.alpha?(raw_input) ⇒ Boolean
76 77 78 79 |
# File 'lib/appstats/parser.rb', line 76

# Tells whether +raw_input+ consists solely of ASCII letters.
#
# The pattern is anchored with \A and \z so the whole value is checked.
# The line anchors ^ and $ would accept input such as "abc\n123", because a
# single alphabetic line is enough to satisfy them.
#
# @param raw_input [String, nil] value to test
# @return [Boolean] true when every character is A-Z or a-z and there is at
#   least one character; false for nil, empty or mixed input
def self.alpha?(raw_input)
  return false if raw_input.nil?

  !raw_input.match(/\A[A-Za-z]+\z/).nil?
end
.merge_regex_filter(inputs = []) ⇒ Object
100 101 102 103 104 |
# File 'lib/appstats/parser.rb', line 100

# Joins regex source fragments into a single alternation group.
#
# Entries for which blank? is true are skipped. The array passed in is not
# modified, so callers may reuse it or pass a frozen array.
#
# @param inputs [Array] regex source fragments; may contain nil or blank
#   entries
# @return [String] "(a|b|...)" built from the usable fragments, or "" when
#   there are none
def self.merge_regex_filter(inputs = [])
  fragments = inputs.reject { |x| x.blank? }
  return "" if fragments.empty?

  "(#{fragments.join('|')})"
end
.parse_constant(current_text, constant) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/appstats/parser.rb', line 81

# Tries to match +constant+ at the very beginning of +current_text+.
#
# A purely alphabetic constant (as decided by alpha?) must be followed by
# whitespace or the end of the text, so "on" does not match the start of
# "only". Any other constant only has to prefix the text. Matching ignores
# case. +constant+ is interpolated into the pattern as-is, so any regex
# metacharacters it contains are active.
#
# @param current_text [String, nil] text to inspect; it is not modified
# @param constant [String, nil] pattern fragment to look for
# @return [Array] [nil, nil] when the text is blank or the constant is nil;
#   otherwise the pair [matched constant or nil, remaining text] after it has
#   been passed through clean_parsed_word
def self.parse_constant(current_text,constant)
  return [nil, nil] if current_text.blank? || constant.nil?

  # Strip a copy rather than the argument: the caller's string stays intact
  # and frozen strings are accepted.
  text = current_text.strip

  # \A and \z anchor to the whole text. With ^ a multi-line input could match
  # the constant at the start of a later line and silently drop everything in
  # front of it.
  if alpha?(constant)
    m = text.match(/\A(#{constant})(\s|\z)(.*)\z/im)
    remaining_text_index = 3
  else
    m = text.match(/\A(#{constant})(.*)\z/im)
    remaining_text_index = 2
  end

  answer = m.nil? ? [nil, text] : [m[1], m[remaining_text_index]]
  clean_parsed_word(answer)
end
Instance Method Details
#parse(input) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/appstats/parser.rb', line 19 def parse(input) @results = {} @raw_results = [] return false if input.nil? return false if @rules.size == 0 @rule_index = 0 @max_rule_index = @rules.size - 1 @previous_text_so_far = input.strip @text_so_far = @previous_text_so_far @remaining_constants = @constants.dup @remaining_constants_no_spaces = @constants_no_spaces.dup while !@text_so_far.blank? process_constant_if_present break if @rule_index > @max_rule_index && !@repeating @rule_index = 0 if @rule_index > @max_rule_index rule = @rules[@rule_index] @rule_index += 1 if rule.kind_of?(Hash) if rule[:stop] == :constant was_found = false @remaining_constants.each_with_index do |k,index| p = parse_word(@text_so_far,k,true) if p[0].nil? unset_rules_until(k) else (index-1).downto(0) do |i| @remaining_constants_no_spaces.delete_at(i) @remaining_constants.delete_at(i) end add_results(rule[:rule],p[0]) @text_so_far = p[1] was_found = true break end end unless was_found add_results(rule[:rule],@text_so_far) @text_so_far = nil end else p = parse_word(@text_so_far,rule[:stop],false) add_results(rule[:rule],p[0]) unless p[0].nil? @text_so_far = p[1] end end break if @previous_text_so_far == @text_so_far @previous_text_so_far = @text_so_far end remove_tokens_at_start(@text_so_far) unset_rules_until(nil) true end |
#parse_word(current_text, stop_on, strict = false) ⇒ Object
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/appstats/parser.rb', line 106

# Splits +current_text+ into a leading word and the text that follows it.
#
# The split point is the first place where one of the stop patterns matches.
# The patterns always include @tokenize_regex; for every +stop_on+ other than
# :end they also include remaining_constants_regex.
#
# * :end   - stop only at a token; when nothing precedes the stop, the whole
#   text becomes the word
# * :space - additionally stop at whitespace
# * other  - additionally stop at +stop_on+ itself, used as a pattern fragment
#
# @param current_text [String, nil] text to split; it is stripped in place
# @param stop_on [Symbol, String, nil] stop condition, see above
# @param strict [Boolean] only consulted when +stop_on+ is neither :end nor
#   :space; when true and no stop is found, the word is nil and all of the
#   text is returned as the remainder
# @return [Array] [nil, nil] when the text is blank or +stop_on+ is nil;
#   otherwise [word, remainder] as returned by Parser.clean_parsed_word
def parse_word(current_text,stop_on,strict = false)
  return [nil, nil] if current_text.blank? || stop_on.nil?

  current_text.strip!
  current_text = remove_tokens_at_start(current_text)

  # Pick the stop patterns for this mode.
  stops =
    case stop_on
    when :end   then [nil, @tokenize_regex]
    when :space then ['\s', @tokenize_regex, remaining_constants_regex]
    else             [stop_on, @tokenize_regex, remaining_constants_regex]
    end
  boundary = Parser.merge_regex_filter(stops)
  hit = current_text.match(/^(.*?)(#{boundary}.*)$/im)

  keyword_stop = (stop_on == :end || stop_on == :space)

  if hit.nil?
    # No stop found: strict mode (custom stops only) reports no word at all.
    if strict && !keyword_stop
      word, rest = nil, current_text
    else
      word, rest = current_text, nil
    end
  elsif stop_on == :end && hit[1].blank?
    # Nothing precedes the stop, so the whole text is the word.
    word, rest = current_text, nil
  else
    word, rest = hit[1], hit[2]
  end

  Parser.clean_parsed_word([word, rest])
end