Class: FoodIngredientParser::Loose::Scanner

Inherits:
Object
  • Object
show all
Defined in:
lib/food_ingredient_parser/loose/scanner.rb

Constant Summary collapse

# Set of characters named as separators: pipe, semicolon, comma and full stop.
# NOTE(review): how the scanner consumes these is outside this excerpt.
SEP_CHARS =
"|;,.".freeze
# Matches, at the very start of the string, optional whitespace followed by
# "and", "en" or "und" and at least one whitespace character (case-insensitive).
AND_SEP_RE =
/\A\s*(and|en|und)\s+/i.freeze
# Set of marker glyphs: superscript digits and letters, ordinal indicators,
# superscript parentheses and plus, daggers, bullets, and symbols such as
# "#", "^", "*" and "~".
# NOTE(review): presumably footnote/reference marks attached to ingredient
# names - confirm against the code that reads this constant.
MARK_CHARS =
"¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡⁺•°▪◊#^˄*~".freeze
# Matches a leading heading at the very start of the string (case-insensitive):
# optional whitespace, one of the listed English/Dutch/German heading words
# (e.g. "ingredients", "ingredients list", "contains", "ingrediënten",
# "bevat", "dit zit er in", "samenstelling", "zutaten"), a word boundary,
# an optional ":", ";" or "." and any trailing whitespace.
PREFIX_RE =
/\A\s*(ingredients(\s*list)?|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\b\s*[:;.]?\s*/i.freeze
# Matches, at the very start of the string (case-insensitive), one of a fixed
# list of Dutch phrases such as "dit product kan", "kan sporen ... bevatten",
# "allergenen", "gemaakt in", "bevat mogelijk", or a
# "voor <amount> g/ml ... is <amount> g/ml ... gebruikt" statement.
# NOTE(review): presumably marks the start of a trailing note rather than an
# ingredient - confirm against the code that reads this constant.
NOTE_RE =
/\A\b(dit product kan\b|deze verpakking kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|allergie[- ]informatie(\s*:|\b)|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b|voor [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bis [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bgebruikt\b)/i.freeze
ABBREV_RE =

Keep in sync with abbrev in the Common grammar, plus relevant ones from the Amount grammar.

# Union of patterns that are each anchored at the start of the string with \A.
#
# The first argument is a single extended-mode, case-insensitive regexp of
# special cases: "N°" and "°C", a metal name followed by a parenthesised run
# of "I" and a word (e.g. "ijzer (II) ..."), "L(+)" prefixes, abbreviated
# genus names (e.g. "L. casei"), "T. aestivum" with optional "vitt.",
# "nucifera L.", type "<digits>", E-numbers followed by a roman numeral in
# parentheses or brackets, and "www." addresses.
#
# NOTE(review): "L. bulgaricus" uses a single \s where its siblings use \s+,
# and "B. lactis" / "A. oryzae" lack the trailing \b the others have -
# confirm whether that is intentional.
Regexp.union(
  /\A(
    N°\b |
    °C\b |
    (ijzer|chroom|koper)\s*\(I+\)\s*[[:alnum:]]+\b |
    L\(\+\)[ -][[:alnum:]]+\b |
    L\.\s+rhamnosus\b | L\.\s+acidophilus\b | L\.\s+casei\b | B\.\s+lactis | A\.\s+oryzae |
    S\.\s+thermophilus\b | L\.\sbulgaricus\b |
    T\.\s*aestivum\b(\s+vitt\.)? |
    nucifera\s+L\. |
    type\s+"\d+" |
    E(-|\s+)?\d{3}[a-z]?\s*(\([iv]+\)|\[[iv]+\]) |
    www\.[-_\/:%.A-Za-z0-9]+
  )/xi,
  # Every word in the list below becomes its own case-insensitive regexp:
  # \A, the escaped word, a word boundary, then an optional trailing dot.
  *%w[
    a.o.p b.g.a b.o.b c.a c.i d.e d.m.v d.o.c d.o.p d.s e.a e.g e.u f.i.l f.o.s h.o.h
    i.a i.d i.e i.g.m.e i.g.p i.m.v i.o i.v.m l.s.l n.a n.b n.o n.v.t o.a o.b.v p.d.o
    p.g.i q.s s.l s.s t.o.v u.h.t v.g v.s w.a w.o w.v vit denat alc vol conc subsp
    min max ca
  ].map {|s| /\A#{Regexp.escape(s)}\b\.?/i}
).freeze

Instance Method Summary collapse

Constructor Details

#initialize(s, index: 0) ⇒ Scanner

Returns a new instance of Scanner.



34
35
36
37
38
39
40
41
42
# File 'lib/food_ingredient_parser/loose/scanner.rb', line 34

# Prepares a scanner over +s+ that starts looking at offset +index+.
#
# @param s [String] input string to scan
# @param index [Integer] offset in +s+ of the first character to look at
def initialize(s, index: 0)
  # The text being scanned.
  @s = s
  # Cursor into the text; the active iterator inspects the character here.
  @i = index
  # Node currently being populated (none yet).
  @cur = nil
  # Last index of the current node's text not yet added to a child node.
  @curifree = nil
  # Nesting hierarchy, seeded with a single node created at the start offset.
  @ancestors = [Node.new(s, index)]
  # Selects which scan_iteration_<iterator> method does the parsing.
  @iterator = :beginning
  # Attribute on the parent that the current node gets appended to.
  @dest = :contains
end

Instance Method Details

#scan ⇒ Object



44
45
46
47
48
49
50
51
52
# File 'lib/food_ingredient_parser/loose/scanner.rb', line 44

# Runs the scanner to completion and returns the root node.
#
# Keeps invoking the scan_iteration_<@iterator> method chosen by the current
# state. Kernel#loop returns once its block raises StopIteration.
# NOTE(review): presumably the iteration methods raise StopIteration when the
# input is exhausted - confirm, since they are outside this excerpt.
#
# @return [Object] the first entry of @ancestors, after close_all_ancestors
#   has run and ends(@i - 1) has been called on it
def scan
  # send dispatches on the state name directly (private methods included)
  # instead of building a Method object on every iteration.
  loop { send(:"scan_iteration_#{@iterator}") }

  close_all_ancestors
  root = @ancestors.first
  root.ends(@i - 1)
  root
end