Module: FoodIngredientParser::Cleaner

Defined in:
lib/food_ingredient_parser/cleaner.rb

Class Method Summary collapse

Class Method Details

.clean(s) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
# File 'lib/food_ingredient_parser/cleaner.rb', line 4

# Normalizes a raw ingredient string before parsing: repairs known encoding
# artefacts, removes soft hyphens, and strips quotation marks that wrap the
# whole text.
#
# The string is cleaned in place when it is mutable, as callers that ignore
# the return value may rely on that. A frozen string is never modified; a
# cleaned copy is returned instead.
#
# @param s [String] text to clean
# @return [String] the cleaned text (+s+ itself, or a copy when +s+ is frozen)
def self.clean(s)
  # The bang methods below raise FrozenError on frozen input (e.g. string
  # literals under `frozen_string_literal: true`), so work on a copy then.
  s = s.dup if s.frozen?

  s.gsub!(/(_x005f_|_)x000d_/i, "\n")  # escaped carriage return ("_x000D_", optionally with an escaped underscore) becomes a newline
  s.gsub!("\u00ad", "")                # strip soft hyphen
  s.gsub!("\u0092", "'")               # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660

  # Comma look-alikes: U+0082 is what windows-1252 byte 0x82 becomes when it is
  # mis-decoded (same mechanism as U+0092 above), U+201A is the character it
  # stands for. Written as escapes so the pattern cannot silently become an
  # empty string, which would insert a comma between every character.
  # NOTE(review): the original literal was not visible; confirm the code point.
  s.gsub!(/[\u0082\u201a]/, ",")

  s.gsub!("a\u00c4s", "a\u00efs")      # "aÄs" -> "aïs": encoding issue for maïs

  # A base letter (plain or already accented) followed by a stray combining
  # diaeresis becomes the single precomposed letter.
  # NOTE(review): assumes the invisible character in the original literals was
  # U+0308 - confirm against real input data.
  s.gsub!(/[i\u00ef]\u0308/, "\u00ef") # -> ï
  s.gsub!(/[e\u00eb]\u0308/, "\u00eb") # -> ë

  # Enclosing quotation marks. /m lets `.` cross newlines, so quoted
  # multi-line text (such as produced by the first step) is unwrapped too.
  s.sub!(/\A\s*(["']+)(.*)\1\s*\z/m, '\2')
  s
end