Class: ScientificNameParser

Inherits:
Object
  • Object
show all
Defined in:
lib/biodiversity/parser.rb

Overview

We can use these expressions when we are ready to parse virus names:

class VirusParser

# Builds the regular expressions meant for recognising virus names at each
# taxonomic rank. Nothing is parsed here; @parsed starts out as nil.
#
# Every pattern is case-insensitive and anchored at the start of a line
# (after optional whitespace). Suffix alternatives are grouped so the name
# stem is required for each alternative, and "+" is the regular "one or
# more" quantifier rather than an escaped literal plus sign.
def initialize
  @order     = /^\s*[A-Z][a-z]+virales/i
  @family    = /^\s*[A-Z][a-z]+(?:viridae|viroidae)/i
  @subfamily = /^\s*[A-Z][a-z]+(?:virinae|viroinae)/i
  @genus     = /^\s*[A-Z][a-z]+(?:virus|viroid)/i
  # \u0391-\u03C9 covers the Greek letters from capital alpha to small omega.
  # NOTE(review): the text after the keyword is optional so that names ending
  # in the keyword itself ("... mosaic virus") match -- confirm this is the
  # intended behaviour before enabling virus parsing.
  @species   = /^\s*[A-Za-z0-9\u0391-\u03C9\[\] ]+(?:virus|phage|viroid|satellite|prion)[A-Za-z0-9\u0391-\u03C9\[\] ]*/i
  @parsed    = nil
end

end

Constant Summary collapse

# Version string, read at load time from the first line of the VERSION file
# located two directories above this source file (surrounding whitespace
# stripped). The block form of File.open closes the handle as soon as the
# line has been read instead of leaving it open until garbage collection.
VERSION =
File.open(File.join(File.dirname(__FILE__), '..', '..', 'VERSION')) { |version_file| version_file.readline.strip }

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ ScientificNameParser

Returns a new instance of ScientificNameParser.



41
42
43
44
45
46
47
# File 'lib/biodiversity/parser.rb', line 41

# Sets up a parser that has not seen any input yet: one instance of each
# of the three underlying parsers (clean, dirty, canonical), an empty
# verbatim string and no parse result.
def initialize
  @clean     = ScientificNameCleanParser.new
  @dirty     = ScientificNameDirtyParser.new
  @canonical = ScientificNameCanonicalParser.new
  @verbatim  = ''
  @parsed    = nil
end

Class Method Details

.all(verbatim = @verbatim) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/biodiversity/parser.rb', line 77

# Assembles the complete parse report for this result object.
#
# @param verbatim [String, nil] name string reported under :verbatim for a
#   successful parse; defaults to the receiver's own @verbatim. (Previously
#   the argument was accepted but silently ignored.)
# @return [Hash] a single-key hash, { :scientificName => report }. The report
#   always holds :parsed and :parser_version. For a successful parse it also
#   holds :verbatim, :normalized, :canonical, :hybrid, :details, :parser_run
#   and :positions; otherwise the receiver's own key/value pairs are merged in.
def @parsed.all(verbatim = @verbatim)
  # The receiver is a plain Hash exactly when no parser produced a result.
  parsed = self.class != Hash
  res = { :parsed => parsed, :parser_version => ScientificNameParser::VERSION }
  if parsed
    # If calling #hybrid raises, the name is reported as not being a hybrid.
    hybrid = self.hybrid rescue false
    res.merge!({
      :verbatim => verbatim,
      :normalized => self.value,
      :canonical => self.canonical,
      :hybrid => hybrid,
      :details => self.details,
      :parser_run => self.parser_run,
      :positions => self.pos
    })
  else
    res.merge!(self)
  end
  { :scientificName => res }
end

.all_json ⇒ Object



102
103
104
# File 'lib/biodiversity/parser.rb', line 102

# Serialises the report returned by #all to JSON.
# Any StandardError raised along the way yields an empty string instead.
def @parsed.all_json
  self.all.to_json
rescue StandardError
  ''
end

.pos_json ⇒ Object



98
99
100
# File 'lib/biodiversity/parser.rb', line 98

# Serialises the value of #pos to JSON.
# Any StandardError raised along the way yields an empty string instead.
def @parsed.pos_json
  self.pos.to_json
rescue StandardError
  ''
end

.verbatim=(a_string) ⇒ Object



73
74
75
# File 'lib/biodiversity/parser.rb', line 73

# Stores the given string in the receiver's @verbatim instance variable.
def @parsed.verbatim=(a_string)
  instance_variable_set(:@verbatim, a_string)
end

Instance Method Details

#parse(a_string) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/biodiversity/parser.rb', line 61

# Parses a scientific name string and returns the full report.
#
# The input is remembered untouched in @verbatim and then cleaned with
# PreProcessor.clean. Strings recognised by #virus? or #unknown_placement?
# are not parsed; they are stored as a plain Hash (holding the cleaned
# string under :verbatim). Everything else is offered to the clean, dirty
# and canonical parsers in that order, falling back to a plain Hash when
# none of them returns a result.
#
# The stored result (also available through #parsed) is given four
# singleton methods: verbatim=, all, pos_json and all_json.
#
# @param a_string [String] the name string to parse
# @return [Hash] the value of @parsed.all, i.e. { :scientificName => report }
def parse(a_string)
  @verbatim = a_string
  a_string = PreProcessor.clean(a_string)

  @parsed =
    if virus?(a_string)
      { :verbatim => a_string, :virus => true }
    elsif unknown_placement?(a_string)
      { :verbatim => a_string }
    else
      @clean.parse(a_string) || @dirty.parse(a_string) ||
        @canonical.parse(a_string) || { :verbatim => a_string }
    end

  # Stores the original input string on the result object.
  def @parsed.verbatim=(a_string)
    @verbatim = a_string
  end

  # Builds the report. The verbatim argument defaults to the result's own
  # @verbatim and is used for the :verbatim entry of a successful parse
  # (previously the argument was accepted but silently ignored).
  def @parsed.all(verbatim = @verbatim)
    # The receiver is a plain Hash exactly when no parser produced a result.
    parsed = self.class != Hash
    res = { :parsed => parsed, :parser_version => ScientificNameParser::VERSION }
    if parsed
      # If calling #hybrid raises, the name is reported as not being a hybrid.
      hybrid = self.hybrid rescue false
      res.merge!({
        :verbatim => verbatim,
        :normalized => self.value,
        :canonical => self.canonical,
        :hybrid => hybrid,
        :details => self.details,
        :parser_run => self.parser_run,
        :positions => self.pos
      })
    else
      res.merge!(self)
    end
    { :scientificName => res }
  end

  # JSON form of #pos; an empty string if anything goes wrong.
  def @parsed.pos_json
    self.pos.to_json rescue ''
  end

  # JSON form of #all; an empty string if anything goes wrong.
  def @parsed.all_json
    self.all.to_json rescue ''
  end

  @parsed.verbatim = @verbatim
  @parsed.all
end

#parsed ⇒ Object



57
58
59
# File 'lib/biodiversity/parser.rb', line 57

# Returns whatever is currently stored in @parsed: nil after #initialize,
# otherwise the result object assigned by #parse.
def parsed
  instance_variable_get(:@parsed)
end

#unknown_placement?(a_string) ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
# File 'lib/biodiversity/parser.rb', line 53

# Tells whether the string contains "incertae sedis" or its abbreviation
# "inc. sed." (case-insensitive).
#
# @param a_string [String] name string to inspect
# @return [Boolean] true if either marker occurs in the string
def unknown_placement?(a_string)
  markers = [/incertae\s+sedis/i, /inc\.\s*sed\./i]
  markers.any? { |marker| a_string.match(marker) }
end

#virus?(a_string) ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
# File 'lib/biodiversity/parser.rb', line 49

# Tells whether the string looks like a virus name: it either ends in
# whitespace followed by "ICTV" (case-sensitive), or contains one of the
# words virus, phage, viroid, satellite or prion (case-insensitive)
# directly after a whitespace character.
#
# @param a_string [String] name string to inspect
# @return [Boolean] true if either pattern matches
def virus?(a_string)
  signatures = [/\sICTV\s*$/, /\s(virus|phage|viroid|satellite|prion)\b/i]
  signatures.any? { |signature| a_string.match(signature) }
end