Class: ODDB::AnalysisParse::FragmentedPageHandler
- Defined in:
- ext/analysisparse/src/fragmented_page_handler.rb
Instance Attribute Summary collapse
-
#footnotes ⇒ Object
readonly
Returns the value of attribute footnotes.
-
#list_title ⇒ Object
Returns the value of attribute list_title.
-
#permission ⇒ Object
Returns the value of attribute permission.
-
#taxpoint_type ⇒ Object
Returns the value of attribute taxpoint_type.
Instance Method Summary collapse
- #each_fragment(txt) ⇒ Object
-
#initialize ⇒ FragmentedPageHandler
constructor
A new instance of FragmentedPageHandler.
- #parse_fragment(fragment, pagenum) ⇒ Object
- #parse_page(txt, pagenum) ⇒ Object
Constructor Details
#initialize ⇒ FragmentedPageHandler
Returns a new instance of FragmentedPageHandler.
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 13
#
# Returns a new instance of FragmentedPageHandler.
#
# Resets the taxpoint type and the permission to nil, starts with an empty
# footnote hash and installs the list of chapter-heading patterns that
# each_fragment uses to cut a page into fragments. The patterns are all
# case-insensitive and anchored at the beginning of a line.
def initialize
  @taxpoint_type = nil
  @permission = nil
  @footnotes = {}
  # Headings spelled in German; only leading blanks are tolerated ("^ *").
  german_headings = [
    /^ *teilliste\s*1/iu,
    /^ *teilliste\s*2/iu,
    /^ *allergologie\s*und\s*klinische\s*immunologie/iu,
    /^ *dermatologie\s*und\s*venerologie/iu,
    /^ *endokrinologie\s*-\s*diabetologie/iu,
    /^ *gastroenterologie/iu,
    /^ *gynäkologie\s*und\s*geburtshilfe/iu,
    /^ *hämatologie/iu,
    /^ *kinder-\s*und\s*jugendmedizin/iu,
    /^ *medizinische\s*onkologie/iu,
    /^ *physikalische\s*medizin\s*und\s*rehabilitation/iu,
    /^ *rheumatologie/iu,
    /^ *tropenmedizin/iu,
  ]
  # Headings spelled in French; any leading whitespace is tolerated ("^\s*").
  french_headings = [
    /^\s*Liste\s*partielle\s*1/iu,
    /^\s*Liste\s*partielle\s*2/iu,
    /^\s*allergologie\s*et\s*immunologie\s*clinique/iu,
    /^\s*dermatologie\s*et\s*vénérologie/iu,
    /^\s*endocrinologie\s*-\s*diabétologie/iu,
    /^\s*gastro-entérologie/iu,
    /^\s*gynécologie\s*et\s*obstétrique/iu,
    /^\s*hématologie/iu,
    /^\s*médecine\s*physique\s*et\s*réadaptation/iu,
    /^\s*médecine\s*tropicale/iu,
    /^\s*oncologie\s*médicale/iu,
    /^\s*pédiatrie/iu,
    /^\s*rhumatologie/iu,
  ]
  # Order is preserved: the German patterns come first, then the French ones.
  @chapters = german_headings + french_headings
end
Instance Attribute Details
#footnotes ⇒ Object (readonly)
Returns the value of attribute footnotes.
12 13 14 |
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 12
#
# Returns the value of attribute footnotes (read-only): the object currently
# held in @footnotes.
def footnotes
  @footnotes
end
#list_title ⇒ Object
Returns the value of attribute list_title.
11 12 13 |
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 11
#
# Returns the value of attribute list_title: whatever is currently stored in
# @list_title (nil until it has been assigned).
def list_title
  @list_title
end
#permission ⇒ Object
Returns the value of attribute permission.
11 12 13 |
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 11
#
# Returns the value of attribute permission: whatever is currently stored in
# @permission.
#
# NOTE(review): the method name was missing from the extracted listing
# ("def @permission end"); it is restored here from the attribute heading.
def permission
  @permission
end
#taxpoint_type ⇒ Object
Returns the value of attribute taxpoint_type.
11 12 13 |
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 11
#
# Returns the value of attribute taxpoint_type: whatever is currently stored
# in @taxpoint_type.
def taxpoint_type
  @taxpoint_type
end
Instance Method Details
#each_fragment(txt) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 64
#
# Cuts +txt+ at the first occurrence of every chapter heading in @chapters
# and yields the resulting pieces in document order.
#
# * Text in front of the first heading is yielded first, with @taxpoint_type
#   and @permission left as they were. If no heading matches at all, the
#   whole text is yielded this way.
# * Every following piece starts at a heading and runs up to the character
#   before the next heading (or to the end of the text). Before it is
#   yielded, @permission is set to the heading text that matched (leading
#   whitespace stripped) and @taxpoint_type to :fixed for
#   "Teilliste 1" / "Liste partielle 1", :default for
#   "Teilliste 2" / "Liste partielle 2", and nil for any other chapter.
#
# txt:: the page text to split (String)
#
# Yields each fragment as a String; fragments may be empty.
def each_fragment(txt)
  # Collect [offset, rank, pattern] for every heading found. The hits are
  # then ordered by offset: slicing "from this heading to the next one" only
  # works when the headings are visited in the order they occur in the text,
  # which is not necessarily the order of @chapters. rank keeps the
  # @chapters order as a deterministic tie-break.
  headings = []
  @chapters.each_with_index do |ptrn, rank|
    pos = txt.index(ptrn)
    headings.push([pos, rank, ptrn]) if pos
  end
  headings = headings.sort_by { |pos, rank, _ptrn| [pos, rank] }

  first = headings.empty? ? nil : headings.first.first
  # nil.to_i - 1 == -1, so with no heading at all the whole text is yielded.
  yield txt[0..(first.to_i - 1)] unless first == 0

  headings.each_with_index do |(pos, _rank, ptrn), idx|
    following = headings.at(idx.next)
    # The last fragment runs to the end of the text (index -1).
    stop = following ? following.first - 1 : -1
    src = txt[pos..stop]
    # The case yields nil when neither partial list is mentioned.
    @taxpoint_type = case src
                     when /teilliste\s*1/iu, /Liste\s*partielle\s*1/iu then :fixed
                     when /teilliste\s*2/iu, /Liste\s*partielle\s*2/iu then :default
                     end
    @permission = src.match(ptrn).to_s.lstrip
    yield src
  end
end
#parse_fragment(fragment, pagenum) ⇒ Object
55 56 57 58 59 60 61 62 63 |
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 55
#
# Parses a single fragment with a fresh ExtendedListParser.
#
# The parser is given the handler's current taxpoint type, permission and
# list title before it parses +fragment+. Afterwards the parser's footnotes
# are merged into @footnotes.
#
# fragment:: the piece of page text to parse
# pagenum::  the page number, passed through to the parser unchanged
#
# Returns whatever ExtendedListParser#parse_page returns for the fragment.
def parse_fragment(fragment, pagenum)
  parser = ExtendedListParser.new
  parser.taxpoint_type = @taxpoint_type
  # NOTE(review): the setter name was missing from the extracted listing
  # ("parser. = @permission"); `permission=` is inferred from the value being
  # assigned and the neighbouring setters - confirm against ExtendedListParser.
  parser.permission = @permission
  parser.list_title = @list_title
  positions = parser.parse_page(fragment, pagenum)
  @footnotes.update(parser.footnotes)
  positions
end
#parse_page(txt, pagenum) ⇒ Object
46 47 48 49 50 51 52 53 54 |
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 46
#
# Parses one page of text fragment by fragment.
#
# The text is split with each_fragment; every non-empty fragment is handed to
# parse_fragment together with +pagenum+, and the results are concatenated in
# the order the fragments were yielded.
#
# txt::     the page text
# pagenum:: the page number, passed through to parse_fragment unchanged
#
# Returns the accumulated positions as an Array (empty when nothing was
# parsed).
def parse_page(txt, pagenum)
  collected = []
  each_fragment(txt) do |piece|
    # Empty slices carry nothing to parse.
    next if piece.empty?
    collected += parse_fragment(piece, pagenum)
  end
  collected
end