Class: ODDB::AnalysisParse::FragmentedPageHandler

Inherits:
Object
  • Object
show all
Defined in:
ext/analysisparse/src/fragmented_page_handler.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ FragmentedPageHandler

Returns a new instance of FragmentedPageHandler.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 13

# Sets up the parsing state that is carried from fragment to fragment:
# the current taxpoint type, permission and list title (all nil to begin
# with), an empty footnote hash, and the ordered table of chapter-heading
# patterns that #each_fragment uses to cut a page into fragments.
def initialize
	@taxpoint_type = nil
	@permission = nil
	# Read by #list_title and #parse_fragment; give it a defined start value
	# like the rest of the state instead of leaving it unset.
	@list_title = nil
	@footnotes = {}
	# Heading patterns are matched case-insensitively at the start of a line.
	german = [
		/^ *teilliste\s*1/iu,
		/^ *teilliste\s*2/iu,
		/^ *allergologie\s*und\s*klinische\s*immunologie/iu,
		/^ *dermatologie\s*und\s*venerologie/iu,
		/^ *endokrinologie\s*-\s*diabetologie/iu,
		/^ *gastroenterologie/iu,
		/^ *gynäkologie\s*und\s*geburtshilfe/iu,
		/^ *hämatologie/iu,
		/^ *kinder-\s*und\s*jugendmedizin/iu,
		/^ *medizinische\s*onkologie/iu,
		/^ *physikalische\s*medizin\s*und\s*rehabilitation/iu,
		/^ *rheumatologie/iu,
		/^ *tropenmedizin/iu,
	]
	french = [
		/^\s*Liste\s*partielle\s*1/iu,
		/^\s*Liste\s*partielle\s*2/iu,
		/^\s*allergologie\s*et\s*immunologie\s*clinique/iu,
		/^\s*dermatologie\s*et\s*vénérologie/iu,
		/^\s*endocrinologie\s*-\s*diabétologie/iu,
		/^\s*gastro-entérologie/iu,
		/^\s*gynécologie\s*et\s*obstétrique/iu,
		/^\s*hématologie/iu,
		/^\s*médecine\s*physique\s*et\s*réadaptation/iu,
		/^\s*médecine\s*tropicale/iu,
		/^\s*oncologie\s*médicale/iu,
		/^\s*pédiatrie/iu,
		/^\s*rhumatologie/iu,
	]
	# Same order as before: all German headings first, then all French ones.
	@chapters = german + french
end

Instance Attribute Details

#footnotes ⇒ Object (readonly)

Returns the value of attribute footnotes.



12
13
14
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 12

# Reader for @footnotes, the hash that #initialize creates empty and that
# #parse_fragment updates with the footnotes of every fragment parser.
def footnotes
  @footnotes
end

#list_title ⇒ Object

Returns the value of attribute list_title.



11
12
13
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 11

# Reader for @list_title, the value #parse_fragment hands to each
# ExtendedListParser as its list_title.
# NOTE(review): no method shown here assigns @list_title; presumably a
# writer on this class sets it from outside -- confirm against callers.
def list_title
  @list_title
end

#permission ⇒ Object

Returns the value of attribute permission.



11
12
13
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 11

# Reader for @permission: nil after #initialize, then set by #each_fragment
# to the text of the chapter heading that opens the current fragment, with
# leading whitespace stripped.
def permission
  @permission
end

#taxpoint_type ⇒ Object

Returns the value of attribute taxpoint_type.



11
12
13
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 11

# Reader for @taxpoint_type: nil after #initialize; #each_fragment sets it
# to :fixed, :default or nil for each fragment that starts with a heading.
def taxpoint_type
  @taxpoint_type
end

Instance Method Details

#each_fragment(txt) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 64

# Cuts the text of one page into fragments at every chapter heading from
# @chapters that occurs in it and yields the fragments in page order.
#
# Text in front of the first heading is yielded first, without touching
# @taxpoint_type or @permission, so the values they held before the call
# stay in effect for it. Before each later fragment is yielded,
# @permission is set to the heading text (leading whitespace stripped) and
# @taxpoint_type to :fixed for "Teilliste 1" / "Liste partielle 1", to
# :default for "Teilliste 2" / "Liste partielle 2", and to nil otherwise.
#
# Only the first occurrence of each heading pattern acts as a boundary.
# When no heading matches, +txt+ is yielded unchanged as one fragment.
def each_fragment(txt)
	# [offset, sequence, pattern]; the sequence number keeps the sort stable.
	hits = []
	@chapters.each_with_index { |ptrn, seq|
		if(pos = txt.index(ptrn))
			hits.push([pos, seq, ptrn])
		end
	}
	# @chapters is ordered by language and name, not by where a heading sits
	# on the page, so the boundaries must be sorted by offset before cutting.
	hits = hits.sort_by { |pos, seq, _ptrn| [pos, seq] }
	if(hits.empty?)
		yield txt
	elsif(hits.first.at(0) > 0)
		yield txt[0...hits.first.at(0)]
	end
	hits.each_with_index { |(start, _seq, ptrn), idx|
		following = hits.at(idx.next)
		# The last fragment runs to the end of the page.
		stop = following ? following.at(0) : txt.length
		src = txt[start...stop]
		# NOTE(review): the part-list check looks at the whole fragment, not
		# just its heading -- kept as is; confirm that this is intended.
		@taxpoint_type = case src
		when /teilliste\s*1/iu, /Liste\s*partielle\s*1/iu
			:fixed
		when /teilliste\s*2/iu, /Liste\s*partielle\s*2/iu
			:default
		end
		@permission = src.match(ptrn).to_s.lstrip
		yield src
	}
end

#parse_fragment(fragment, pagenum) ⇒ Object



55
56
57
58
59
60
61
62
63
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 55

# Parses a single fragment with a fresh ExtendedListParser that is primed
# with the current taxpoint type, permission and list title. The footnotes
# the parser exposes afterwards are merged into @footnotes; the return
# value is whatever the parser's parse_page returned for the fragment.
def parse_fragment(fragment, pagenum)
	list_parser = ExtendedListParser.new.tap { |prs|
		prs.taxpoint_type = @taxpoint_type
		prs.permission = @permission
		prs.list_title = @list_title
	}
	# Footnotes are read only after the fragment has been parsed.
	list_parser.parse_page(fragment, pagenum).tap {
		@footnotes.merge!(list_parser.footnotes)
	}
end

#parse_page(txt, pagenum) ⇒ Object



46
47
48
49
50
51
52
53
54
# File 'ext/analysisparse/src/fragmented_page_handler.rb', line 46

# Runs #parse_fragment over every non-empty fragment that #each_fragment
# yields for +txt+ and returns all resulting positions as one flat array,
# in fragment order.
def parse_page(txt, pagenum)
	collected = []
	each_fragment(txt) do |piece|
		next if(piece.empty?)
		collected.concat(parse_fragment(piece, pagenum))
	end
	collected
end