Class: ODDB::Import::Whocc::Guidelines

Inherits:
ODDB::Import show all
Defined in:
lib/oddb/import/whocc.rb

Defined Under Namespace

Classes: CodeHandler

Constant Summary collapse

@@query_re =
/query=([A-Z0-9]+)/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Guidelines

Returns a new instance of Guidelines.



73
74
75
76
77
78
79
80
81
# File 'lib/oddb/import/whocc.rb', line 73

# Sets up a fresh importer: the WHOCC index URL, an empty CodeHandler
# that holds the codes still to be visited, and the statistics counters
# that #report prints (all starting at zero).
def initialize
  super
  @url = 'http://www.whocc.no/atcddd/database/index.php'
  @codes = CodeHandler.new
  # Counters are plain Integers, so chained assignment is safe here.
  @count = @created = 0
  @guidelines = @ddd_guidelines = 0
end

Instance Attribute Details

#codes ⇒ Object (readonly)

Returns the value of attribute codes.



53
54
55
# File 'lib/oddb/import/whocc.rb', line 53

# Reader for the @codes instance variable, which #initialize sets to a
# CodeHandler and #import drains via #shift.
def codes
  @codes
end

Instance Method Details

#extract_text(node) ⇒ Object



82
83
84
85
86
# File 'lib/oddb/import/whocc.rb', line 82

# Converts the inner HTML of +node+ into plain text.
#
# Runs of whitespace are collapsed to a single space and every <br> tag
# (together with the whitespace around it) becomes a newline.
#
# Returns nil when +node+ has any element child other than <br>; such
# nodes are not treated as simple text containers.
def extract_text(node)
  has_other_markup = node.children.any? do |child|
    child.element? && child.name != 'br'
  end
  return if has_other_markup

  collapsed = node.inner_html.gsub(/\s+/, ' ')
  collapsed.gsub(/\s*<br\s*\/?>\s*/, "\n")
end

#import(agent) ⇒ Object



87
88
89
90
91
92
93
94
# File 'lib/oddb/import/whocc.rb', line 87

# Runs the complete import using the given HTTP +agent+.
#
# Logs in first (see #login), then keeps taking codes from @codes and
# importing each one with #import_code until the handler is empty.
# #import_code may push further codes onto @codes, so the loop follows
# the pages it discovers. @count is incremented once per processed code.
#
# Returns the summary lines produced by #report.
def import(agent)
  # The listing showed a bare "(agent)" here, which does nothing; the
  # call to #login had lost its method name.
  login(agent)
  while (code = @codes.shift)
    @count += 1
    import_code(agent, code)
  end
  report
end

#import_atc(code, link) ⇒ Object



95
96
97
98
99
100
101
102
103
104
# File 'lib/oddb/import/whocc.rb', line 95

# Ensures an ATC record exists for +code+ and carries the English name
# taken from the text of +link+ (passed through capitalize_all).
#
# The record is looked up with Drugs::Atc.find_by_code and created when
# missing. It is only saved, and @created only incremented, when the
# stored English name differs from the one on the page.
#
# Returns the ATC record.
def import_atc(code, link)
  label = capitalize_all(link.inner_text.to_s)
  record = Drugs::Atc.find_by_code(code)
  record ||= Drugs::Atc.new(code)
  # Nothing to do when the name is already up to date.
  return record if record.name.en == label

  @created += 1
  record.name.en = label
  record.save
  record
end

#import_code(agent, get_code) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/oddb/import/whocc.rb', line 105

# Fetches the index page for +get_code+ (with descriptions enabled) and
# imports what it finds there.
#
# Links matched by "//b/a": every link whose href carries a query code is
# pushed onto @codes; when that code equals +get_code+ the ATC record and
# its guidelines are imported, and the record is saved if
# #import_guidelines reports a change.
#
# Links matched by "//ul//a": each one with a query code is passed to
# #import_atc.
def import_code(agent, get_code)
  query = "?query=%s&showdescription=yes" % get_code
  page = agent.get(@url + query)

  (page/"//b/a").each do |link|
    match = @@query_re.match(link.attributes['href'])
    next unless match

    code = match[1]
    if code == get_code
      atc = import_atc(code, link)
      import_guidelines(atc, link) && atc.save
    end
    @codes.push(code)
  end

  (page/"//ul//a").each do |link|
    match = @@query_re.match(link.attributes['href'])
    import_atc(match[1], link) if match
  end
end

#import_ddd_guidelines(atc, table) ⇒ Object



124
125
126
127
128
129
130
131
132
# File 'lib/oddb/import/whocc.rb', line 124

# Updates the English DDD guideline text of +atc+ from +table+.
#
# The text is the concatenation of #extract_text over every <td> cell of
# +table+; when +table+ is nil the text is nil. If it differs from the
# stored value, the value is replaced and @ddd_guidelines is incremented.
# The record is not saved here; that is left to the caller.
#
# Returns true when the text was changed, false otherwise.
def import_ddd_guidelines(atc, table)
  text = if table
    (table / 'td').map { |cell| extract_text(cell) }.join
  end

  if atc.ddd_guidelines.en != text
    @ddd_guidelines += 1
    atc.ddd_guidelines.en = text
    true
  else
    # Explicit false: previously the result was nil here and true above
    # only as a side effect of assigning an otherwise unused local.
    false
  end
end

#import_guidelines(atc, link) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/oddb/import/whocc.rb', line 133

# Updates the English guideline texts of +atc+ from the content that
# follows +link+ on the page.
#
# Starting at the parent of +link+, siblings are walked until a <p>
# element is reached; its text (via #extract_text) is the guideline text.
# If the node right after that paragraph is a <table> with
# bgcolor="#cccccc", it is handed to #import_ddd_guidelines.
# @guidelines is incremented when the guideline text changes. The record
# is not saved here.
#
# Returns true when either text was changed, false otherwise, including
# when no <p> element is found.
def import_guidelines(atc, link)
  node = link.parent
  # Stop at the end of the sibling chain instead of calling #name on nil.
  node = node.next_sibling while node && node.name != 'p'
  return false unless node

  ## nokogiri fixes the faulty html of whocc.no, and moves the table element
  #  out of the p-container.
  table = node.next_sibling
  modified = false
  # The paragraph may be the last sibling, in which case there is no table.
  if table && table.name == 'table' && table[:bgcolor] == '#cccccc'
    modified = true if import_ddd_guidelines(atc, table)
  end

  guidelines = extract_text(node)
  if atc.guidelines.en != guidelines
    @guidelines += 1
    modified = true
    atc.guidelines.en = guidelines
  end
  modified
end

#login(agent) ⇒ Object



153
154
155
156
157
158
159
160
161
# File 'lib/oddb/import/whocc.rb', line 153

# Authenticates +agent+ against the WHOCC site.
#
# Reads the 'whocc' entry from ODDB.config.credentials, fetches @url,
# fills the username and password fields of the first form on that page
# and submits it.
#
# Raises RuntimeError with a configuration hint when no 'whocc'
# credentials are configured.
# Returns the result of agent.submit.
def login(agent)
  msg = "Please configure your access to #@url in ODDB.config.credentials['whocc']"
  credentials = ODDB.config.credentials['whocc']
  raise msg unless credentials

  page = agent.get(@url)
  form = page.forms.first
  form.username = credentials['username']
  form.password = credentials['password']
  agent.submit(form)
end

#report ⇒ Object



162
163
164
165
166
167
168
169
# File 'lib/oddb/import/whocc.rb', line 162

# Builds the import summary: one formatted line per statistics counter
# (@count, @created, @guidelines, @ddd_guidelines), in that order.
#
# Returns an Array of four Strings.
def report
  lines = [
    ["Imported %3i ATC-Codes", @count],
    ["Updated  %3i English descriptions", @created],
    ["Updated  %3i Guidelines", @guidelines],
    ["Updated  %3i DDD-Guidelines", @ddd_guidelines],
  ]
  lines.map { |template, value| format(template, value) }
end