Class: Raakt::Test
- Inherits:
-
Object
- Object
- Raakt::Test
- Defined in:
- lib/raakt.rb,
lib/iso_language_codes.rb
Constant Summary collapse
- ISO_CODES =
[ "aa", "ab", "ae", "af", "ak", "am", "an", "ar", "as", "av", "ay", "az", "ba", "be", "bg", "bh", "bi", "bm", "bn", "bo", "br", "bs", "ca", "ce", "ch", "co", "cr", "cs", "cv", "cy", "da", "de", "dv", "dz", "ee", "el", "en", "eo", "es", "et", "eu", "fa", "ff", "fi", "fj", "fo", "fr", "fy", "ga", "gd", "gl", "gn", "gu", "gv", "ha", "he", "hi", "ho", "hr", "ht", "hu", "hy", "hz", "ia", "id", "ie", "ig", "ii", "ik", "io", "is", "it", "iu", "ja", "jv", "ka", "kg", "ki", "kj", "kk", "kl", "km", "kn", "ko", "kr", "ks", "ku", "kv", "kw", "ky", "la", "lb", "lg", "li", "ln", "lo", "lt", "lv", "mg", "mh", "mi", "mk", "ml", "mn", "mo", "mr", "ms", "mt", "my", "na", "nb", "nd", "ne", "ng", "nl", "nn", "no", "nr", "nv", "ny", "oc", "oj", "om", "or", "os", "pa", "pi", "pl", "ps", "pt", "qu", "rm", "rn", "ro", "ru", "rw", "sa", "sc", "sd", "se", "sg", "sh", "si", "sk", "sl", "sm", "sn", "so", "sq", "sr", "ss", "st", "su", "sv", "sw", "ta", "te", "tg", "th", "ti", "tk", "tl", "tn", "to", "tr", "ts", "tt", "tw", "ty", "ug", "uk", "ur", "uz", "ve", "vi", "vo", "wa", "wo", "xh", "yi", "yo", "za", "zh", "zu"]
Instance Attribute Summary collapse
-
#headers ⇒ Object
Returns the value of attribute headers.
-
#html ⇒ Object
Returns the value of attribute html.
-
#ignore_bi ⇒ Object
Returns the value of attribute ignore_bi.
-
#wordlist ⇒ Object
Returns the value of attribute wordlist.
Instance Method Summary collapse
-
#all ⇒ Object
Call all check methods.
- #alt_to_text(element) ⇒ Object
-
#check_areas ⇒ Object
Verify that all area elements have a non-empty alt attribute.
-
#check_character_set ⇒ Object
Verify that the character set specified in the HTTP headers matches the one specified in the HTML meta element.
- #check_difficult_words ⇒ Object
-
#check_document_structure ⇒ Object
Verify that heading elements (h1-h6) appear in the correct order (no levels skipped).
-
#check_embed ⇒ Object
Verify that the embed element isn’t used.
-
#check_fieldset_legend ⇒ Object
Verify that all fieldset elements have a legend child element.
-
#check_for_formatting_elements ⇒ Object
Verify that no formatting elements have been used.
-
#check_for_language_info ⇒ Object
Verify that the document's root html element has a lang attribute.
-
#check_for_nested_tables ⇒ Object
Verify that the document does not have any nested tables.
-
#check_form ⇒ Object
Verify that all form fields have a corresponding label element.
-
#check_frames ⇒ Object
Verify that all frame elements have a title attribute.
-
#check_has_heading ⇒ Object
Verify that the document has at least one h1 element.
-
#check_images ⇒ Object
Verify that all img elements have an alt attribute.
-
#check_input_type_img ⇒ Object
Verify that all input type=image elements have an alt attribute.
-
#check_link_text ⇒ Object
Verify that no link texts are ambiguous.
-
#check_refresh ⇒ Object
Verify that the document does not use meta-refresh to redirect the user away after a period of time.
-
#check_tables ⇒ Object
Verify that all tables have at least one table header (th) element.
-
#check_title ⇒ Object
Verify that the document has a non-empty title element.
-
#check_valid_language_code ⇒ Object
Verify that the html element has a valid lang code.
-
#doc=(html) ⇒ Object
Set the HTML used in the test.
- #downcase_hash_keys(a_hash) ⇒ Object
- #elements_to_text(element) ⇒ Object
- #get_editable_fields ⇒ Object
- #get_labels ⇒ Object
- #get_link_text(link) ⇒ Object
- #get_link_title(link) ⇒ Object
- #get_link_url(link) ⇒ Object
- #get_links ⇒ Object
-
#headings ⇒ Object
Utility methods.
-
#initialize(html = nil, headers = nil, wordlist = nil) ⇒ Test
constructor
A new instance of Test.
- #is_ambiguous_link(link_a, link_b) ⇒ Object
- #is_frameset ⇒ Object
- #langinfo(element) ⇒ Object
- #level(heading) ⇒ Object
- #link_text_identical?(link_a, link_b) ⇒ Boolean
- #link_title_identical?(link_a, link_b) ⇒ Boolean
- #links_point_to_same_resource?(link_a, link_b) ⇒ Boolean
- #normalize_text(text) ⇒ Object
- #parse_charset(contenttype) ⇒ Object
Constructor Details
#initialize(html = nil, headers = nil, wordlist = nil) ⇒ Test
Returns a new instance of Test.
85 86 87 88 89 90 91 92 93 |
# File 'lib/raakt.rb', line 85 def initialize(html=nil, headers=nil, wordlist=nil) @html = html @headers = headers @wordlist = wordlist self.doc = @html if html self.headers = @headers if headers self.wordlist = @wordlist if wordlist @ignore_bi = false end |
Instance Attribute Details
#headers ⇒ Object
Returns the value of attribute headers.
83 84 85 |
# File 'lib/raakt.rb', line 83 def headers @headers end |
#html ⇒ Object
Returns the value of attribute html.
83 84 85 |
# File 'lib/raakt.rb', line 83 def html @html end |
#ignore_bi ⇒ Object
Returns the value of attribute ignore_bi.
83 84 85 |
# File 'lib/raakt.rb', line 83 def ignore_bi @ignore_bi end |
#wordlist ⇒ Object
Returns the value of attribute wordlist.
83 84 85 |
# File 'lib/raakt.rb', line 83 def wordlist @wordlist end |
Instance Method Details
#all ⇒ Object
Call all check methods.
112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/raakt.rb', line 112 def all = [] self.methods.each do |method| if method[0..5] == "check_" += self.send(method) end end return end |
#alt_to_text(element) ⇒ Object
524 525 526 527 528 529 530 |
# File 'lib/raakt.rb', line 524 def alt_to_text(element) if element.kind_of?(Hpricot::Elem) then element.has_attribute?("alt") ? element['alt'] : "" else "" end end |
#check_areas ⇒ Object
Verify that all area elements have a non-empty alt attribute. See UWEM 1.0 Test 1.1_HTML_01 (together with check_images)
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
# File 'lib/raakt.rb', line 204 def check_areas = [] area_elements = (@doc/"area") area_elements.map { |element| unless element['alt'] << ErrorMessage.new(:missing_area_alt, element['name'] || element['id'] || "unknown") else if element['alt'].length == 0 << ErrorMessage.new(:missing_area_alt_text, element['name'] || element['id'] || "unknown") end end } end |
#check_character_set ⇒ Object
Verify that the character set specified in the HTTP headers matches the one specified in the HTML meta element.
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'lib/raakt.rb', line 148 def check_character_set = [] header_charset = = "" if @headers and @headers.length > 0 then if @headers.has_key?("content-type") header_charset = parse_charset(@headers["content-type"].to_s) end #get meta element charset = @doc.search("//meta[@http-equiv]") for element in do if element["http-equiv"].downcase == "content-type" then = parse_charset(element["content"]) end end if header_charset.length > 0 and .length > 0 unless == header_charset << ErrorMessage.new(:charset_mismatch) end end end return end |
#check_difficult_words ⇒ Object
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 |
# File 'lib/raakt.rb', line 426 def check_difficult_words = [] if @wordlist # get document text (and all title and ait attributes but remove blockquote and q elements) # remove q and blockquotes @doc.search("blockquote").remove @doc.search("q").remove doctext = @doc.inner_text #add alt texts @doc.search("*[@alt]").each { |item| doctext += " " + item['alt'] doctext += ", " } #add title texts @doc.search("*[@title]").each { |item| doctext += " " + item['title'] doctext += ", " } @wordlist.each { |key, value| re = Regexp.new("\\b" + key.sub(/ /, "\\s+") + "\\b", true) if doctext =~ re # loop over all keys in wordlist << ErrorMessage.new(:difficult_word, value) end } end return end |
#check_document_structure ⇒ Object
Verify that heading elements (h1-h6) appear in the correct order (no levels skipped). See UWEM 1.0 Test 3.5_HTML_03.
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
# File 'lib/raakt.rb', line 239 def check_document_structure = [] currentitem = 0 for heading in headings if currentitem == 0 if level(heading.name) != 1 << ErrorMessage.new(:first_h_not_h1, "h" + heading.name[1,1]) end else if level(heading.name) - level(headings[currentitem - 1].name) > 1 << ErrorMessage.new(:wrong_h_structure) break end end currentitem += 1 end end |
#check_embed ⇒ Object
Verify that the embed element isn’t used. See UWEM 1.0 Test 1.1_HTML_06.
141 142 143 144 |
# File 'lib/raakt.rb', line 141 def return [ErrorMessage.new(:embed_used)] unless (@doc/'embed').empty? [] end |
#check_fieldset_legend ⇒ Object
Verify that all fieldset elements have a legend child element. See UWEM 1.0 Test 12.3_HTML_01.
126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/raakt.rb', line 126 def check_fieldset_legend = [] fieldsets = (@doc/"fieldset") fieldset_instance = 1 for fieldset in fieldsets if (fieldset/"legend").empty? << ErrorMessage.new(:fieldset_missing_legend, fieldset_instance.to_s) end fieldset_instance += 1 end end |
#check_for_formatting_elements ⇒ Object
Verify that no formatting elements have been used. See UWEM 1.0 Test 7.2_HTML_01 and Test 7.3_HTML_01.
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 |
# File 'lib/raakt.rb', line 302 def check_for_formatting_elements = [] formatting_elements = %w(font b i u tt small big strike s) formatting_elements = %w(font u tt small big strike s) if @ignore_bi formatting_items = (@doc/formatting_elements.join('|')) unless formatting_items.empty? found_elements = [] for element in formatting_items found_elements << element.name end << ErrorMessage.new(:missing_semantics, "#{found_elements.uniq.join(', ')}") end flicker_elements = %w(blink marquee) flicker_items = (@doc/flicker_elements.uniq.join('|')) unless flicker_items.empty? << ErrorMessage.new(:has_flicker) end end |
#check_for_language_info ⇒ Object
Verify that the document's root html element has a lang attribute.
331 332 333 334 335 336 337 338 339 340 341 342 |
# File 'lib/raakt.rb', line 331 def check_for_language_info = [] unless (@doc/'html[@lang]').empty? lang_code = (@doc/"html").first["lang"].to_s if lang_code.length < 2 << ErrorMessage.new(:missing_lang_info) end else << ErrorMessage.new(:missing_lang_info) end end |
#check_for_nested_tables ⇒ Object
Verify that the document does not have any nested tables. This is indicative of a table-based layout.
264 265 266 267 268 269 270 271 272 273 274 275 276 |
# File 'lib/raakt.rb', line 264 def check_for_nested_tables = [] tables = (@doc/"table") for table in tables unless (table/"table").empty? return << ErrorMessage.new(:has_nested_tables) end end end |
#check_form ⇒ Object
Verify that all form fields have a corresponding label element. See UWEM 1.0 Test 12.4_HTML_02.
377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 |
# File 'lib/raakt.rb', line 377 def check_form = [] labels = get_labels fields = get_editable_fields #make sure all fields have associated labels label_for_ids = [] for label in labels if label["for"] label_for_ids << label["for"] end end field_id = nil for field in fields field_id = (field["id"] || "") field_identifier = (field["id"] || field["name"] || "unknown") if not label_for_ids.include?(field_id) << ErrorMessage.new(:field_missing_label, field_identifier) end end end |
#check_frames ⇒ Object
Verify that all frame elements have a title attribute.
405 406 407 408 409 410 411 412 413 |
# File 'lib/raakt.rb', line 405 def check_frames # Covers UWEM Test 12.1_HTML_01 return [] unless is_frameset (@doc/"frame").find_all do |frame| frame_title = frame['title'] || '' normalize_text(frame_title).empty? end.map { |frame| ErrorMessage.new(:missing_frame_title, frame['src']) } end |
#check_has_heading ⇒ Object
Verify that the document has at least one h1 element.
232 233 234 235 |
# File 'lib/raakt.rb', line 232 def check_has_heading return [ErrorMessage.new(:missing_heading)] if (@doc/"h1").empty? [] end |
#check_images ⇒ Object
Verify that all img elements have an alt attribute.
197 198 199 200 |
# File 'lib/raakt.rb', line 197 def check_images no_alt_images = (@doc/"img:not([@alt])") no_alt_images.map { |img| ErrorMessage.new(:missing_alt, img['src']) } end |
#check_input_type_img ⇒ Object
Verify that all input type=image elements have an alt attribute.
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/raakt.rb', line 177 def check_input_type_img #Covers UWEM 1.0 Test 1.1_HTML_01 = [] = @doc.search("input").select { |element| element['type'] =~ /image/i } .map { |element| unless element['alt'] << ErrorMessage.new(:missing_input_alt, element['name'] || element['id'] || "") else if element['alt'].length == 0 << ErrorMessage.new(:missing_input_alt_text, element['name'] || element['id'] || "") end end } end |
#check_link_text ⇒ Object
Verify that no link texts are ambiguous. A typical example is the presence of multiple “Read more” links.
364 365 366 367 368 369 370 371 372 373 |
# File 'lib/raakt.rb', line 364 def check_link_text links = get_links link = links.find do |link| links.find { |cmp_link| is_ambiguous_link(link, cmp_link) } end return [] unless link [ErrorMessage.new(:ambiguous_link_text, get_link_text(link))] end |
#check_refresh ⇒ Object
Verify that the document does not use meta-refresh to redirect the user away after a period of time.
417 418 419 420 421 422 423 |
# File 'lib/raakt.rb', line 417 def check_refresh = (@doc/'meta') .find_all do |element| element["http-equiv"] == "refresh" end.map { ErrorMessage.new(:has_meta_refresh) } end |
#check_tables ⇒ Object
Verify that all tables have at least one table header (th) element.
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 |
# File 'lib/raakt.rb', line 280 def check_tables = [] tables = (@doc/"table") currenttable = 1 for table in tables hasth = false hasth = true unless (table/">tr>th").empty? hasth = true unless (table/">thead>tr>th").empty? hasth = true unless (table/">tbody>tr>th").empty? << ErrorMessage.new(:missing_th, currenttable.to_s) unless hasth currenttable += 1 end end |
#check_title ⇒ Object
Verify that the document has a non-empty title element.
223 224 225 226 227 228 |
# File 'lib/raakt.rb', line 223 def check_title title = @doc.at('title') return [ErrorMessage.new(:missing_title)] unless title return [ErrorMessage.new(:empty_title)] if normalize_text(title.inner_html).empty? [] end |
#check_valid_language_code ⇒ Object
Verify that the html element has a valid lang code.
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 |
# File 'lib/raakt.rb', line 346 def check_valid_language_code = [] unless (@doc/"html[@lang]").empty? #load list of valid language codes #iso_lang_codes = [] #IO.foreach(File.dirname(__FILE__) + "/iso_language_codes.txt") { |code| iso_lang_codes << code.chomp } doc_main_lang_code = (@doc/"html").first["lang"].to_s.downcase unless ISO_CODES.include?(doc_main_lang_code[0..1]) << ErrorMessage.new(:wrong_lang_code, doc_main_lang_code) end end end |
#doc=(html) ⇒ Object
Set the HTML used in the test.
96 97 98 99 |
# File 'lib/raakt.rb', line 96 def doc=(html) Hpricot.buffer_size = 524288 #Allow for asp.net bastard-sized viewstate attributes... @doc = Hpricot(html) end |
#downcase_hash_keys(a_hash) ⇒ Object
481 482 483 484 485 |
# File 'lib/raakt.rb', line 481 def downcase_hash_keys(a_hash) downcased_hash = {} a_hash.collect {|key,value| downcased_hash[key.downcase] = value} return downcased_hash end |
#elements_to_text(element) ⇒ Object
532 533 534 535 536 537 538 539 |
# File 'lib/raakt.rb', line 532 def elements_to_text(element) str = '' element.traverse_all_element do |elem| elem.kind_of?(Hpricot::Text) ? str += "#{elem}" : str += alt_to_text(elem) end str end |
#get_editable_fields ⇒ Object
564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 |
# File 'lib/raakt.rb', line 564 def get_editable_fields allfields = (@doc/"textarea|select|input") fields = [] field_type = "" for field in allfields do field_type = field["type"] || "" unless ["button", "submit", "hidden", "image"].include?(field_type) fields << field end end return fields end |
#get_labels ⇒ Object
559 560 561 |
# File 'lib/raakt.rb', line 559 def get_labels @doc/'label' end |
#get_link_text(link) ⇒ Object
599 600 601 602 |
# File 'lib/raakt.rb', line 599 def get_link_text(link) text = (elements_to_text(link) || '').strip normalize_text(text) end |
#get_link_title(link) ⇒ Object
608 609 610 611 |
# File 'lib/raakt.rb', line 608 def get_link_title(link) text = (link['title'] || '').strip normalize_text(text) end |
#get_link_url(link) ⇒ Object
604 605 606 |
# File 'lib/raakt.rb', line 604 def get_link_url(link) link['href'] end |
#get_links ⇒ Object
505 506 507 |
# File 'lib/raakt.rb', line 505 def get_links (@doc/'a') end |
#headings ⇒ Object
Utility methods
467 468 469 470 471 472 473 |
# File 'lib/raakt.rb', line 467 def headings items = [] @doc.traverse_element("h1", "h2", "h3", "h4", "h5", "h6") { |heading| items << heading } return items end |
#is_ambiguous_link(link_a, link_b) ⇒ Object
497 498 499 500 501 502 503 |
# File 'lib/raakt.rb', line 497 def is_ambiguous_link(link_a, link_b) return false if links_point_to_same_resource?(link_a, link_b) return true if link_text_identical?(link_a, link_b) && link_title_identical?(link_a, link_b) false end |
#is_frameset ⇒ Object
581 582 583 |
# File 'lib/raakt.rb', line 581 def is_frameset (@doc/"frameset").length > 0 end |
#langinfo(element) ⇒ Object
509 510 511 512 513 514 515 516 517 518 519 520 521 |
# File 'lib/raakt.rb', line 509 def langinfo(element) langval = "" if element.class.to_s == 'Tag' if element['lang'] langval = element['lang'] end else return nil end return langval end |
#level(heading) ⇒ Object
476 477 478 |
# File 'lib/raakt.rb', line 476 def level(heading) Integer(heading[1].chr) end |
#link_text_identical?(link_a, link_b) ⇒ Boolean
586 587 588 |
# File 'lib/raakt.rb', line 586 def link_text_identical?(link_a, link_b) get_link_text(link_a) == get_link_text(link_b) end |
#link_title_identical?(link_a, link_b) ⇒ Boolean
590 591 592 |
# File 'lib/raakt.rb', line 590 def link_title_identical?(link_a, link_b) get_link_title(link_a) == get_link_title(link_b) end |
#links_point_to_same_resource?(link_a, link_b) ⇒ Boolean
594 595 596 597 |
# File 'lib/raakt.rb', line 594 def links_point_to_same_resource?(link_a, link_b) (link_a == link_b) || (get_link_url(link_a) == get_link_url(link_b)) end |
#normalize_text(text) ⇒ Object
542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 |
# File 'lib/raakt.rb', line 542 def normalize_text(text) text ||= '' retval = text.gsub(/ /, ' ') retval = retval.gsub(/ /, ' ') retval = retval.gsub(/\n/, '') retval = retval.gsub(/\r/, '') retval = retval.gsub(/\t/, '') while / /.match(retval) do retval = retval.gsub(/ /, ' ') end retval = retval.strip return retval end |
#parse_charset(contenttype) ⇒ Object
487 488 489 490 491 492 493 494 |
# File 'lib/raakt.rb', line 487 def parse_charset(contenttype) # get charset identifier from content type string if contenttype=~/charset=(.*)\w?/ then return $1.downcase.strip end return "" end |