Module: Oat::Parsers

Includes:
Constants, Formats, OaRegexes
Included in:
Document
Defined in:
lib/oat/parsers.rb

Overview

The Parsers module is mixed into the Oat::Document class.

Constant Summary

Constants included from Formats

Formats::CIT_MISS, Formats::CIT_SIMPLE, Formats::CIT_WITH_PRIME, Formats::CIT_WITH_PRIME_PCT, Formats::FINALWML, Formats::HEADERS_FMT, Formats::STOPSTARTP, Formats::STOPSTARTTAB

Constants included from OaRegexes

OaRegexes::R_AND_AMENDMENTS, OaRegexes::R_ARTICLE_SECTION, OaRegexes::R_CAPTURE_AMENDMENTS_DATE, OaRegexes::R_CAPTURE_APPEAL_APP_NO, OaRegexes::R_CAPTURE_APPEAL_DRAFTED, OaRegexes::R_CAPTURE_APPEAL_NO, OaRegexes::R_CAPTURE_APPEAL_TARO, OaRegexes::R_CAPTURE_APP_NO, OaRegexes::R_CAPTURE_DRAFTED, OaRegexes::R_CAPTURE_MAILING_DATE, OaRegexes::R_CAPTURE_NO_REJECT_CLAIMS, OaRegexes::R_CAPTURE_PREVIOUS_OA, OaRegexes::R_CAPTURE_RETROACTIVE, OaRegexes::R_CAPTURE_SHIREISHO_APP, OaRegexes::R_CAPTURE_SHIREISHO_CODE, OaRegexes::R_CAPTURE_TARO, OaRegexes::R_CITATIONS_START, OaRegexes::R_HEADER_REASONS, OaRegexes::R_HEADER_SEPARATOR, OaRegexes::R_HEADER_SEPARATOR_DETECT, OaRegexes::R_HEADER_TYPES, OaRegexes::R_JPL_DETECT, OaRegexes::R_OYOBI, OaRegexes::R_REJ_TYPES, OaRegexes::R_RESPONSE_PERIOD, OaRegexes::R_SATEI_REASONS

Constants included from Constants

Constants::CITATIONS_FILE, Constants::FILENAME_DASH_SEPARATOR, Constants::FILENAME_DOT_SEPARATOR

Instance Method Summary collapse

Instance Method Details

#analyze_blobsObject

convert the japanese blobs to english


333
334
335
336
# File 'lib/oat/parsers.rb', line 333

# Converts the Japanese blobs to English by running the two translation
# passes in order: templated portions first, then header lines.
def analyze_blobs
  check_for_templatable_portions
  check_for_headers
end

#check_for_headersObject


338
339
340
341
342
343
344
345
346
347
# File 'lib/oat/parsers.rb', line 338

# Fills in the English text of every still-incomplete blob whose Japanese
# text (with newlines removed) matches R_HEADER_TYPES, unless the captured
# text is rejected by mistaken_header?.
def check_for_headers
  @blobs.select(&:incomplete?).each do |blob|
    # blobs starting with a separator character are "header" lines: skip them
    next if blob.to_jp[0] =~ /\p{Z}/

    found = blob.to_jp.gsub("\n", '').match(R_HEADER_TYPES)
    next unless found

    header = found[1]
    next if mistaken_header?(header)

    # a whitespace-only capture becomes a bare newline
    blob.english = header =~ /^\s*$/ ? "\n" : format_headers(header) + "\n"
  end
end

#check_for_templatable_portionsObject


349
350
351
352
353
354
355
356
357
358
# File 'lib/oat/parsers.rb', line 349

# Runs the full templates and then the partial templates over the blobs.
# The incomplete blobs are re-selected before the second pass, so blobs
# completed by the first pass are not visited again.
def check_for_templatable_portions
  first_pass = @blobs.select(&:incomplete?)
  first_pass.each do |blob|
    match_template_regexes(Oat.configuration.templatables, blob)
  end

  second_pass = @blobs.select(&:incomplete?)
  second_pass.each do |blob|
    match_template_regexes(Oat.configuration.partial_templates, blob)
  end
end

#convert_possible_heisei(hs, first, last) ⇒ Object

Matches /([昭|平]*)(\p{N}+?).(?:\p{Z}*)(\p{N}+?)号/ and converts 平09-060274 into H09-060274, while a number without an era marker such as 2008-003749 stays 2008-003749.


596
597
598
599
600
601
602
603
604
605
606
607
608
# File 'lib/oat/parsers.rb', line 596

# Builds a normalised publication number from the pieces captured by a
# citation regex of the form /([昭|平]*)(\p{N}+?).(?:\p{Z}*)(\p{N}+?)号/.
#
# hs    - era marker captured in front of the number: '平' gives an 'H'
#         prefix, '昭' gives an 'S' prefix, anything else (e.g. '') gives
#         no prefix.
# first - first number group (era year, or a Western year); may be full-width.
# last  - second number group (serial); may be full-width.
#
# Returns a String such as 'H09-060274' or '2008-003749'.
#
# NOTE(review): in the text this was restored from, both `when` literals
# were empty strings, which made the second branch unreachable and put an
# 'H' on numbers without an era marker. The markers below are taken from the
# regex quoted above; confirm against the repository copy.
def convert_possible_heisei(hs, first, last)
  era_prefix = { '平' => 'H', '昭' => 'S' }[hs]
  head = NKF.nkf('-m0Z1 -w', first)
  tail = NKF.nkf('-m0Z1 -w', last)

  # era years are zero-padded to two digits; Western years are kept verbatim
  head = era_prefix + sprintf('%02u', head.to_i(10)) if era_prefix

  head + '-' + tail
end

#convert_pub_no(m, eng) ⇒ Object


577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
# File 'lib/oat/parsers.rb', line 577

# Substitutes the CIT_NO placeholder(s) in an English citation template.
#
# m   - MatchData from a citation regex; its length (full match plus
#       captures) selects the citation style.
# eng - English template containing 'CIT_NO' (and 'CIT_NO2' for the
#       nine-element style).
#
# Returns the filled-in String, or nil when the length is not handled.
def convert_pub_no(m, eng)
  case m.length
  when 2
    digits = NKF.nkf('-m0Z1 -w', m[1])
    if eng =~ /United States Patent No/
      eng.gsub('CIT_NO', digits.to_i.commas)
    elsif eng =~ /European Patent/
      eng.gsub('CIT_NO', digits.to_i.eurostyle)
    else
      eng.gsub('CIT_NO', digits)
    end
  when 3
    left = NKF.nkf('-m0Z1 -w', m[1])
    right = NKF.nkf('-m0Z1 -w', m[2])
    eng.gsub('CIT_NO', left + '/' + right)
  when 4, 5
    eng.gsub('CIT_NO', convert_possible_heisei(m[2], m[3], m[4]))
  when 9
    with_first = eng.gsub(/CIT_NO /, convert_possible_heisei(m[2], m[3], m[4]) + ' ')
    with_first.gsub('CIT_NO2', convert_possible_heisei(m[6], m[7], m[8]))
  end
end

#fix_kanji_dates(tex) ⇒ Object


567
568
569
570
571
572
573
574
575
# File 'lib/oat/parsers.rb', line 567

# Rewrites kanji-style dates in +tex+ in place and returns +tex+.
#
# Full dates (年/月/日) become 'year/month/day'; remaining year-month pairs
# become '<English month name> <year>'.
def fix_kanji_dates(tex)
  months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
  # full year-month-day dates first, so the year-month rule below only sees what is left
  tex.gsub!(/([\d ]{1,4})年([\d ]{1,2})月([\d ]{1,2})日/, '\1/\2/\3')
  tex.gsub!(/([\d ]{1,4})年([\d ]{1,2})月/) do
    # month number (capture 2) indexes the months table; capture 1 is the year
    "#{months[Regexp.last_match[2].to_i - 1]} #{Regexp.last_match[1]}" 
  end
  # NOTE(review): both arguments appear as empty strings in this view, which makes
  # this call a no-op; the original pattern may have been lost - confirm against the repository.
  tex.gsub!('', '')
  tex
end

#format_headers(tex, options = {}) ⇒ Object

parse_articles


667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
# File 'lib/oat/parsers.rb', line 667

# Translates one header line into English.
#
# tex     - header text; squish! (defined elsewhere) is applied to it first.
# options - Hash merged over the defaults below and passed on to
#           replace_common_phrases and to recursive calls. It is not passed
#           to handle_jpl, and :markdown is not read in this method.
#
# Lines containing a header separator are split into sections, each section
# is formatted on its own, and the sections are re-joined with the detected
# separator. Other lines go through the "Reason" handling or straight to
# replace_common_phrases / handle_jpl.
#
# Returns the formatted String.
def format_headers(tex, options = {})
  defaults = {  replace_toh: false,
                ignore_toh: true,
                markdown: false
  }
  options = defaults.merge(options)

  squish! tex

  # try to handle when Examiners put multiple groups separated by : or /
  # on the same line like 引用文献1:請求項1,2/ bla
  formatted_text = ''
  if R_HEADER_SEPARATOR_DETECT =~ tex
    # super fragile. If regex is changed
    # demarker = NKF.nkf('-m0Z1 -w', '#{$&[1,1]} ')
    demarker = NKF.nkf('-m0Z1 -w', "#{$1} ") #$~ is last matchdata
    tex.split(R_HEADER_SEPARATOR).each do |section|
      # re-insert the separator between sections (not before the first one)
      formatted_text += demarker unless formatted_text.length == 0

      if section =~ R_JPL_DETECT
        formatted_text += handle_jpl(section)
      else
        #no jpl to handle
        formatted_text += format_headers(section, options)
      end

    end
  else
    #handle special Reason lines
    if /#{R_HEADER_REASONS}/x =~ tex
      if /#{R_OYOBI}/ =~ tex
        # sections split on R_OYOBI are joined with ' and '
        tex.split(/#{R_OYOBI}/).each do |section|
          section = format_number_listing(section)

          formatted_text += ' and ' unless formatted_text.length == 0
          formatted_text += "#{replace_common_phrases(section, options)}".gsub('(', ' (') #add space before parenthesis
        end
      else
        if tex =~ R_JPL_DETECT
          formatted_text += handle_jpl(tex)
        else
          #no jpl to handle
          formatted_text = "#{replace_common_phrases(tex, options)}"
        end
      end
      # spacing fix-ups: 'Reason(' -> 'Reason (' and '<number>(' -> '<number> ('
      formatted_text.gsub!('Reason(', 'Reason (')
      formatted_text.gsub!(/(\p{N})\(/, '\1 (')
    else
      if tex =~ R_JPL_DETECT
        #note : it cant get here
        formatted_text += handle_jpl(tex)
      else
        #no jpl to handle
        formatted_text = "#{replace_common_phrases(tex, options)}"
      end
    end
  end

  formatted_text
end

#format_invention_according_to(str) ⇒ Object


488
489
490
491
# File 'lib/oat/parsers.rb', line 488

# Turns a "請求項N に係る発明" phrase into "the invention according to ..."
# followed by the matching verb (' are' when the formatted text mentions
# "inventions", otherwise ' is').
def format_invention_according_to(str)
  prepared = str.gsub('に係る発明', '').gsub('請求項', 'the invention according to 請求項')
  formatted = format_headers(prepared)
  verb = formatted =~ /inventions/ ? ' are' : ' is'
  formatted + verb
end

#format_number_listing(tex) ⇒ Object

Formats a number listing, assuming the string contains only one list. One level up, format_headers breaks a single line into several such lists. Example: 請求項3,17,31,45


808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
# File 'lib/oat/parsers.rb', line 808

# Formats a single number listing inside +tex+ (e.g. "Claim3,17,31,45").
#
# The text is first normalised with NKF. If it contains a number after its
# first three characters, it is cut into an opening part, the number list
# and a closing part; the list is split into items, 'and' is inserted before
# the last item, and several singular words are pluralised when the list has
# more than two entries or contains a range.
#
# Returns the formatted String; text without such a number only gets the
# final '( ' fix-up.
def format_number_listing(tex)
  tex = NKF.nkf('-m0Z1 -w', tex)

  # if no numbers (like 'Notes:') then do nothing
  if m = tex.match(/(?:...)(.*?)\p{N}/) # skip leading characters in case it's something like '1.理由1,2について' (NOTE(review): the pattern skips three characters, the old comment said two)
    # opening, numbers, close
    op = tex[0..m.end(1) - 1]
    num_start = m.end(1)
    # last digit in the string: a digit with no other digit after it
    m = tex.match(/\p{N}(?!.*\p{N})/)
    cl = tex[m.end(0)..-1]
    nums = tex[num_start..m.end(0) - 1]

    parsed = nums.split(/((?:~|-)*\p{N}+(?:to\p{N}+)*,*)/).reject(&:empty?)

    # change ['1to2,', '3'] to ['1', '2', '3']
    # (only ranges whose ends are consecutive numbers are expanded)
    parsed.each_index do |el|
      if /to\p{N}/ =~ parsed[el]
        parts = parsed[el].split(/to/)
        if parts[0].to_i(10) == (parts[1].to_i(10) - 1)
          parsed[el] = parts[0] + ','
          parsed.insert(el + 1, parts[1])
        end
      end
    end

    if parsed.length > 1
      parsed.insert(-2, 'and')
      # "1 and 2": no comma when there are exactly two items
      parsed[0].gsub!(',', '') if parsed.length == 3
    end

    tex = "#{op} #{parsed.join(' ')}#{cl}"

    # pluralise when more than two entries (incl. 'and') or a range is present
    if (parsed.length > 2) || (/\p{N}to\p{N}/ =~ tex)
      tex.gsub!('Claim', 'Claims')
      tex.gsub!('Citation', 'Citations')
      tex.gsub!('Example', 'Examples')
      tex.gsub!('Reason', 'Reasons')
      tex.gsub!('invention', 'inventions')
      tex.gsub!('Prior Application', 'Prior Applications')
    end
    tex.gsub!('to', ' to ')

    # remove extra spaces
    tex.gsub!(/\p{Z}+/, ' ')
  end

  # dont feel like tracking this bug down, kludge
  tex.gsub!('( ', ' (')

  tex
end

#format_templated_english!(result) ⇒ Object


407
408
409
# File 'lib/oat/parsers.rb', line 407

# Lower-cases every 'Claim' in +result+ in place when the use_lowercase
# configuration flag is set. Returns the gsub! result, or nil when the flag
# is off.
def format_templated_english!(result)
  return unless Oat.configuration.use_lowercase

  result.gsub!('Claim', 'claim')
end

#get_filename_separatorObject

at the moment it's either 0 or 1, meaning dash or dot


45
46
47
48
49
50
51
52
53
54
55
# File 'lib/oat/parsers.rb', line 45

# Returns the separator used inside generated file names: '.' when the
# use_dot setting equals FILENAME_DOT_SEPARATOR, '-' in every other case.
def get_filename_separator
  case Oat.configuration.use_dot
  when FILENAME_DASH_SEPARATOR then '-'
  when FILENAME_DOT_SEPARATOR then '.'
  else '-'
  end
end

#get_text_from_blobsObject


302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# File 'lib/oat/parsers.rb', line 302

# Concatenates the blob texts into one WordML-ready string.
#
# For every blob the English text is appended when the blob is complete.
# When the use_japanese setting is 1 the Japanese text is emitted as well:
# blobs for which Oat::Document.end_of_blob? is true are buffered, and the
# buffer is flushed in front of the next other blob.
#
# The result is XML-escaped, then OAOA_TEMPLATER_TAB markers are replaced by
# STOPSTARTTAB and newlines by STOPSTARTP.
#
# Returns the String. It is returned explicitly because gsub! yields nil
# when nothing was replaced, which used to make this method return nil for
# text without any newline.
def get_text_from_blobs
  text = ''.dup
  sub_text = ''
  sub_en_text = ''
  @blobs.each do |b|
    if Oat.configuration.use_japanese == 1
      if Oat::Document::end_of_blob?(b.to_jp)
        sub_text += b.to_jp
        sub_en_text += b.to_en
      else
        if sub_text.length > 0
          # flush the buffered Japanese (and English, if this blob is complete)
          text += sub_text
          text += b.complete? ? sub_en_text : ''
          sub_text = sub_en_text = ''
        end
        # drop newlines that are directly followed by a letter, digit or '、'
        text += b.to_jp.gsub(/\n([\p{L}\p{N}、])/, '\1')
      end
    end

    text += b.complete? ? b.to_en : ''
  end

  #convert to xml, everything below here needs to be xml
  text.encode!(:xml => :text)
  #replace OAOA_TEMPLATER_TAB with word_ml newline + tab
  text.gsub!(/OAOA_TEMPLATER_TAB/, STOPSTARTTAB)
  #replace newlines with word_ml newlines
  text.gsub!(/\n/, STOPSTARTP)

  text
end

#handle_jpl(tex) ⇒ Object


745
746
747
748
749
750
751
752
753
754
755
756
757
# File 'lib/oat/parsers.rb', line 745

# Translates a header fragment that cites the Japanese Patent Law.
#
# tex - text such as "(A)理由1(特許法29条1項3号)"; it is normalised with NKF
#       before matching.
#
# The part in front of 特許法 is run through format_number_listing and
# replace_common_phrases; the article, paragraph and (optional) number are
# written out in English. Several spacing fix-ups follow.
#
# Returns the translated String.
def handle_jpl(tex)
  #comes in looking something like "(A)理由1(特許法29条1項3号)"
  jpl = ''
  tex = NKF.nkf('-m0Z1 -w', tex)
  # $1 = leading text, $2 = article, $3 = paragraph, $4 = number (may be absent)
  jpl = tex.gsub(/(.*)特許法第?(\p{N}+)条第?(\p{N}+)項(?:第?(\p{N}+.*)号)*/){ 
    replace_common_phrases(format_number_listing($1)) + "Japanese Patent Law, Article #{$2}, Paragraph #{$3}, Number #{$4}"
  }
  jpl.gsub!(/, Number\p{Z}+\)(?:について)?$/, ')') #if it doesnt have a \4
  jpl.gsub!(')R', ') R')
  jpl.gsub!('Reason(', 'Reason (')
  jpl.gsub!(/(\p{N})\(/, '\1 (')
  jpl
end

#match_template_regexes(regexes, b) ⇒ Object

pass in the collection of regexes, eg templatable.yml, partial-templates.yml pass in a blob, split blob into sentence and run matches on each sentence


362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# File 'lib/oat/parsers.rb', line 362

# Tries every template in +regexes+ against one blob and stores the
# filled-in English text on the blob.
#
# regexes - collection yielding (title, entry) pairs; each entry provides
#           'detect' (the pattern) and 'text' (the English template).
# b       - blob; its Japanese text is matched with newlines removed.
#
# Every matching entry overwrites b.english, so the last match wins.
def match_template_regexes(regexes, b)
  # very short blobs will not match anything anyway
  return if b.to_jp.length < 4

  regexes.each do |_title, entry|
    hit = b.to_jp.gsub(/\n/, '').match(entry['detect'])
    next unless hit

    filled = entry['text']
    hit.captures.each_with_index do |capture, idx|
      filled = replace_I_vars(filled, capture, idx)
      filled = replace_PC_vars(filled, capture, idx)
      filled = replace_P_vars(filled, capture, idx)
      filled = replace_CI_vars(filled, capture, idx)
      filled = replace_JL_vars(filled, capture, idx)
    end

    # one extra newline when the Japanese text itself ended in a newline
    terminator = b.to_jp[-1] == "\n" ? "\n\n" : "\n"
    b.english = filled + terminator
  end
end

#mistaken_header?(tex) ⇒ Boolean

is_mistaken_header?

Returns:

  • (Boolean)

494
495
496
497
498
499
500
# File 'lib/oat/parsers.rb', line 494

# Tells whether +tex+ merely looks like a header but should not be
# translated as one. Returns true or false.
#
# NOTE(review): several regex literals on the second rule appear empty (//)
# in this view; an empty regex matches every string, so as shown this method
# always returns true for String input. The original patterns were probably
# lost - confirm against the repository.
def mistaken_header?(tex)
  val = false
  # search-result / prior-art / caution wording, unless the "reviewed, but" phrase is present
  val = true if tex =~ /調査/ || /先行技術文/ =~ tex || /注意/ =~ tex and !(/検討しましたが/ =~ tex)
  val = true if (tex =~ //) || (tex =~//) || (tex =~ /には/) || (tex =~ //)
  val = true if (tex =~ /係る発明/) || (tex =~/明らか/)
  val
end

#pad_spaces(tex) ⇒ Object


558
559
560
561
562
563
564
565
# File 'lib/oat/parsers.rb', line 558

# Normalises spacing in +tex+ in place and returns +tex+:
# adds a space after a period that is directly followed by a number, adds a
# space after every comma, collapses runs of separator characters into one
# space, and closes the gap in '. ,'.
#
# Two patterns were repaired here: the collapse rule was missing the
# backslash of \p{Z} (so it never matched whitespace), and the last rule
# used an unescaped dot, which overwrote whatever character preceded ' ,'.
def pad_spaces(tex)
  tex.gsub!(/\.(\p{N})/, '. \1')
  tex.gsub!(/\,/, ', ')
  # \p{Z} covers space separators only; newlines and tabs are left alone
  tex.gsub!(/\p{Z}+/, ' ')
  tex.gsub!(/\. ,/, '.,')
  tex
end

#parse_amendments_dateObject


74
75
76
77
78
79
# File 'lib/oat/parsers.rb', line 74

# Captures the amendments date and, when found, stores it as the
# :amendments_date property in year/month/day form.
def parse_amendments_date
  capture_the(:amendments_date, R_CAPTURE_AMENDMENTS_DATE)
  scraped = @scrapes[:amendments_date]
  return if scraped.nil?

  formatted = format_heisei_date('%04u/%02u/%02u', scraped)
  set_prop(:amendments_date, formatted)
end

#parse_app_noObject


109
110
111
112
113
114
115
116
117
118
# File 'lib/oat/parsers.rb', line 109

# Captures the application number (falling back to the appeal format when
# the regular pattern finds nothing) and stores it as :app_no in the form
# '<first capture>-<second capture>', both normalised with NKF.
def parse_app_no
  capture_the(:app_no, R_CAPTURE_APP_NO)
  # nothing came up: try the appeal format
  capture_the(:app_no, R_CAPTURE_APPEAL_APP_NO) if @scrapes[:app_no].nil?

  scraped = @scrapes[:app_no]
  return if scraped.nil?

  head = NKF.nkf('-m0Z1 -w', scraped[1])
  tail = NKF.nkf('-m0Z1 -w', scraped[2])
  set_prop(:app_no, head + '-' + tail)
end

#parse_appeal_draftedObject


16
17
18
19
20
21
22
# File 'lib/oat/parsers.rb', line 16

# Captures the appeal drafting date (year/month/day) and stores it as the
# :appeal_drafted property.
def parse_appeal_drafted
  capture_the(:appeal_drafted, R_CAPTURE_APPEAL_DRAFTED)
  return if @scrapes[:appeal_drafted].nil?

  # NOTE(review): the pattern is matched against @data a second time here,
  # unlike the sibling parse_* methods; kept as is - confirm whether it is needed.
  rematched = @data.match(R_CAPTURE_APPEAL_DRAFTED)
  @scrapes[:appeal_drafted] = rematched
  set_prop(:appeal_drafted, format_heisei_date('%04u/%02u/%02u', rematched))
end

#parse_appeal_examinerObject


158
159
160
161
162
163
# File 'lib/oat/parsers.rb', line 158

# Captures the appeal examiner and stores captures 1 and 2, separated by a
# space, as the :appeal_taro property.
def parse_appeal_examiner
  capture_the(:appeal_taro, R_CAPTURE_APPEAL_TARO)
  scraped = @scrapes[:appeal_taro]
  return if scraped.nil?

  full_name = scraped[1] + ' ' + scraped[2]
  set_prop(:appeal_taro, full_name)
end

#parse_appeal_noObject


88
89
90
91
92
93
# File 'lib/oat/parsers.rb', line 88

# Captures the appeal number and stores it as :appeal_no in the form
# '<first capture>-<second capture>', both normalised with NKF.
def parse_appeal_no
  capture_the(:appeal_no, R_CAPTURE_APPEAL_NO)
  scraped = @scrapes[:appeal_no]
  return if scraped.nil?

  head = NKF.nkf('-m0Z1 -w', scraped[1])
  tail = NKF.nkf('-m0Z1 -w', scraped[2])
  set_prop(:appeal_no, head + '-' + tail)
end

#parse_articlesObject


610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
# File 'lib/oat/parsers.rb', line 610

# Builds the "Cited Articles" and "reasons for" WordML paragraphs and stores
# them as the :articles and :reasons_for properties.
#
# The text searched is the part of @data matched by R_ARTICLE_SECTION, or
# all of @data when that pattern does not match. Each line is joined with
# the following line (newlines removed), so phrases wrapped over two lines
# are still found, and tested against every entry of @reasons
# ('japanese' pattern, 'short' label, 'english' text).
#
# Each English reason is listed once and numbered; the number is removed
# again when only one reason was listed. Short labels are de-duplicated and
# sorted.
def parse_articles
  count = 1
  articles_text = '<w:p><w:pPr><w:kinsoku w:val="0"/><w:spacing w:line="360" w:lineRule="atLeast"/></w:pPr><w:r w:rsidR="006A661C"><w:rPr><w:b/><w:noProof/></w:rPr><w:t>Cited Articles:</w:t><w:tab/><w:tab/><w:tab/>'
  reasons_for_text = '<w:p><w:pPr><w:kinsoku w:val="0"/><w:spacing w:line="360" w:lineRule="atLeast"/></w:pPr><w:r w:rsidR="006A661C"><w:rPr><w:noProof/></w:rPr>'

  found_articles = []
  original_length = articles_text.length

  m = @data.match(R_ARTICLE_SECTION)
  target_data = m ? m[0] : @data

  # split once instead of on every iteration
  lines = target_data.lines

  # the index runs one past the last line, as before: that final window is empty
  (0..lines.length).each do |line_num|
    line = lines[line_num..line_num + 1].join.gsub(/\r\n|\r|\n/, '') #merge 2 lines and remove newline

    @reasons.each do |_r, a|
      next unless line =~ a['japanese']

      found_articles << a['short']

      # only list each English reason once
      unless reasons_for_text.include?(a['english'])
        reasons_for_text += "<w:t>#{count}.</w:t><w:tab/><w:t>#{a['english']}</w:t><w:br/><w:br/>"
        count += 1
      end
    end
  end

  found_articles.uniq.sort.each do |a|
    # skip tab on first reason
    articles_text += "<w:tab/><w:tab/><w:tab/><w:tab/><w:tab/>" unless articles_text.length == original_length
    # only add short text once (36 shows up multiple times)
    articles_text += "<w:t>#{a}</w:t><w:br/>"
  end

  # remove number if only 1 article listed
  # (the replacement must not contain '\/': gsub would copy the backslash into the XML)
  reasons_for_text.gsub!(/<w:t>1\.<\/w:t><w:tab\/><w:t>/, '<w:tab/><w:t>') if count == 2

  #remove final word_ml newline
  reasons_for_text.gsub!(/<w:br\/><w:br\/>$/, '')
  articles_text.gsub!(/<w:br\/>$/, '')

  #close the paragraph
  articles_text += '</w:r></w:p>'
  reasons_for_text += '</w:r></w:p>'

  set_prop(:articles, Sablon.content(:word_ml, articles_text))
  set_prop(:reasons_for, Sablon.content(:word_ml, reasons_for_text))
end

#parse_citationsObject


502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
# File 'lib/oat/parsers.rb', line 502

# Builds the WordML citation list and stores it as :citation_list.
#
# Scanning starts at the match of R_CITATIONS_START in @data (HTML tags are
# stripped) and stops at the first blank line or a line starting with '---'.
# Each line is tested against every entry of the citations file
# ('japanese' pattern, 'english' template). Matched lines are rendered with
# CIT_SIMPLE, CIT_WITH_PRIME_PCT or CIT_WITH_PRIME; lines that match nothing
# are cleaned up and emitted through CIT_MISS.
#
# NOTE(review): the "newly added" pattern used an unescaped ')' and was not
# a valid regex literal as seen; it now accepts the ASCII or the full-width
# closing parenthesis. The original character may have been full-width -
# confirm against the repository.
def parse_citations
  citation_text = ''

  if m = @data.match(R_CITATIONS_START)
    @cits ||= YAML.load_file(CITATIONS_FILE)
    count = 0
    data = @data[m.end(0) - 2..-1].gsub(%r{</?[^>]+?>}, '') # end minus '1.', gsub to remove html

    catch :done_scanning do
      data.each_line do |line|
        tex = line
        throw :done_scanning if (/^\s*$/ =~ line) || (line[0..2].eql?('---'))

        old_citation_text = citation_text
        # a line that starts with a number opens the next citation
        # NOTE(review): the bare '.' alternative matches any character; it may
        # originally have been a full-width period - confirm.
        if /^\p{Z}*\p{N}+((?:\.|.|:)+.*?)/m =~ tex
          count += 1
        end

        @cits.each do |_n, a|
          if m = tex.match(a['japanese'])
            count = 1 if count == 0
            wellknown_text = tex =~ /周知技術/ ? ' (Publication showing well-known technology)' : ''
            newlyadd_text = ((tex =~ /追加した文献[)）]/) || (tex =~ /新たに引/)) ? ' (Newly added publication)' : ''
            if (/United States/ =~ a['english']) or (/United Kingdom/ =~ a['english']) or (/European Patent/ =~ a['english'])
              # citation is in English (no prime needed)
              citation_text += sprintf(CIT_SIMPLE, count, convert_pub_no(m, a['english']) + newlyadd_text + wellknown_text)
            else # normal
              if /Published Japanese Translation No./ =~ a['english']
                citation_text += sprintf(CIT_WITH_PRIME_PCT, count, convert_pub_no(m, a['english']) + newlyadd_text + wellknown_text, count)
              else
                citation_text += sprintf(CIT_WITH_PRIME, count, convert_pub_no(m, a['english']) + newlyadd_text + wellknown_text, count)
              end
            end
          end
        end # cits

        # nothing was appended for this line: no citation pattern matched
        if old_citation_text == citation_text
          tex = NKF.nkf('-m0Z1 -w', tex)
          # strip blank dos lines
          tex.gsub!(/\p{Z}*\r\n/, '')
          tex = pad_spaces(tex)
          tex = fix_kanji_dates(tex)
          tex = swap_reference_words(tex)

          # if no match was found, just copy the japanese, skip first character (it's a period from the regex)
          # should have the correct number from the actual source (not from count variable)
          tex.encode!(:xml => :text) if tex
          citation_text += sprintf(CIT_MISS, tex)
        end
      end # each line
    end # catch
  end # if citations found

  set_prop(:citation_list, Sablon.content(:word_ml, citation_text))
end

#parse_currently_knownObject


211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/oat/parsers.rb', line 211

# Sets the :currently_known property according to which "no reasons for
# rejection" wording appears in @data; the property is set to an empty
# string when neither wording is present.
def parse_currently_known
  notice =
    case @data
    when /拒絶の理由を発見しない請求項/
      if m = @data.match(R_CAPTURE_NO_REJECT_CLAIMS)
        # the captured claim list is named explicitly
        "<Claims for which no reasons for rejection have been found>\r\n \tNo reasons for rejection are currently known for #{format_headers(m[1])}.  The applicant will be notified of new reasons for rejection if such reasons for rejection are found."
      else
        "<Claims for which no reasons for rejection have been found>\r\n \tNo reasons for rejection are currently known for the claims.  The applicant will be notified of new reasons for rejection if such reasons for rejection are found."
      end
    when /拒絶の理由が通知される/
      'The applicant will be notified of new reasons for rejection if such reasons for rejection are found.'
    else
      ''
    end

  set_prop(:currently_known, notice)
end

#parse_draftedObject


24
25
26
27
28
29
# File 'lib/oat/parsers.rb', line 24

# Captures the drafting date (year/month/day) and stores it as the :drafted
# property.
def parse_drafted
  capture_the(:drafted, R_CAPTURE_DRAFTED)
  scraped = @scrapes[:drafted]
  return if scraped.nil?

  formatted = format_heisei_date('%04u/%02u/%02u', scraped)
  set_prop(:drafted, formatted)
end

#parse_examinerObject

definitely need to fix this up later, haha


121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/oat/parsers.rb', line 121

# Resolves the examiner's name (:taro) and codes (:code).
#
# The name is looked up in the examiners CSV (column 1 holds the Japanese
# name, column 0 the value to store). When it is not listed and the
# guess_examiner setting is 1, the name is romanised with Kakasi; otherwise
# the raw captures are stored.
def parse_examiner
  capture_the(:taro, R_CAPTURE_TARO) # captures 1, 2 are the name (codes are 3, 4)

  # no regular examiner found: try the appeal examiner pattern
  capture_the(:taro, R_CAPTURE_APPEAL_TARO) if @scrapes[:taro].nil?

  return if @scrapes[:taro].nil?

  resolved = false
  family, given = @scrapes[:taro][1], @scrapes[:taro][2]

  CSV.foreach(Oat.configuration.templates[:examiners]) do |row|
    next unless NKF.nkf('-m0Z1 -w', row[1]).eql?(' ' + family + ' ' + given)

    set_prop(:taro, row[0])
    resolved = true
    break
  end

  if !resolved && Oat.configuration.guess_examiner == 1
    resolved = true
    # use kakasi to romanise the examiner's name
    given, family = Kakasi.kakasi('-Ja', given).capitalize, Kakasi.kakasi('-Ja', family).upcase
    set_prop(:taro, "#{given} #{family} #{@scrapes[:taro][1]} #{@scrapes[:taro][2]}")
  end

  set_prop(:taro, @scrapes[:taro][1] + ' ' + @scrapes[:taro][2]) unless resolved

  # always set examiners numbers
  first_code = NKF.nkf('-m0Z1 -w', @scrapes[:taro][3])
  second_code = NKF.nkf('-m0Z1 -w', @scrapes[:taro][4])
  set_prop(:code, first_code + ' ' + second_code)
end

#parse_final_oaObject


165
166
167
168
169
170
171
# File 'lib/oat/parsers.rb', line 165

# Detects the "<<<< 最後 >>>>" (final) marker. :reason_for_final is always
# reset to an empty string first; when the marker is found, :final_oa and
# :reason_for_final are filled in.
def parse_final_oa
  set_prop(:reason_for_final, '')
  capture_the(:final_oa, /<<<<\p{Z}+最\p{Z}{0,6}後\p{Z}+>>>>/)

  unless @scrapes[:final_oa].nil?
    set_prop(:final_oa, "\n<<<<    FINAL    >>>>\n")
    set_prop(:reason_for_final, Sablon.content(:word_ml, FINALWML))
  end
end

#parse_headersObject


294
295
296
297
298
299
300
# File 'lib/oat/parsers.rb', line 294

# Splits @data into blobs, translates them, and stores the resulting WordML
# as the :oa_headers property.
def parse_headers
  @blobs = Oat::Document.get_blobs_from_data(@data)
  analyze_blobs

  headers_ml = sprintf(HEADERS_FMT, get_text_from_blobs)
  set_prop(:oa_headers, Sablon.content(:word_ml, headers_ml))
end

#parse_ipcObject


226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/oat/parsers.rb', line 226

# Extracts the IPC / database listing from the prior-art search section of
# @data and stores it as the :ipc_list property (empty string when absent).
#
# When the listing is found, parse_ipc_references is called with the offset
# in @data at which the search-result section match ends.
#
# The indentation rule now uses a Regexp: it used to be passed as a String,
# which gsub treats as literal text, so it never matched.
def parse_ipc
  ipc_text = ''

  if m = @data.match(/先行技術文献(?:等{0,1})調査結果(.*?)[^\p{N}]先行技術文献/m)
    section = m[1]
    ipc_list_end = m.end(0)
    if ipc_at = section.match(/(I|I)(P|P)(C|C)/)
      # keep everything from "IPC" on, dropping the final character
      listing = section[ipc_at.begin(0)..-2]
      ipc_text = NKF.nkf('-m0Z1 -w', listing)
                    .gsub('IPC', 'IPC:')
                    .gsub('DB名', "\tDB Name:")
                    .gsub(/^\p{Z}{3,8}/, "\t ") # continuation lines indented by 3-8 separators
      parse_ipc_references(ipc_list_end)
    end
  end

  set_prop(:ipc_list, ipc_text)
end

#parse_ipc_references(ipc_list_end) ⇒ Object


242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/oat/parsers.rb', line 242

# Builds the numbered list of references that follows the IPC listing and
# stores it as the :ipc_reference_text property.
#
# ipc_list_end - offset into @data at which to start reading.
#
# Reading stops at the next search-result heading or at the
# "この拒絶理由通知の内容" notice; when neither is found the property is set
# to an empty string. Lines matching a pattern from the citations file are
# rendered through convert_pub_no; other lines are normalised and copied.
def parse_ipc_references(ipc_list_end)
  ipc_reference_text = ''
  data = @data[ipc_list_end..-1]

  if m = data.match(/(^.*先行技術文献調査結果|この拒絶理由通知の内容)/)
    @cits ||= YAML.load_file(CITATIONS_FILE)
    # NOTE(review): the inclusive range keeps the first character of the match - confirm this is intended
    data = data[0..m.begin(0)]
    oldmatch = false # whether the previous line matched a citation pattern
    count = 1

    data.each_line do |line|
      match = false
      @cits.each do |_n, a|
        if m = line.match(a['japanese'])
          match = true
          ipc_reference_text += "#{count}.  #{convert_pub_no(m, a['english'])}\n"
        end
      end # cits.each

      unless match
        # if no match, change full-width (全角) to half-width (半角)
        line = NKF.nkf('-m0Z1 -w', line)

        # first line of non-match
        if oldmatch and (!match)
          line.gsub!(/^/, "#{count}. ") if line.length > 4
        end

        # >1st line of non-match
        if (!oldmatch) and (!match)
          count -= 1 #decrease count so that it stays the same after being increased below
          #remove newlines since it's probably a big english title
          ipc_reference_text.gsub!(/\r\n$/,"\n") if line.length > 4
        end

        line = pad_spaces(line)
        line = fix_kanji_dates(line)
        line = swap_reference_words(line)

        ipc_reference_text += line
      end

      # increase count
      count += 1

      oldmatch = match
    end
  end

  set_prop(:ipc_reference_text, ipc_reference_text)
end

#parse_mailing_dateObject

also sets outputfile, should be separated but screw it


32
33
34
35
36
37
38
39
40
41
42
# File 'lib/oat/parsers.rb', line 32

# Captures the mailing date, stores it as :mailing_date, and derives the
# output file name from it. The output file name falls back to
# 'oa_template' when no mailing date is found.
def parse_mailing_date
  Oat.configuration.outputfile = 'oa_template'
  capture_the(:mailing_date, R_CAPTURE_MAILING_DATE)
  scraped = @scrapes[:mailing_date]
  return if scraped.nil?

  #generate names like 'ALP.拒絶理由.20161114.docx'
  stem = "ALP#{Oat.configuration.casenumber}#{get_filename_separator}#{template_name}#{get_filename_separator}"
  Oat.configuration.outputfile = "#{stem}#{format_heisei_date('%04u%02u%02u', scraped)}.docx"

  set_prop(:mailing_date, format_heisei_date('%04u/%02u/%02u', scraped))
end

#parse_note_to_applicantObject


204
205
206
207
208
209
# File 'lib/oat/parsers.rb', line 204

# Adds the fixed "Request to the Applicant" paragraph as the
# :note_to_applicant property when the corresponding Japanese wording is
# found.
def parse_note_to_applicant
  capture_the(:note_to_applicant, /本願出願時に公開されており、/)

  unless @scrapes[:note_to_applicant].nil?
    notice = "\t• Request to the Applicant\r\n\tCitation 1 was already published at the time of filing of the present application and has a common applicant or inventor with the present application.  Citation 1 alone would be a bar to the novelty or inventive step of more than one claim of the present application.\r\n\tBased on this type of citation, appropriately evaluating the invention in advance can be thought to be beneficial to the applicant while creating appropriate claims, as well as helpful to the Examiner for an efficient and accurate examination.  We request that the applicant disclose this type of citation that the applicant is already aware of when filing the application or a request for examination, as well as requesting that the applicant evaluates whether or not the invention for which a patent is sought has patentability based on this type of citation. "
    set_prop(:note_to_applicant, notice)
  end
end

#parse_our_lawyerObject


181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/oat/parsers.rb', line 181

# Captures the representative's name and stores the matching romanised name
# as the :our_lawyer property. Only the first capture (the family name) is
# looked at; unknown names fall back to 'Taro TOKKYO'.
def parse_our_lawyer
  capture_the(:our_lawyer, /[特許出願人]*代理人[弁理士]*[弁護士]*\p{Zs}+(\S+?)\p{Zs}(\S+?)/)
  scraped = @scrapes[:our_lawyer]
  return if scraped.nil?

  romanised = {
    '村山' => 'Yasuhiko MURAYAMA',
    '志賀' => 'Masatake SHIGA',
    '佐伯' => 'Yoshifumi SAEKI',
    '渡邊' => 'Takashi WATANABE',
    '実広' => 'Shinya JITSUHIRO',
    '棚井' => 'Sumio TANAI'
  }

  set_prop(:our_lawyer, romanised.fetch(scraped[1], 'Taro TOKKYO'))
end

#parse_response_periodObject


177
178
179
# File 'lib/oat/parsers.rb', line 177

# Sets :response_period to '60 days' when R_RESPONSE_PERIOD matches @data,
# otherwise to 'three months'.
def parse_response_period
  period = R_RESPONSE_PERIOD =~ @data ? '60 days' : 'three months'
  set_prop(:response_period, period)
end

#parse_retroactiveObject


81
82
83
84
85
86
# File 'lib/oat/parsers.rb', line 81

# Captures the retroactive filing date and stores the formatted
# "Filing Date (Retroactive Date)" line as the :retroactive property.
def parse_retroactive
  capture_the(:retroactive, R_CAPTURE_RETROACTIVE)
  scraped = @scrapes[:retroactive]
  return if scraped.nil?

  line = format_heisei_date("\nFiling Date (Retroactive Date) \t%04u/%02u/%02u\n \n", scraped)
  set_prop(:retroactive, line)
end

#parse_satei_previous_oaObject


57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/oat/parsers.rb', line 57

# Captures the date of the previous office action and derives the related
# properties: :satei_previous_oa (the date), :and_amendments and
# :satei_reasons.
#
# :satei_reasons is 'the reasons' when R_SATEI_REASONS does not match or
# captures just '理由' (ignoring whitespace); otherwise the capture is
# translated with format_headers.
def parse_satei_previous_oa
  capture_the(:satei_previous_oa, R_CAPTURE_PREVIOUS_OA)
  return if @scrapes[:satei_previous_oa].nil?

  set_prop(:satei_previous_oa, format_heisei_date('%04u/%02u/%02u', @scrapes[:satei_previous_oa]))

  # set "and Amendments"
  set_prop(:and_amendments, R_AND_AMENDMENTS =~ @data ? 'Remarks and Amendments' : 'Remarks')

  # set the reason
  # default
  set_prop(:satei_reasons, 'the reasons')
  r = @data.match(R_SATEI_REASONS)
  # non-bang gsub: gsub! returns nil when there is no whitespace to strip,
  # which used to hand nil to format_headers
  m = r.nil? ? '理由' : r[1].gsub(/\s+/, '')
  set_prop(:satei_reasons, m == '理由' ? 'the reasons' : format_headers(m))
end

#parse_see_listObject


173
174
175
# File 'lib/oat/parsers.rb', line 173

# Sets :see_list to the "See the List of Citations" note when the matching
# Japanese sentence occurs in @data, otherwise to an empty string.
def parse_see_list
  note = /引用文献等については引用文献等一覧参照/ =~ @data ? "  \n(See the List of Citations for the cited publications)  \n" : ''
  set_prop(:see_list, note)
end

#parse_shireisho_appObject


95
96
97
98
99
100
# File 'lib/oat/parsers.rb', line 95

# Captures the shireisho application number and stores it as :shireisho_num
# in the form '<first capture>-<second capture>', both normalised with NKF.
def parse_shireisho_app
  capture_the(:shireisho_num, R_CAPTURE_SHIREISHO_APP)
  scraped = @scrapes[:shireisho_num]
  return if scraped.nil?

  head = NKF.nkf('-m0Z1 -w', scraped[1])
  tail = NKF.nkf('-m0Z1 -w', scraped[2])
  set_prop(:shireisho_num, head + '-' + tail)
end

#parse_shireisho_codeObject


102
103
104
105
106
107
# File 'lib/oat/parsers.rb', line 102

# Captures the shireisho code and stores it as :scode in the form
# '<first capture> <second capture>', both normalised with NKF.
def parse_shireisho_code
  capture_the(:scode, R_CAPTURE_SHIREISHO_CODE)
  scraped = @scrapes[:scode]
  return if scraped.nil?

  head = NKF.nkf('-m0Z1 -w', scraped[1])
  tail = NKF.nkf('-m0Z1 -w', scraped[2])
  set_prop(:scode, head + ' ' + tail)
end

#replace_CI_vars(result, m, i) ⇒ Object


411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
# File 'lib/oat/parsers.rb', line 411

# Replaces the \\CI<n> placeholder (n = i + 1) in +result+ with
# "invention(s) disclosed in <citations>" and resolves the matching
# \\ISARE<n> verb.
#
# m - captured citation numbers, or nil for "the above citations".
#
# Returns the resulting String (unchanged when the placeholder is absent).
def replace_CI_vars(result, m, i)
  slot = i + 1
  return result unless result =~ /\\\\CI#{slot}/

  nums = m.nil? ? "the above citations" : format_headers("引用文献" + m)

  if (nums =~ /to /) || (nums =~ /and/) || (nums =~ /citations/)
    filled = result.gsub("\\\\CI#{slot}", "inventions disclosed in #{nums}")
    replace_ISARE(filled, slot, true)
  else
    filled = result.gsub("\\\\CI#{slot}", "invention disclosed in #{nums}")
    replace_ISARE(filled, slot)
  end
end

#replace_common_phrases(tex, options = {}) ⇒ Object


728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
# File 'lib/oat/parsers.rb', line 728

# Normalises +tex+ with NKF, swaps the known Japanese phrases for English
# (swap_words), strips "\r" characters and formats the number listing.
#
# options - Hash merged over the defaults below (:replace_toh, :ignore_toh).
#
# Returns the String produced by format_number_listing.
#
# NOTE(review): the search strings of the two option-dependent gsub! calls
# appear empty in this view; the original characters were probably lost -
# confirm against the repository.
def replace_common_phrases(tex, options = {})
  defaults = {  replace_toh: false,
                ignore_toh: true
  }
  options = defaults.merge(options)

  tex = NKF.nkf('-m0Z1 -w', tex)
  tex = swap_words(tex)
  tex.gsub!('', '') if options[:ignore_toh]
  tex.gsub!('', ', etc.') if options[:replace_toh]

  # strip aberrant \r characters
  tex.gsub!("\r", '')

  tex = format_number_listing(tex)
end

#replace_I_vars(result, m, x) ⇒ Object

the following three methods could be refactored, almost identical


383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/oat/parsers.rb', line 383

# Replaces the \\I<n> placeholder (n = x + 1) in +result+ with
# "invention(s) according to <claims>" and resolves the matching
# \\ISARE<n> verb. format_templated_english! is applied to the result
# whether or not the placeholder was present.
#
# m - captured claim numbers, or nil for "the above claims".
#
# Returns the resulting String.
def replace_I_vars(result, m, x)
  slot = x + 1

  if result =~ /\\\\I#{slot}/
    nums = m.nil? ? "the above claims" : format_headers("請求項" + m)

    if (nums =~ /to /) || (nums =~ /and/) || (nums =~ /claims/)
      result = replace_ISARE(result.gsub("\\\\I#{slot}", "inventions according to #{nums}"), slot, true)
    else
      result = replace_ISARE(result.gsub("\\\\I#{slot}", "invention according to #{nums}"), slot)
    end
  end

  format_templated_english!(result)
  result
end

#replace_ISARE(result, i, plural = false) ⇒ Object


484
485
486
# File 'lib/oat/parsers.rb', line 484

# Replaces every \\ISARE<i> placeholder in +result+ with 'are' (plural) or
# 'is' (singular) and returns the new String.
def replace_ISARE(result, i, plural = false)
  verb = plural ? 'are' : 'is'
  result.gsub("\\\\ISARE#{i}", verb)
end

#replace_JL_vars(result, m, i) ⇒ Object


464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
# File 'lib/oat/parsers.rb', line 464

# Replaces the \\JL<n> placeholder (n = i + 1) in +result+ with the patent
# law citation produced by handle_jpl and resolves the matching
# \\ISARE<n> verb.
#
# Returns the resulting String (unchanged when the placeholder is absent).
def replace_JL_vars(result, m, i)
  slot = i + 1
  return result unless result =~ /\\\\JL#{slot}/

  nums = handle_jpl(m)

  # kludge: drop a dangling ', Number ' when the citation has no number part
  nums.gsub!(/, Number $/, '')

  plural = (nums =~ /to /) || (nums =~ /and/)
  filled = result.gsub("\\\\JL#{slot}", nums)
  plural ? replace_ISARE(filled, slot, true) : replace_ISARE(filled, slot)
end

#replace_P_vars(result, m, i) ⇒ Object


432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
# File 'lib/oat/parsers.rb', line 432

# Replaces the \\P<n> placeholder (n = i + 1) in +result+ with the
# formatted capture and resolves the matching \\ISARE<n> verb (plural when
# the formatted text contains 'to ' or 'and').
#
# Returns the resulting String (unchanged when the placeholder is absent).
def replace_P_vars(result, m, i)
  slot = i + 1
  return result unless result =~ /\\\\P#{slot}/

  nums = format_headers(m)
  plural = (nums =~ /to /) || (nums =~ /and/)
  filled = result.gsub("\\\\P#{slot}", nums)
  plural ? replace_ISARE(filled, slot, true) : replace_ISARE(filled, slot)
end

#replace_PC_vars(result, m, i) ⇒ Object


448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
# File 'lib/oat/parsers.rb', line 448

# Replaces the \\PC<n> placeholder (n = i + 1) in +result+ with the
# captured claim numbers, formatted as a claim listing, and resolves the
# matching \\ISARE<n> verb (plural when the listing contains 'to ' or
# 'and').
#
# Returns the resulting String (unchanged when the placeholder is absent).
def replace_PC_vars(result, m, i)
  slot = i + 1
  return result unless result =~ /\\\\PC#{slot}/

  nums = format_headers("請求項" + m)
  plural = (nums =~ /to /) || (nums =~ /and/)
  filled = result.gsub("\\\\PC#{slot}", nums)
  plural ? replace_ISARE(filled, slot, true) : replace_ISARE(filled, slot)
end

#swap_reference_words(tex) ⇒ Object


759
760
761
762
763
764
765
# File 'lib/oat/parsers.rb', line 759

# Replaces a couple of Japanese words that show up in reference lines with
# their English equivalents. +tex+ is modified in place and returned.
def swap_reference_words(tex)
  # very rarely works for the first word, since it is usually split over multiple lines
  [
    [/インターネット/m, 'Internet'],
    [/米国/m, 'United States']
  ].each { |pattern, english| tex.gsub!(pattern, english) }

  tex
end

#swap_words(tex) ⇒ Object

do actual swapping of japanese and english words


768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
# File 'lib/oat/parsers.rb', line 768

# Swaps known Japanese words and phrases in +tex+ for their English
# equivalents. +tex+ is modified in place and returned.
#
# The order of the substitutions matters: longer phrases are replaced before
# the shorter words they contain (e.g. '拡大先願' before the 先願 pattern).
#
# NOTE(review): several search strings below appear empty ('') in this view;
# String#gsub! with an empty search string inserts the replacement at every
# position, so the original characters were most likely lost - confirm
# against the repository.
def swap_words(tex)
  tex.gsub!('', ',')
  tex.gsub!('', ',')
  tex.gsub!('拡大先願', 'Lack of Novelty Due to Secret Prior Art')
  tex.gsub!('ないし', 'to')
  tex.gsub!('について', '')
  tex.gsub!('のいずれか', 'any one of')
  tex.gsub!('及び', ',')
  tex.gsub!('および', ',')
  tex.gsub!('進歩性', 'Inventive Step')
  tex.gsub!('新規事項', 'New Matter')
  tex.gsub!('実施可能要件', 'Enablement Requirements')
  tex.gsub!('産業上の利用可能性', 'Industrial Applicability')
  tex.gsub!('特許請求の範囲の記載に関する委任省令要件', 'Ministerial Ordinance Requirements Regarding Claims')
  tex.gsub!('発明の単一性', 'Unity of Invention')
  tex.gsub!('明確性', 'Clarity')
  tex.gsub!('サポート要件', 'Support Requirements')
  tex.gsub!('新規性', 'Novelty')
  tex.gsub!('同日出願', 'Applications Filed on Same Day')
  # the patterns below tolerate separator characters between the kanji
  tex.gsub!(/請\p{Z}*求\p{Z}*項/, 'Claim')
  tex.gsub!('引用文献', 'Citation')
  tex.gsub!(/引\p{Z}*用\p{Z}*例/, 'Citation')
  tex.gsub!(/実\p{Z}*施\p{Z}*例/, 'Example')
  tex.gsub!(/理\p{Z}*由/, 'Reason')
  tex.gsub!(/先\p{Z}*願/, 'Prior Application')
  # range markers all become 'to'
  tex.gsub!('', 'to')
  tex.gsub!('-', 'to')
  tex.gsub!('', 'to')
  tex.gsub!('乃至', 'to')
  tex.gsub!('理由', 'Reason')

  # match 備考:
  tex.gsub!('備考', 'Notes')

  tex
end