Class: Oat::Document

Inherits:
Object
  • Object
show all
Includes:
Helpers, Parsers
Defined in:
lib/oat/document.rb

Overview

Base class that all OA types extend.

Constant Summary

Constants included from Formats

Formats::CIT_MISS, Formats::CIT_SIMPLE, Formats::CIT_WITH_PRIME, Formats::CIT_WITH_PRIME_PCT, Formats::FINALWML, Formats::HEADERS_FMT, Formats::STOPSTARTP, Formats::STOPSTARTTAB

Constants included from OaRegexes

OaRegexes::R_AND_AMENDMENTS, OaRegexes::R_ARTICLE_SECTION, OaRegexes::R_CAPTURE_AMENDMENTS_DATE, OaRegexes::R_CAPTURE_APPEAL_APP_NO, OaRegexes::R_CAPTURE_APPEAL_DRAFTED, OaRegexes::R_CAPTURE_APPEAL_NO, OaRegexes::R_CAPTURE_APPEAL_TARO, OaRegexes::R_CAPTURE_APP_NO, OaRegexes::R_CAPTURE_DRAFTED, OaRegexes::R_CAPTURE_MAILING_DATE, OaRegexes::R_CAPTURE_NO_REJECT_CLAIMS, OaRegexes::R_CAPTURE_PREVIOUS_OA, OaRegexes::R_CAPTURE_RETROACTIVE, OaRegexes::R_CAPTURE_SHIREISHO_APP, OaRegexes::R_CAPTURE_SHIREISHO_CODE, OaRegexes::R_CAPTURE_TARO, OaRegexes::R_CITATIONS_START, OaRegexes::R_HEADER_REASONS, OaRegexes::R_HEADER_SEPARATOR, OaRegexes::R_HEADER_SEPARATOR_DETECT, OaRegexes::R_HEADER_TYPES, OaRegexes::R_JPL_DETECT, OaRegexes::R_OYOBI, OaRegexes::R_REJ_TYPES, OaRegexes::R_RESPONSE_PERIOD, OaRegexes::R_SATEI_REASONS

Constants included from Constants

Constants::CITATIONS_FILE, Constants::FILENAME_DASH_SEPARATOR, Constants::FILENAME_DOT_SEPARATOR

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Parsers

#analyze_blobs, #check_for_headers, #check_for_templatable_portions, #convert_possible_heisei, #convert_pub_no, #fix_kanji_dates, #format_headers, #format_invention_according_to, #format_number_listing, #format_templated_english!, #get_filename_separator, #get_text_from_blobs, #handle_jpl, #match_template_regexes, #mistaken_header?, #pad_spaces, #parse_amendments_date, #parse_app_no, #parse_appeal_drafted, #parse_appeal_examiner, #parse_appeal_no, #parse_articles, #parse_citations, #parse_currently_known, #parse_drafted, #parse_examiner, #parse_final_oa, #parse_headers, #parse_ipc, #parse_ipc_references, #parse_mailing_date, #parse_note_to_applicant, #parse_our_lawyer, #parse_response_period, #parse_retroactive, #parse_satei_previous_oa, #parse_see_list, #parse_shireisho_app, #parse_shireisho_code, #replace_CI_vars, #replace_ISARE, #replace_I_vars, #replace_JL_vars, #replace_PC_vars, #replace_P_vars, #replace_common_phrases, #swap_reference_words, #swap_words

Methods included from Helpers

#capture_the, #format_heisei_date, #set_prop, #squish!

Constructor Details

#initialize(data) ⇒ Document

Returns a new instance of Document.


106
107
108
109
110
111
112
# File 'lib/oat/document.rb', line 106

# Builds a new document around the given input.
#
# Keeps +data+ untouched in @data and starts with empty containers:
# @props and @scrapes as hashes, @blobs as an array.
#
# @param data the raw input to keep in @data
def initialize(data)
  @data = data

  @props = {}
  @scrapes = {}
  @blobs = []
end

Instance Attribute Details

#outputfileObject

Returns the value of attribute outputfile


100
101
102
# File 'lib/oat/document.rb', line 100

# Reader for the @outputfile instance variable.
#
# @return the current value of @outputfile (nil until something assigns it)
def outputfile
  @outputfile
end

#propsObject

Returns the value of attribute props


99
100
101
# File 'lib/oat/document.rb', line 99

# Reader for the @props instance variable.
#
# @return the current value of @props
def props
  @props
end

#reasons=(value) ⇒ Object (writeonly)

Sets the attribute reasons

Parameters:

  • value

    the value to set the attribute reasons to.


104
105
106
# File 'lib/oat/document.rb', line 104

# Writer for the @reasons instance variable.
#
# @param value the value to store in @reasons
def reasons=(value)
  @reasons = value
end

#templatables=(value) ⇒ Object (writeonly)

Sets the attribute templatables

Parameters:

  • value

    the value to set the attribute templatables to.


103
104
105
# File 'lib/oat/document.rb', line 103

# Writer for the @templatables instance variable.
#
# @param value the value to store in @templatables
def templatables=(value)
  @templatables = value
end

#templateObject

Returns the value of attribute template


101
102
103
# File 'lib/oat/document.rb', line 101

# Reader for the @template instance variable.
#
# @return the current value of @template (nil until something assigns it)
def template
  @template
end

#template_name=(value) ⇒ Object (writeonly)

Sets the attribute template_name

Parameters:

  • value

    the value to set the attribute template_name to.


102
103
104
# File 'lib/oat/document.rb', line 102

# Writer for the @template_name instance variable.
#
# @param value the value to store in @template_name
def template_name=(value)
  @template_name = value
end

Class Method Details

.convert_into_blobs(tdata, removenewlines = false) ⇒ Object


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/oat/document.rb', line 47

# Cuts +tdata+ into blobs by walking it line by line.
#
# Lines for which part_of_template? is true are dropped. Every other line is
# appended to a running buffer; when end_of_blob? reports true for a line, the
# buffer is handed to create_actual_blob and a fresh buffer is started.
#
# @param tdata [String] text to split (iterated with each_line)
# @param removenewlines [Boolean] forwarded unchanged to create_actual_blob
# @return [Array] a copy of the list of blobs, in document order
def convert_into_blobs(tdata, removenewlines = false)
  collected = []
  pending = ''

  tdata.each_line do |line|
    unless part_of_template?(line)
      pending += line

      # The line itself decides whether the buffered text is complete.
      if end_of_blob?(line)
        collected << create_actual_blob(collected.count, pending, removenewlines)
        pending = ''
      end
    end
  end

  # Flush whatever is left when the final line did not close a blob;
  # a leftover of a single character (or nothing) is discarded.
  collected << create_actual_blob(collected.count, pending, removenewlines) if pending.length > 1

  collected.dup
end

.create_actual_blob(count, text, removenewlines) ⇒ Object


74
75
76
# File 'lib/oat/document.rb', line 74

# Wraps +text+ in an Oat::Blob, optionally cleaning up its newlines first.
#
# With +removenewlines+ set, three substitutions run in order:
# 1. a newline directly followed by a letter is removed (the letter is kept),
# 2. a run of two or more newlines at the end of a line is removed,
# 3. newlines at the start of a line are removed.
#
# @param count index passed as the first argument to Oat::Blob.new
# @param text [String] the blob text
# @param removenewlines [Boolean] whether to apply the clean-up above
# @return [Oat::Blob]
def create_actual_blob(count, text, removenewlines)
  return Oat::Blob.new(count, text) unless removenewlines

  joined = text.gsub(/\n(\p{L})/, '\1')
  trimmed_tail = joined.gsub(/\n\n+$/, "")
  cleaned = trimmed_tail.gsub(/^\n+/, "")
  Oat::Blob.new(count, cleaned)
end

.end_of_blob?(line) ⇒ Boolean

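Checks whether this line marks the end of a blob. (The description also mentions `previousnewline` — whether the previous line was "\n" — but the method signature shown here has no such parameter.)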

Returns:

  • (Boolean)

88
89
90
91
92
93
94
95
96
# File 'lib/oat/document.rb', line 88

# Decides whether +line+ closes the blob currently being collected.
#
# Lengths are measured after trailing separator characters are stripped; the
# line's own "\n" is not a separator, so it still counts towards the length.
#
# * A line ending with the fixed "…特許を受けることができない。" sentence always
#   closes the blob, even when it fills the full line width.
# * A line starting with ・ or ● closes the blob unless its stripped length is
#   exactly 36 (a header that continues on the next line).
# * Any other line closes the blob only when its stripped length is below 36.
#
# @param line [String] one line of text, normally including its newline
# @return [Boolean]
def end_of_blob?(line)
  return true if line =~ /2項の規定により特許を受けることができない。\n$/

  bulleted = (line[0] =~ /[・●]/) ? true : false

  # Bulleted header: only the exact-width case is treated as "continues".
  return line.gsub(/[\p{Zs}]*$/, '').length != 36 if bulleted

  # Plain text: a line shorter than the full width ends the paragraph.
  line.gsub(/\p{Z}*$/, '').length < 36
end

.find_actual_content(data) ⇒ Object

find the body of the OA


31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/oat/document.rb', line 31

# Locates the body of the OA inside +data+.
#
# The input is stripped of HTML-like tags and has "\r\n" normalised to "\n"
# once; several patterns are then tried against that text. Matches whose
# overall length is below 150 characters are discarded, and of the remainder
# the shortest (treated as the most accurate) is returned.
#
# In every pattern capture group 1 is the body text. The boilerplate phrase
# "引用文献等については引用文献等一覧参照" is matched literally, with optional
# ASCII or full-width parentheses around it, so it no longer occupies capture
# group 1 in place of the body. Likewise the "拒絶の理由を発見しない請求項"
# terminator accepts a full-width "＜", because an ASCII "<...>" span would
# already have been removed by the tag stripping.
#
# @param data [String] raw document text, possibly containing HTML tags
# @return [MatchData, nil] the best match (body in group 1), or nil if nothing
#   suitable was found
def find_actual_content(data)
  # Sanitise once instead of once per pattern.
  text = data.gsub(%r{</?[^>]+?>}, '').gsub("\r\n", "\n")

  # Try multiple regexes and take the most accurate (shortest) result.
  patterns = [
    /(?<=記)\p{Z}*[(（]?引用文献等については引用文献等一覧参照[)）]?(.*?)(?:引\p{Z}?用\p{Z}?文\p{Z}?献\p{Z}?等\p{Z}?一\p{Z}?覧|───────|-----------)/mi,
    /\n\p{Z}*?(?:備考|記)\p{Z}*?\n(.*?)(?:引\p{Z}?用\p{Z}?文\p{Z}?献\p{Z}?等\p{Z}?一\p{Z}?覧|───────|この通知に関するお問い合わせがございましたら|--------)/mi,
    /\p{Z}理\p{Z}*由\p{Z}*$\n(.*?)(?:^\p{Z}最後の拒絶理由通知とする理由|[<＜]拒絶の理由を発見しない請求項|この拒絶理由通知の内容に関するお問い合わせ)/mi,
    /\p{Z}理\w+?由\p{Z}*$\n(.*)この拒絶理由通知の内容に関するお問い合わせがございましたら/mi,
    /根拠\n?が見いだせません。\n\n備考\n(.*)(?:引用文献等一覧|───────|--------)/mi,
    /。\n\n\n?\p{Z}*(?:備考|記)\p{Z}*(?:[(（]?引用文献等については引用文献等一覧参照[)）]?)?\n(.*?)(?:引用文献等一覧|───────|--------)/mi,
    /。\n\n\n?\p{Z}*理由\p{Z}*(.*?)(?:引用文献等一覧(?!参照)\p{Z}*$|───────|--------)/mi
  ]

  candidates = patterns.map { |pattern| text.match(pattern) }.compact
  candidates.reject { |m| m[0].length < 150 }.min_by { |m| m[0].length }
end

.get_blobs_from_data(data, removenewlines = false, fulldocument = false) ⇒ Object


19
20
21
22
23
24
25
26
27
28
# File 'lib/oat/document.rb', line 19

# Turns +data+ into a list of blobs.
#
# With +fulldocument+ set, the whole of +data+ is converted as-is. Otherwise
# find_actual_content is asked for the body; when it finds one, capture
# group 1 (or +data+ itself if that group is nil) is converted after "\r\n"
# has been normalised to "\n".
#
# @param data [String] the document text
# @param removenewlines [Boolean] forwarded to convert_into_blobs
# @param fulldocument [Boolean] skip body detection and convert everything
# @return [Array] the blobs, or an empty array when no body was found
def get_blobs_from_data(data, removenewlines = false, fulldocument = false)
  blobs =
    if fulldocument
      convert_into_blobs(data, removenewlines)
    else
      match = find_actual_content(data)
      if match
        body = match[1] || data
        convert_into_blobs(body.gsub("\r\n", "\n"), removenewlines)
      end
    end

  blobs.nil? ? [] : blobs
end

.part_of_template?(line) ⇒ Boolean

check for lines that are already in the .docx template file

Returns:

  • (Boolean)

79
80
81
82
83
84
# File 'lib/oat/document.rb', line 79

# Checks for lines that are already in the .docx template file and therefore
# must not become part of a blob.
#
# A line is skipped when it is
# * a bare newline,
# * the "引用文献等一覧" heading in angle brackets, preceded by at least one
#   separator character, or
# * a line containing the "引用文献等一覧参照" note followed by a closing
#   parenthesis.
#
# Brackets and parentheses are accepted in both ASCII and full-width form.
# The closing parenthesis sits inside a character class because a bare ")"
# in the pattern is an unmatched group terminator and does not compile.
#
# @param line [String] one line of text, normally including its newline
# @return [Boolean]
def part_of_template?(line)
  return true if line == "\n"
  return true if /^\p{Z}+[<＜]引用文献等一覧[>＞]/ =~ line
  return true if /引用文献等一覧参照[)）]/ =~ line

  false
end

Instance Method Details

#scanObject


114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/oat/document.rb', line 114

# Runs every extraction step over the document and returns the collected
# properties.
#
# The steps run in a fixed order: a few properties are seeded or captured
# directly, then each parse_* helper (from the Parsers mixin) is called, with
# parse_headers last.
#
# @return the @props object
def scan
  # NOTE(review): the key is spelled :citaton_list (no second "i"); left as-is
  # because other code may read it under this exact name — confirm.
  set_prop(:citaton_list, '')
  #required for calculating filenames
  capture_the(:mailing_no, /発送番号\p{Z}+(\S+)/)
  capture_the(:ref_no, /整理番号\p{Z}+(\S+)/)
  # NOTE(review): unlike the two patterns above, this one has no capture
  # group — check what capture_the stores in that case.
  capture_the(:ipc_list, /調査した分野$/)
  set_prop(:ipc_reference_text, '')

  parse_mailing_date 
  parse_examiner
  parse_app_no
  parse_drafted
  parse_our_lawyer
  parse_response_period
  parse_see_list
  parse_final_oa
  parse_amendments_date
  parse_satei_previous_oa
  parse_articles
  parse_currently_known
  parse_citations
  parse_ipc
  parse_appeal_examiner
  parse_appeal_drafted
  parse_appeal_no
  parse_retroactive
  parse_note_to_applicant
  parse_shireisho_app
  parse_shireisho_code

  # Headers are parsed after all of the individual fields above.
  parse_headers

  @props
end