Class: Bliss::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/bliss/parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(path, filepath = nil) ⇒ Parser

Returns a new instance of Parser.



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/bliss/parser.rb', line 3

# Builds a parser for the document located at +path+.
#
# A Bliss::ParserMachine is created and wrapped in a Nokogiri SAX push parser.
# When +filepath+ is given, that file is opened for writing so the parsed
# stream can be copied to it.
#
# @param path [String] location handed to the HTTP request made by #parse
# @param filepath [String, nil] optional path of a file to write chunks to
def initialize(path, filepath = nil)
  @path = path

  # Plain state first; none of it depends on the collaborators built below.
  @root = nil
  @nodes = nil
  @formats = []

  @parser_machine = Bliss::ParserMachine.new
  @push_parser = Nokogiri::XML::SAX::PushParser.new(@parser_machine)

  if filepath
    # autoclose is switched off, so the handle is only closed explicitly.
    @file = File.new(filepath, 'w').tap { |out| out.autoclose = false }
  end

  # Register a no-op root callback so @root is still captured by on_root.
  on_root {}
end

Instance Method Details

#add_format(format) ⇒ Object



22
23
24
# File 'lib/bliss/parser.rb', line 22

# Registers +format+ with this parser by appending it to @formats.
#
# @param format [Object] format to register; #constraints and #details are
#   called on registered formats by other methods of this class
# @return [Array] the list of registered formats, including the new one
def add_format(format)
  @formats << format
end

#check_unhandled_bytes ⇒ Object



86
87
88
89
90
91
92
93
# File 'lib/bliss/parser.rb', line 86

# Fires the max-unhandled-bytes callback once the counter passes the limit.
#
# The callback is cleared after it has run, so it is invoked at most once per
# registration. Nothing happens while the counter is at or below the limit, or
# when no callback is registered.
#
# @return [nil]
def check_unhandled_bytes
  return unless @unhandled_bytes > @max_unhandled_bytes
  return unless @on_max_unhandled_bytes

  @on_max_unhandled_bytes.call
  # Drop the callback so it cannot fire a second time.
  @on_max_unhandled_bytes = nil
end

#check_unhandled_bytes? ⇒ Boolean

Returns:

  • (Boolean)


102
103
104
# File 'lib/bliss/parser.rb', line 102

# Tells whether an unhandled-bytes limit has been configured.
#
# @return [Boolean] true when @max_unhandled_bytes is set to a truthy value
def check_unhandled_bytes?
  !!@max_unhandled_bytes
end

#close ⇒ Object



110
111
112
# File 'lib/bliss/parser.rb', line 110

# Closes the parser machine by delegating to its #close.
#
# #parse only feeds chunks to the push parser while the machine's is_closed?
# is false — presumably this call is what flips that state; confirm against
# Bliss::ParserMachine.
#
# @return [Object] whatever the parser machine's #close returns
def close
  @parser_machine.close
end

#exceeded? ⇒ Boolean

Returns:

  • (Boolean)


95
96
97
98
99
100
# File 'lib/bliss/parser.rb', line 95

# Tells whether the unhandled-bytes counter has passed the configured limit.
#
# Always answers with a strict boolean: false when no limit is configured or
# the counter is at or below the limit, true once the counter is above it.
#
# @return [Boolean]
def exceeded?
  return false unless check_unhandled_bytes?

  # Return the comparison itself so the "not exceeded" case is false, not nil.
  @unhandled_bytes > @max_unhandled_bytes
end

#file_close ⇒ Object



215
216
217
218
219
# File 'lib/bliss/parser.rb', line 215

# Closes the output file, if one was opened for this parser.
#
# @return [nil]
def file_close
  @file.close if @file
end

#formats_details ⇒ Object



30
31
32
33
34
# File 'lib/bliss/parser.rb', line 30

# Prints the inspected #details of every registered format to standard output,
# one line per format.
#
# @return [Array] the list of registered formats
def formats_details
  @formats.each { |registered| puts(registered.details.inspect) }
end

#handle_wait_tag_close(chunk) ⇒ Object



199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/bliss/parser.rb', line 199

# Writes +chunk+ to the output file while waiting for the configured closing
# tag (see #wait_tag_close).
#
# When the closing tag occurs in +chunk+, everything up to and including the
# first occurrence of that tag is written, followed by a closing tag for the
# root element, and the transfer is shut down through #secure_close. When the
# tag is absent the whole chunk is written and streaming continues.
#
# Any StandardError raised along the way (for example a nil chunk or a failed
# write) is deliberately swallowed and answered with #secure_close.
#
# @param chunk [String] raw data received after the parser machine closed
# @return [Object] the result of #secure_close, or the file after a plain write
def handle_wait_tag_close(chunk)
  tag_start = chunk.index(@wait_tag_close)
  if tag_start
    # Cut right after the closing tag. The cut point is derived from the tag's
    # own length, so element names of any length are copied in full.
    @file << chunk[0, tag_start + @wait_tag_close.length]
    @file << "</#{root}>" # TODO set this by using actual depth, so all tags get closed
    secure_close
  else
    @file << chunk
  end
rescue StandardError
  # Best effort: on any failure stop the transfer instead of propagating.
  secure_close
end

#load_constraints_on_parser_machine ⇒ Object



26
27
28
# File 'lib/bliss/parser.rb', line 26

# Collects the constraints of every registered format into a single flat list
# and hands that list to the parser machine.
#
# @return [Object] whatever the parser machine's #constraints returns
def load_constraints_on_parser_machine
  all_constraints = @formats.map(&:constraints).flatten
  @parser_machine.constraints(all_constraints)
end

#on_max_unhandled_bytes(bytes, &block) ⇒ Object



67
68
69
70
# File 'lib/bliss/parser.rb', line 67

# Configures the unhandled-bytes limit together with the callback to run once
# the limit is passed (see #check_unhandled_bytes).
#
# @param bytes [Integer] limit the unhandled-bytes counter is compared against
# @param block [Proc] callback invoked without arguments
# @return [Proc, nil] the given block
def on_max_unhandled_bytes(bytes, &block)
  @on_max_unhandled_bytes = block
  @max_unhandled_bytes = bytes
  block
end

#on_root(&block) ⇒ Object

Deprecated: use the depth argument passed to the on_tag_open or on_tag_close callbacks instead.



37
38
39
40
41
42
43
# File 'lib/bliss/parser.rb', line 37

# Registers a root callback on the parser machine.
#
# The value the machine passes in is remembered in @root (readable through
# #root) before it is forwarded to the given block.
#
# @param block [Proc] callback receiving the root value
# @return [false, Object] false when no block is given, otherwise whatever
#   the parser machine's #on_root returns
def on_root(&block)
  return false if block.nil?

  @parser_machine.on_root do |detected_root|
    @root = detected_root
    block.call(detected_root)
  end
end

#on_tag_close(element = '.', &block) ⇒ Object



58
59
60
61
62
63
64
65
# File 'lib/bliss/parser.rb', line 58

# Registers a tag-close callback for +element+ on the parser machine.
#
# The block is wrapped so that the unhandled-bytes counter is reset each time
# the callback fires, before the block itself runs.
#
# @param element [String] element selector handed to the parser machine
# @param block [Proc] callback receiving the two values the machine passes
#   (named hash and depth here)
# @return [Object] whatever the parser machine's #on_tag_close returns
def on_tag_close(element = '.', &block)
  wrapped = proc do |hash, depth|
    reset_unhandled_bytes
    block.call(hash, depth)
  end

  @parser_machine.on_tag_close(element, wrapped)
end

#on_tag_open(element = '.', &block) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/bliss/parser.rb', line 45

# Registers a tag-open callback for +element+ on the parser machine.
#
# Only blocks taking exactly one argument are accepted. The block is wrapped
# so that the unhandled-bytes counter is reset whenever the callback fires,
# except for the 'default' element, which leaves the counter untouched.
#
# @param element [String] element selector handed to the parser machine
# @param block [Proc] callback receiving the depth
# @return [false, Object] false when the block's arity is not 1, otherwise
#   whatever the parser machine's #on_tag_open returns
def on_tag_open(element = '.', &block)
  return false unless block.arity == 1

  wrapped = proc do |depth|
    reset_unhandled_bytes unless element == 'default'
    block.call(depth)
  end

  @parser_machine.on_tag_open(element, wrapped)
end

#on_timeout(seconds, &block) ⇒ Object



72
73
74
75
# File 'lib/bliss/parser.rb', line 72

# Configures the request timeout and the callback #parse invokes from the
# request's error handler when a timeout is configured.
#
# @param seconds [Numeric] value used by #parse for both the connect and the
#   inactivity timeout of the HTTP request
# @param block [Proc] callback invoked without arguments
# @return [Proc, nil] the given block
def on_timeout(seconds, &block)
  @on_timeout = block
  @timeout = seconds
  block
end

#parse ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/bliss/parser.rb', line 114

# Fetches @path over HTTP inside an EventMachine reactor and feeds the
# response body, chunk by chunk, to the Nokogiri push parser.
#
# Before the reactor starts, the unhandled-bytes counter is reset (when a limit
# is configured) and the constraints of all registered formats are loaded into
# the parser machine. Every path that ends the transfer goes through
# #secure_close, which stops the reactor; once EM.run returns, the output file
# (if any) is closed via #file_close.
#
# Raises Bliss::EncodingError (from inside the stream callback) when Nokogiri
# reports a syntax error whose message contains "encoding"; other Nokogiri
# syntax errors are rescued and ignored.
def parse
  reset_unhandled_bytes if check_unhandled_bytes?
  load_constraints_on_parser_machine

  EM.run do
    http = nil
    # A configured timeout is applied to both connecting and inactivity.
    if @timeout
      http = EM::HttpRequest.new(@path, :connect_timeout => @timeout, :inactivity_timeout => @timeout).get
    else
      http = EM::HttpRequest.new(@path).get
    end
    
    # The flag is assigned right before it is tested, so detection is always on.
    @autodetect_compression = true
    compression = :none
    if @autodetect_compression
      http.headers do
        # Treat the body as gzip when the Content-Disposition names a .gz
        # attachment, the response header reports itself as compressed, or the
        # content type is application/octet-stream or application/x-gzip.
        if (/^attachment.+filename.+\.gz/i === http.response_header['CONTENT_DISPOSITION']) or http.response_header.compressed? or ["application/octet-stream", "application/x-gzip"].include? http.response_header['CONTENT_TYPE']
          # MAX_WBITS+16 selects gzip-format decoding in zlib.
          @zstream = Zlib::Inflate.new(Zlib::MAX_WBITS+16)
          compression = :gzip
        end
      end
    end
    
    http.stream { |chunk|
      if chunk
        chunk.force_encoding('UTF-8')

        if check_unhandled_bytes?
          # NOTE(review): String#length counts characters of the UTF-8-tagged
          # chunk, not bytes — confirm whether bytesize was intended.
          @unhandled_bytes += chunk.length
          check_unhandled_bytes
        end
        if not @parser_machine.is_closed?
          # Parser machine still open: inflate if needed, parse, and mirror the
          # (inflated) chunk to the output file.
          begin
            case compression
              when :gzip
                chunk = @zstream.inflate(chunk)
                chunk.force_encoding('UTF-8')
            end
            @push_parser << chunk
            if @file
              @file << chunk
            end
          rescue Nokogiri::XML::SyntaxError => e
            #puts 'encoding error'
            # Only encoding-related syntax errors are escalated; all others are
            # dropped here.
            if e.message.include?("encoding")
              raise Bliss::EncodingError, "Wrong encoding given"
            end
          end

        else
          # Parser machine closed: stop when the byte limit was exceeded;
          # otherwise, with an output file, either keep writing until the
          # awaited closing tag shows up or stop right away.
          # NOTE(review): with no output file and the limit not exceeded,
          # nothing in this branch stops the reactor.
          if exceeded?
            #puts 'exceeded'
            secure_close
          else
            if @file
              if @wait_tag_close
                #puts 'handle wait'
                # NOTE(review): chunk is not inflated in this branch, even when
                # compression is :gzip — confirm this is intended.
                handle_wait_tag_close(chunk) #if @wait_tag_close
              else
                #puts 'secure close'
                secure_close
              end
            end
          end
        end
      end
    }
    http.errback {
      #puts 'errback'
      # NOTE(review): whenever a timeout is configured the timeout callback is
      # invoked for every errback, and @on_timeout is nil if on_timeout was
      # called without a block — confirm both are acceptable.
      if @timeout
        @on_timeout.call
      end
      secure_close
    }
    http.callback {
      #if @file
      #  @file.close
      #end
      #EM.stop
      secure_close
    }
  end
  file_close
end

#reset_unhandled_bytes ⇒ Object



81
82
83
84
# File 'lib/bliss/parser.rb', line 81

# Resets the unhandled-bytes counter to zero when a limit is configured.
#
# @return [Integer, false] 0 after a reset, false when no limit is configured
def reset_unhandled_bytes
  if check_unhandled_bytes?
    @unhandled_bytes = 0
  else
    false
  end
end

#root ⇒ Object



106
107
108
# File 'lib/bliss/parser.rb', line 106

# Returns the value stored in @root: nil after construction, then whatever the
# parser machine passed to the callback registered through #on_root.
#
# @return [Object, nil]
def root
  @root
end

#secure_close ⇒ Object



221
222
223
224
225
226
227
228
229
230
231
# File 'lib/bliss/parser.rb', line 221

# Shuts the transfer down: closes the zlib stream when one exists and always
# stops the EventMachine reactor.
#
# Errors raised while closing the stream are swallowed on purpose so that the
# reactor is stopped regardless.
#
# @return [Object, nil] the result of closing the stream, or nil when there is
#   no stream or closing it failed
def secure_close
  @zstream.close if @zstream
rescue StandardError
  # Ignored: a failing stream close must not keep the reactor running.
ensure
  EM.stop
  #puts "Closed secure."
end

#wait_tag_close(element) ⇒ Object



77
78
79
# File 'lib/bliss/parser.rb', line 77

# Remembers the closing tag of +element+ so that, once the parser machine has
# closed, #parse keeps writing chunks to the output file until that tag is
# seen (see #handle_wait_tag_close).
#
# @param element [String, Symbol] element name without angle brackets
# @return [String] the closing tag that was stored
def wait_tag_close(element)
  closing_tag = "</#{element}>"
  @wait_tag_close = closing_tag
end