Module: HexaPDF::Filter::Predictor

Defined in:
lib/hexapdf/filter/predictor.rb

Overview

Implements the predictor for the LZWDecode and FlateDecode filters.

Although a predictor isn’t a full PDF filter, it is implemented as one in HexaPDF terms to allow easy chaining of the predictor.

See: PDF2.0 s7.4.4.3, s7.4.4.4, partners.adobe.com/public/developer/en/tiff/TIFF6.pdf (p64f), www.w3.org/TR/PNG-Filters.html

Implementation notes:

The TIFF encoding and decoding methods are the same, except for the innermost loop. The way it is implemented is probably not the best but it avoids duplicate code.

The situation is similar with PNG encoding and decoding.

Constant Summary collapse

PREDICTOR_PNG_NONE =

:nodoc:

0
PREDICTOR_PNG_SUB =

:nodoc:

1
PREDICTOR_PNG_UP =

:nodoc:

2
PREDICTOR_PNG_AVERAGE =

:nodoc:

3
PREDICTOR_PNG_PAETH =

:nodoc:

4
PREDICTOR_PNG_OPTIMUM =

:nodoc:

5

Class Method Summary collapse

Class Method Details

.decoder(source, options) ⇒ Object

See HexaPDF::Filter



70
71
72
# File 'lib/hexapdf/filter/predictor.rb', line 70

# Runs the predictor in decoding mode.
#
# Delegates to ::execute with the mode symbol +:decoder+; +source+ and
# +options+ are passed through unchanged and the value returned by ::execute
# is returned as is.
def self.decoder(source, options)
  execute(:decoder, source, options)
end

.encoder(source, options) ⇒ Object

See HexaPDF::Filter



75
76
77
# File 'lib/hexapdf/filter/predictor.rb', line 75

# Runs the predictor in encoding mode.
#
# Delegates to ::execute with the mode symbol +:encoder+; +source+ and
# +options+ are passed through unchanged and the value returned by ::execute
# is returned as is.
def self.encoder(source, options)
  execute(:encoder, source, options)
end

.execute(type, source, options) ⇒ Object

:nodoc:



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/hexapdf/filter/predictor.rb', line 79

# Selects the predictor implementation based on options[:Predictor].
#
# * Missing (falsy) predictor or a value of 1: +source+ is returned untouched.
# * 2: the work is handed to ::tiff_execute.
# * 10 or greater: the work is handed to ::png_execute, which also receives
#   the predictor value itself.
# * Any other value raises HexaPDF::InvalidPDFObjectError.
#
# The entries :Colors, :BitsPerComponent and :Columns of +options+ default to
# 1, 8 and 1 when they are not set.
def self.execute(type, source, options) # :nodoc:
  predictor = options[:Predictor]
  return source unless predictor && predictor != 1

  colors = options[:Colors] || 1
  bits_per_component = options[:BitsPerComponent] || 8
  columns = options[:Columns] || 1

  if predictor == 2
    return tiff_execute(type, source, colors, bits_per_component, columns)
  end

  unless predictor >= 10
    raise HexaPDF::InvalidPDFObjectError, "Predictor key is invalid: #{predictor}"
  end

  png_execute(type, source, predictor, colors, bits_per_component, columns)
end

.png_execute(type, source, predictor, colors, bits_per_component, columns) ⇒ Object

:nodoc:



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# File 'lib/hexapdf/filter/predictor.rb', line 150

# Returns a Fiber that applies the PNG predictor to the data delivered by the
# +source+ fiber.
#
# When +type+ is +:decoder+, every input row consists of one tag byte naming
# the PNG filter of that row followed by the filtered bytes; the tag byte is
# dropped and the filter is undone. For any other +type+ the rows are filtered
# with the filter derived from +predictor+ and prefixed with the tag byte.
#
# The fiber pulls chunks via <tt>source.resume</tt>, buffers them, processes
# all complete rows and yields each non-empty result. If data is left over at
# the end, a FilterError is raised when the configuration option
# 'filter.predictor.strict' is set; otherwise the partial row is processed and
# returned as final value, unless the buffered data is exactly one byte long.
def self.png_execute(type, source, predictor, colors, bits_per_component, columns) # :nodoc:
  Fiber.new do
    decoding = (type == :decoder)
    pixel_width = (bits_per_component * colors + 7) / 8
    row_width = (columns * bits_per_component * colors + 7) / 8
    # Rows to be decoded carry an additional leading tag byte.
    row_width += 1 if decoding

    # Only on encoding: Arbitrarily choose a predictor if we should choose the optimum
    row_filter = (predictor == 15 ? PREDICTOR_PNG_PAETH : predictor - 10)

    buffer = ''.b
    # One byte longer than a row so that it can also be indexed like an encoder
    # row, which has the tag byte at index 0.
    previous = "\0".b * (row_width + 1)
    offset = 0

    # Picks the neighbour (left, above, upper left) closest to left + above - upper_left,
    # preferring them in that order on ties.
    paeth = lambda do |left, above, upper_left|
      estimate = left + above - upper_left
      dist_left = (estimate - left).abs
      dist_above = (estimate - above).abs
      dist_upper_left = (estimate - upper_left).abs

      if dist_left <= dist_above && dist_left <= dist_upper_left
        left
      elsif dist_above <= dist_upper_left
        above
      else
        upper_left
      end
    end

    # Undoes the filter of the row starting at +offset+ and appends the plain bytes to +sink+.
    undo_row = lambda do |sink|
      row = buffer[offset + 1, row_width - 1]
      payload = row_width - 1

      # Ascending order: the left neighbour must already be restored when it is read.
      case buffer.getbyte(offset)
      when PREDICTOR_PNG_SUB
        (pixel_width...payload).each do |i|
          row.setbyte(i, (row.getbyte(i) + row.getbyte(i - pixel_width)) % 256)
        end
      when PREDICTOR_PNG_UP
        payload.times do |i|
          row.setbyte(i, (row.getbyte(i) + previous.getbyte(i)) % 256)
        end
      when PREDICTOR_PNG_AVERAGE
        payload.times do |i|
          left = (i < pixel_width ? 0 : row.getbyte(i - pixel_width))
          row.setbyte(i, (row.getbyte(i) + ((left + previous.getbyte(i)) >> 1)) % 256)
        end
      when PREDICTOR_PNG_PAETH
        payload.times do |i|
          first_pixel = i < pixel_width
          left = (first_pixel ? 0 : row.getbyte(i - pixel_width))
          above = previous.getbyte(i)
          upper_left = (first_pixel ? 0 : previous.getbyte(i - pixel_width))

          row.setbyte(i, (row.getbyte(i) + paeth.call(left, above, upper_left)) % 256)
        end
      end

      sink << row
      previous = row
    end

    # Filters the row starting at +offset+ and appends tag byte plus filtered bytes to +sink+.
    apply_row = lambda do |sink|
      row = row_filter.chr.force_encoding(Encoding::BINARY) << buffer[offset, row_width]
      # Unfiltered copy (including the tag byte) that serves as reference for the next row.
      unfiltered = row.dup

      # Descending order: the left neighbour must still be unfiltered when it is read.
      case row_filter
      when PREDICTOR_PNG_SUB
        ((pixel_width + 1)..row_width).reverse_each do |i|
          row.setbyte(i, (row.getbyte(i) - row.getbyte(i - pixel_width)) % 256)
        end
      when PREDICTOR_PNG_UP
        (1..row_width).reverse_each do |i|
          row.setbyte(i, (row.getbyte(i) - previous.getbyte(i)) % 256)
        end
      when PREDICTOR_PNG_AVERAGE
        (1..row_width).reverse_each do |i|
          left = (i <= pixel_width ? 0 : row.getbyte(i - pixel_width))
          row.setbyte(i, (row.getbyte(i) - ((left + previous.getbyte(i)) >> 1)) % 256)
        end
      when PREDICTOR_PNG_PAETH
        (1..row_width).reverse_each do |i|
          first_pixel = i <= pixel_width
          left = (first_pixel ? 0 : row.getbyte(i - pixel_width))
          above = previous.getbyte(i)
          upper_left = (first_pixel ? 0 : previous.getbyte(i - pixel_width))

          row.setbyte(i, (row.getbyte(i) - paeth.call(left, above, upper_left)) % 256)
        end
      end

      sink << row
      previous = unfiltered
    end

    handle_row = (decoding ? undo_row : apply_row)

    while source.alive?
      chunk = source.resume
      break unless chunk

      # Drop the rows handled in the previous round, keep the incomplete rest.
      buffer.slice!(0...offset)
      buffer << chunk

      output = ''.b
      offset = 0

      until offset + row_width > buffer.length
        handle_row.call(output)
        offset += row_width
      end

      Fiber.yield(output) unless output.empty?
    end

    unless offset == buffer.length
      if GlobalConfiguration['filter.predictor.strict']
        raise FilterError, "Data is missing for PNG predictor"
      end

      unless buffer.length == 1
        output = ''.b
        # Treat the remaining bytes as one shortened row.
        row_width = buffer.length - offset
        handle_row.call(output)
        output
      end
    end
  end
end

.tiff_execute(type, source, colors, bits_per_component, columns) ⇒ Object

:nodoc:



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/hexapdf/filter/predictor.rb', line 95

# Returns a Fiber that applies the TIFF predictor to the data delivered by the
# +source+ fiber.
#
# Every row is read sample by sample (+bits_per_component+ bits each) through a
# HexaPDF::Utils::BitStreamReader and written through a
# HexaPDF::Utils::BitStreamWriter. When +type+ is +:decoder+, each sample has
# the preceding sample of the same colour component added to it; for any other
# +type+ the preceding sample is subtracted. Results are masked to
# +bits_per_component+ bits and each row starts with all components at zero.
#
# The fiber pulls chunks via <tt>source.resume</tt>, buffers them, processes
# all complete rows and yields each non-empty result. A FilterError is raised
# if data is left over once the source is exhausted.
def self.tiff_execute(type, source, colors, bits_per_component, columns) # :nodoc:
  Fiber.new do
    row_width = (columns * bits_per_component * colors + 7) / 8
    sample_mask = (1 << bits_per_component) - 1
    samples_per_row = columns * colors
    decoding = (type == :decoder)

    buffer = ''.b
    writer = HexaPDF::Utils::BitStreamWriter.new
    offset = 0

    # Processes one row from +reader+ and appends the resulting bytes to +sink+.
    process_row = lambda do |sink, reader|
      previous = [0] * colors
      samples_per_row.times do |index|
        component = index % colors
        sample = reader.read(bits_per_component)
        if decoding
          # The restored value is both emitted and remembered.
          sample = (sample + previous[component]) & sample_mask
          emitted = sample
        else
          # The difference is emitted, the original value is remembered.
          emitted = (sample - previous[component]) & sample_mask
        end
        sink << writer.write(emitted, bits_per_component)
        previous[component] = sample
      end
      sink << writer.finalize
    end

    while source.alive?
      chunk = source.resume
      break unless chunk

      # Drop the rows handled in the previous round, keep the incomplete rest.
      buffer.slice!(0...offset)
      buffer << chunk

      output = ''.b
      offset = 0

      until offset + row_width > buffer.length
        row_reader = HexaPDF::Utils::BitStreamReader.new(buffer[offset, row_width])
        process_row.call(output, row_reader)
        offset += row_width
      end

      Fiber.yield(output) unless output.empty?
    end

    raise FilterError, "Data is missing for TIFF predictor" if offset != buffer.length
  end
end