Class: PDF::Reader::PageLayout

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/page_layout.rb

Overview

Takes a collection of TextRun objects and renders them into a single string that best approximates the way they’d appear on a rendered PDF page.

The media box should be a four-number array describing the dimensions of the page to be rendered, as given by the page’s MediaBox attribute.

Constant Summary collapse

DEFAULT_FONT_SIZE =

: Numeric

12

Instance Method Summary collapse

Constructor Details

#initialize(runs, mediabox) ⇒ PageLayout

: (Array, Array | PDF::Reader::Rectangle) -> void



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/pdf/reader/page_layout.rb', line 20

def initialize(runs, mediabox)
  # Builds the layout state from the text runs and the page's media box.
  # A nil mediabox is rejected up front via validate_not_nil. The value is
  # then passed through process_mediabox, whose result is annotated as a
  # PDF::Reader::Rectangle (a plain 4-element array is still the common
  # input for now).
  PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")

  @mediabox = process_mediabox(mediabox) #: PDF::Reader::Rectangle
  @runs = runs #: Array[PDF::Reader::TextRun]

  # Use DEFAULT_FONT_SIZE both when no mean is available and when the
  # computed mean is zero.
  computed_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
  @mean_font_size = computed_font_size == 0 ? DEFAULT_FONT_SIZE : computed_font_size #: Numeric

  @median_glyph_width = median(@runs.map(&:mean_character_width)) || 0 #: Numeric

  # Smallest x among the runs; 0 when there are no runs.
  @x_offset = @runs.map(&:x).min || 0 #: Numeric

  # Only a non-positive lowest y is used as the vertical offset; a positive
  # one leaves the offset at 0.
  lowest_y = @runs.map(&:y).min || 0 #: Numeric
  @y_offset = lowest_y > 0 ? 0 : lowest_y #: Numeric

  # Grid dimensions and multipliers start out unset.
  @row_count = nil #: Numeric | nil
  @col_count = nil #: Numeric | nil
  @row_multiplier = nil #: Numeric | nil
  @col_multiplier = nil #: Numeric | nil
end

Instance Method Details

#to_s ⇒ Object

: () -> String



40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/pdf/reader/page_layout.rb', line 40

# Renders the text runs onto a grid of row_count strings, each col_count
# spaces wide, and returns the grid as a single newline-joined String.
#
# Returns "" when there are no runs or when row_count is zero. Each run is
# placed at a column/row derived from its x/y position, the stored offsets
# and the col/row multipliers; runs that fall outside the grid are skipped.
# Rows are filtered through interesting_rows and right-stripped before being
# joined.
def to_s
  return "" if @runs.empty? || row_count == 0

  page = row_count.times.map { " " * col_count }

  @runs.each do |run|
    col = ((run.x - @x_offset) / col_multiplier).round
    # y grows upward in run coordinates, so it is flipped against row_count
    # to give a row number counted from the top of the grid.
    row = row_count - ((run.y - @y_offset) / row_multiplier).round

    next unless row.between?(0, row_count) && col.between?(0, col_count)

    # NOTE(review): a row of 0 passes the bounds check above and indexes
    # page[-1], i.e. the last row of the grid - confirm this is intended.
    local_string_insert(page[row - 1], run.text, col)
  end

  interesting_rows(page).map(&:rstrip).join("\n")
end