Class: HocrTurtletext::Textangle

Inherits:
Object
  • Object
show all
Defined in:
lib/hocr_turtletext/textangle.rb

Overview

A DSL syntax for text extraction. Modified from the original at github.com/tardate/pdf-reader-turtletext

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(hocr_turtletext_reader, &block) ⇒ Textangle

hocr_turtletext_reader is a HocrTurtletext::Reader



8
9
10
11
12
13
14
15
16
17
18
# File 'lib/hocr_turtletext/textangle.rb', line 8

# Builds a textangle around +hocr_turtletext_reader+ and starts in exclusive
# mode (+@inclusive+ is false).
#
# An optional configuration block may be given. A block declaring exactly one
# parameter is yielded the new textangle; any other block is evaluated with
# the textangle as +self+, so DSL methods can be called without a receiver.
def initialize(hocr_turtletext_reader, &block)
  @reader = hocr_turtletext_reader
  @inclusive = false
  return unless block_given?

  block.arity == 1 ? yield(self) : instance_eval(&block)
end

Instance Attribute Details

#above(*args) ⇒ Object



40
41
42
43
44
45
# File 'lib/hocr_turtletext/textangle.rb', line 40

# Combined getter/setter for the +above+ bound, which #text uses as the
# upper y limit of the region.
#
# When the first argument is truthy it is stored; a missing, +nil+ or +false+
# argument leaves the current value alone. Always returns the current value.
def above(*args)
  candidate = args.first
  @above = candidate if candidate
  @above
end

#below(*args) ⇒ Object



48
49
50
51
52
53
# File 'lib/hocr_turtletext/textangle.rb', line 48

# Combined getter/setter for the +below+ bound, which #text uses as the
# lower y limit of the region.
#
# When the first argument is truthy it is stored; a missing, +nil+ or +false+
# argument leaves the current value alone. Always returns the current value.
def below(*args)
  candidate = args.first
  @below = candidate if candidate
  @below
end

#inclusive(*args) ⇒ Object



22
23
24
25
26
27
# File 'lib/hocr_turtletext/textangle.rb', line 22

# Combined getter/setter for the +inclusive+ flag that #text forwards to
# +reader.text_in_region+.
#
# Any non-nil first argument is stored, including +false+, so
# <tt>inclusive(false)</tt> really switches the flag off instead of being
# silently ignored. Calling with no argument (or +nil+) only reads the flag.
# Always returns the current value.
def inclusive(*args)
  value = args.first
  @inclusive = value unless value.nil?
  @inclusive
end

#left_of(*args) ⇒ Object



56
57
58
59
60
61
# File 'lib/hocr_turtletext/textangle.rb', line 56

# Combined getter/setter for the +left_of+ bound, which #text uses as the
# upper x limit of the region.
#
# When the first argument is truthy it is stored; a missing, +nil+ or +false+
# argument leaves the current value alone. Always returns the current value.
def left_of(*args)
  candidate = args.first
  @left_of = candidate if candidate
  @left_of
end

#reader ⇒ Object (readonly)

Returns the value of attribute reader.



5
6
7
# File 'lib/hocr_turtletext/textangle.rb', line 5

# Returns the object held in +@reader+, i.e. the reader handed to the
# constructor. #text returns nil when this is nil.
def reader
  @reader
end

#right_of(*args) ⇒ Object



64
65
66
67
68
69
# File 'lib/hocr_turtletext/textangle.rb', line 64

# Combined getter/setter for the +right_of+ bound, which #text uses as the
# lower x limit of the region.
#
# When the first argument is truthy it is stored; a missing, +nil+ or +false+
# argument leaves the current value alone. Always returns the current value.
def right_of(*args)
  candidate = args.first
  @right_of = candidate if candidate
  @right_of
end

Instance Method Details

#exclusive! ⇒ Object

Command: sets +inclusive+ to false



35
36
37
# File 'lib/hocr_turtletext/textangle.rb', line 35

# Command: switches the textangle to exclusive mode by setting +@inclusive+
# to false. The flag is later passed by #text to +reader.text_in_region+.
# Returns the assigned value (false).
def exclusive!
  @inclusive = false
end

#inclusive! ⇒ Object

Command: sets +inclusive+ to true



30
31
32
# File 'lib/hocr_turtletext/textangle.rb', line 30

# Command: switches the textangle to inclusive mode by setting +@inclusive+
# to true. The flag is later passed by #text to +reader.text_in_region+.
# Returns the assigned value (true).
def inclusive!
  @inclusive = true
end

#text ⇒ Object

Returns the text array found within the defined region. Each line of text is an array of the separate text elements found on that line.

[["first line first text", "first line last text"],["second line text"]]


74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/hocr_turtletext/textangle.rb', line 74

# Returns the text found inside the region described by the current
# +right_of+, +left_of+, +above+ and +below+ bounds.
#
# Each bound is resolved to a coordinate as follows:
# * unset (nil/false)  -> a default: 0 for the lower limits (+right_of+,
#   +below+), 99999 for the upper limits (+left_of+, +above+);
# * any Numeric        -> used directly as the coordinate;
# * anything else      -> handed to +reader.text_position+; the +:x+ (for
#   +right_of+/+left_of+) or +:y+ (for +above+/+below+) of the returned
#   position is used. If no position is returned the bound resolves to nil
#   and is passed through as-is.
#
# Returns nil when there is no reader; otherwise returns whatever
# <tt>reader.text_in_region(xmin, xmax, ymin, ymax, inclusive)</tt> returns,
# e.g. [["first line first text", "first line last text"],["second line text"]].
def text
  return unless reader

  # Resolves a single bound to a coordinate on +axis+ (:x or :y).
  resolve = lambda do |bound, axis, fallback|
    if !bound
      fallback
    elsif bound.is_a?(Numeric)
      bound
    else
      position = reader.text_position(bound)
      position[axis] if position
    end
  end

  # Same resolution order as before: right_of, left_of, above, below.
  xmin = resolve.call(right_of, :x, 0)
  xmax = resolve.call(left_of, :x, 99999) # TODO: figure out the actual limit?
  ymax = resolve.call(above, :y, 99999)
  ymin = resolve.call(below, :y, 0) # TODO: figure out the actual limit?

  reader.text_in_region(xmin, xmax, ymin, ymax, inclusive)
end