Module: PrawnHebrew::Text

Defined in:
lib/prawn_hebrew.rb

Constant Summary collapse

DEFAULT_HEBREW_FONT =
'GveretLevinHebrew'.freeze
DEFAULT_ENGLISH_FONT =
'Helvetica'.freeze
DEBUG_MODE =

Set to true for debugging which text rendering path is used

false
INVISIBLE_CHARS =

Characters that should be removed or replaced to prevent Prawn errors. Zero-width and invisible characters (removed entirely).

/[\u200B\u200C\u200D\u200E\u200F\uFEFF\u00AD\u202A\u202B\u202C\u202D\u202E\u2060\u2061\u2062\u2063\u2064\u206A\u206B\u206C\u206D\u206E\u206F]/.freeze
NBSP_CHARS =

Non-breaking and other special-width spaces, e.g. figure, thin, hair and ideographic spaces (replaced with a regular space)

/[\u00A0\u202F\u2007\u2008\u2009\u200A\u205F\u3000]/.freeze
DASH_CHARS =

Dashes - em dash, en dash, figure dash, horizontal bar, etc. (replace with regular hyphen)

/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212\uFE58\uFE63\uFF0D]/.freeze
SMART_QUOTES_DOUBLE =

Quotation marks (replace with standard quotes)

/[\u201C\u201D\u201E\u201F\u00AB\u00BB\u301D\u301E\u301F]/.freeze
SMART_QUOTES_SINGLE =
/[\u2018\u2019\u201A\u201B\u2039\u203A]/.freeze
ELLIPSIS_CHAR =

Ellipsis (replace with three dots)

/\u2026/.freeze
ARROW_CHAR =

Arrows and special symbols (replace with text equivalents)

/\u2192/.freeze
LEFT_ARROW =

/\u2190/.freeze
UP_ARROW =

/\u2191/.freeze
DOWN_ARROW =

/\u2193/.freeze
BULLET_CHARS =

Bullet points and list markers

/[\u2022\u2023\u2043\u204C\u204D\u2219\u25E6\u25AA\u25AB\u25CF\u25CB]/.freeze
MISC_PROBLEM_CHARS =

Other problematic characters

/[\u2028\u2029\uFFFC\uFFFD\uFFFF]/.freeze
TRAILING_PUNCTUATION =

Punctuation that should stay at the end of Hebrew text (rendered after in RTL)

/([.,:;!?\-\u05BE\u05C3]+)$/.freeze
LEADING_PUNCTUATION =
/^([.,:;!?\-\u05BE\u05C3()\[\]{}]+)/.freeze

Instance Method Summary collapse

Instance Method Details

#hebrew_formatted_text(text, size: 12, style: :normal, hebrew_font: DEFAULT_HEBREW_FONT, english_font: DEFAULT_ENGLISH_FONT) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/prawn_hebrew.rb', line 79

# Converts +text+ into an array of formatted-text fragment hashes, choosing
# the Hebrew or the English font word by word.
#
# The text is sanitized first. If it contains Hebrew and no Latin letters it
# is delegated unchanged to +render_pure_hebrew+. Otherwise each line is split
# on whitespace; consecutive Hebrew words are collected into a run and handed
# to +process_hebrew_run+, and every other word becomes its own fragment in
# +english_font+ with a trailing space.
#
# @param text [#to_s, nil] text to convert; nil yields an empty array
# @param size [Numeric] font size stored on every fragment
# @param style [Symbol, Array<Symbol>, nil] style or styles stored under :styles
# @param hebrew_font [String] font name used for Hebrew runs
# @param english_font [String] font name used for everything else
# @return [Array<Hash>] fragments in emission order
def hebrew_formatted_text(text, size: 12, style: :normal, hebrew_font: DEFAULT_HEBREW_FONT, english_font: DEFAULT_ENGLISH_FONT)
  content = sanitize_text(text).to_s

  # Hebrew present and no Latin letters: no per-word processing is needed.
  if content =~ /\p{Hebrew}/ && content !~ /[a-zA-Z]/
    return render_pure_hebrew(content, size, style, hebrew_font)
  end

  styles = style.is_a?(Array) ? style : [style].compact
  # Every non-Hebrew fragment carries the same font/size/styles so that no
  # fragment silently falls back to the document defaults.
  plain = ->(str) { { text: str, font: english_font, size: size, styles: styles } }

  # Each line is processed independently so runs never span a line break.
  lines = content.split("\n")
  all_fragments = []

  lines.each_with_index do |line, line_idx|
    hebrew_run = []

    # The capture group keeps the whitespace separators as tokens.
    line.split(/(\s+)/).each do |word|
      if word.strip.empty?
        # A lone space is dropped: non-Hebrew words are emitted with their own
        # trailing space. The empty token produced by leading whitespace is
        # dropped too, since it would only add an empty-text fragment.
        # NOTE(review): other whitespace is appended immediately, so it lands
        # before a Hebrew run that is still being collected.
        all_fragments << plain.call(word) unless word.empty? || word == ' '
        next
      end

      if word =~ /\p{Hebrew}/
        hebrew_run << word
        next
      end

      unless hebrew_run.empty?
        # A non-Hebrew word closes the pending Hebrew run.
        process_hebrew_run(hebrew_run, all_fragments, hebrew_font, english_font, size, styles)
        all_fragments << plain.call(' ')
        hebrew_run.clear
      end
      all_fragments << plain.call("#{word} ")
    end

    # Flush a Hebrew run that reaches the end of the line.
    unless hebrew_run.empty?
      process_hebrew_run(hebrew_run, all_fragments, hebrew_font, english_font, size, styles)
    end

    # Line breaks go between lines only, never after the last one.
    all_fragments << plain.call("\n") if line_idx < lines.length - 1
  end

  all_fragments
end

#hebrew_table(data, size: 12, style: :normal, hebrew_font: DEFAULT_HEBREW_FONT, english_font: DEFAULT_ENGLISH_FONT, **table_opts) ⇒ Object



193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/prawn_hebrew.rb', line 193

# Builds a table from +data+, sanitizing every cell and assigning the Hebrew
# or English font per cell depending on whether its content contains Hebrew.
#
# @param data [Array<Array>] rows of cells; a cell is either a Hash (its
#   :content value is sanitized, other keys are kept) or any object, which is
#   converted with +to_s+ and sanitized
# @param size [Numeric] size assigned to cells that report no size of their own
# @param style [Symbol] accepted but not referenced anywhere in this method
# @param hebrew_font [String] font assigned to cells containing Hebrew
# @param english_font [String] font assigned to all other cells
# @param table_opts [Hash] options forwarded to +table+; :text_color is
#   consumed here (colour for Hebrew cells, default "000000") and not forwarded
# @return [Object] whatever +table+ returns
def hebrew_table(data, size: 12, style: :normal,
                 hebrew_font: DEFAULT_HEBREW_FONT,
                 english_font: DEFAULT_ENGLISH_FONT,
                 **table_opts)
  # :text_color is applied per cell below, so it is removed before the
  # remaining options are forwarded to +table+.
  # NOTE(review): assumes prawn-table turns every remaining option into a
  # table-level setter call, so a leftover :text_color would raise — confirm
  # against the installed prawn-table version.
  hebrew_text_color = table_opts.delete(:text_color) || "000000"

  processed_data = data.map do |row|
    row.map do |cell_content|
      # Plain cells are stringified and sanitized directly.
      next sanitize_text(cell_content.to_s) unless cell_content.is_a?(Hash)

      # Hash cells (e.g. {content: "text", font_style: :bold}) are copied so
      # the caller's hash is left untouched; only :content is sanitized.
      cell_hash = cell_content.dup
      cell_hash[:content] = sanitize_text(cell_hash[:content].to_s) if cell_hash[:content]
      cell_hash
    end
  end

  table(processed_data, table_opts) do |tbl|
    tbl.cells.each do |cell|
      if cell.content =~ /\p{Hebrew}/
        cell.font = hebrew_font
        cell.text_color = hebrew_text_color
      else
        cell.font = english_font
      end
      # Keep a size that was already set on the cell.
      cell.size = size unless cell.size
    end
  end
end

#hebrew_text_box(text, size: 12, style: :normal, hebrew_font: DEFAULT_HEBREW_FONT, english_font: DEFAULT_ENGLISH_FONT, direction: :auto, **box_opts) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/prawn_hebrew.rb', line 234

# Draws +text+ in a box, picking a rendering helper based on whether the text
# contains Hebrew, the requested direction, and the :overflow option.
#
# @param text [#to_s, nil] text to draw; sanitized before anything else
# @param size [Numeric] font size (a :size entry in +box_opts+ wins if present)
# @param style [Symbol, Array<Symbol>] passed through to the rendering helpers
# @param hebrew_font [String] font for Hebrew content
# @param english_font [String] font for non-Hebrew content
# @param direction [Symbol] :auto picks :rtl when Hebrew is present, else :ltr
# @param box_opts [Hash] remaining options. :rotate, :character_spacing,
#   :leading and :min_font_size are extracted here; the rest is handed to the
#   rendering helper
# @return [Object] the return value of whichever helper performed the drawing
def hebrew_text_box(text, size: 12, style: :normal,
                    hebrew_font: DEFAULT_HEBREW_FONT,
                    english_font: DEFAULT_ENGLISH_FONT,
                    direction: :auto, **box_opts)
  text = sanitize_text(text)

  # Pull the settings this method handles itself out of the pass-through hash.
  heb_face   = box_opts.delete(:hebrew_font) || hebrew_font
  eng_face   = box_opts.delete(:english_font) || english_font
  point_size = box_opts.delete(:size) || size
  angle      = box_opts.delete(:rotate) || 0
  tracking   = box_opts.delete(:character_spacing) || 0
  line_gap   = box_opts.delete(:leading) || 0
  floor_size = box_opts.delete(:min_font_size)
  shrinking  = box_opts[:overflow] == :shrink_to_fit

  # Match position of the first Hebrew character, or nil; forwarded as-is.
  contains_hebrew = text.to_s =~ /\p{Hebrew}/
  direction = (contains_hebrew ? :rtl : :ltr) if direction == :auto
  latin_only = !contains_hebrew && direction == :ltr

  # In shrink mode :overflow is withheld from the options and only restored
  # for the English-only path below.
  box_opts.delete(:overflow) if shrinking

  if latin_only
    if shrinking
      box_opts[:overflow] = :shrink_to_fit
      box_opts[:min_font_size] = floor_size if floor_size
    end
    return render_english_only_text(text, point_size, style, eng_face,
                                    angle, tracking, line_gap, box_opts)
  end

  # Hebrew or forced-RTL text in shrink mode goes to the dedicated helper.
  if shrinking
    return shrink_hebrew_text_to_fit(text, point_size, style, heb_face,
                                     eng_face, tracking, line_gap,
                                     floor_size, angle, box_opts)
  end

  draw = lambda do
    render_hebrew_text_content(text, contains_hebrew, direction, point_size, style,
                               heb_face, eng_face, tracking, line_gap, box_opts)
  end

  if angle != 0
    # Rotate around the box origin when one was given, else around [0, 0].
    rotate(angle, origin: box_opts[:at] || [0, 0]) { draw.call }
  else
    draw.call
  end
end

#process_hebrew_run(hebrew_run, all_fragments, hebrew_font, english_font, size, styles) ⇒ Object

Process a run of Hebrew words, handling punctuation correctly



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/prawn_hebrew.rb', line 155

# Appends the fragments for one run of consecutive Hebrew words to
# +all_fragments+.
#
# Punctuation matching LEADING_PUNCTUATION on the first word and
# TRAILING_PUNCTUATION on the last word is split off and emitted as separate
# fragments in +english_font+. The words themselves are emitted in reverse
# order in +hebrew_font+ with direction :rtl, separated by single spaces.
#
# The caller's +hebrew_run+ array is left unmodified.
#
# @param hebrew_run [Array<String>] the words of the run, in logical order
# @param all_fragments [Array<Hash>] output list; fragments are appended to it
# @param hebrew_font [String] font for the Hebrew words and their separators
# @param english_font [String] font for the split-off punctuation
# @param size [Numeric] font size stored on every fragment
# @param styles [Array<Symbol>] styles stored on every fragment
# @return [Array<Hash>] +all_fragments+
def process_hebrew_run(hebrew_run, all_fragments, hebrew_font, english_font, size, styles)
  return all_fragments if hebrew_run.empty?

  # Work on a copy: stripping punctuation must not alter the caller's array.
  words = hebrew_run.dup

  # Trailing punctuation is stripped first, so a single-word run with
  # punctuation on both sides has each side detected exactly once.
  trailing_punct = words.last[TRAILING_PUNCTUATION, 1]
  words[-1] = words.last.sub(TRAILING_PUNCTUATION, '') if trailing_punct

  leading_punct = words.first[LEADING_PUNCTUATION, 1]
  words[0] = words.first.sub(LEADING_PUNCTUATION, '') if leading_punct

  # Leading punctuation is emitted before the reversed words.
  if leading_punct
    all_fragments << { text: leading_punct, font: english_font, size: size, styles: styles }
  end

  # A word made only of punctuation is empty after stripping. Dropping such
  # words before interleaving separators avoids a dangling space fragment.
  visible = words.reverse.reject(&:empty?)
  visible.each_with_index do |hw, idx|
    all_fragments << { text: hw, font: hebrew_font, size: size, direction: :rtl, styles: styles }
    next if idx == visible.length - 1

    all_fragments << { text: ' ', font: hebrew_font, size: size, direction: :rtl, styles: styles }
  end

  # Trailing punctuation is emitted after the reversed words.
  if trailing_punct
    all_fragments << { text: trailing_punct, font: english_font, size: size, styles: styles }
  end

  all_fragments
end

#render_pure_hebrew(text, size, style, hebrew_font) ⇒ Object

Render pure Hebrew text as RTL without word reversal



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/prawn_hebrew.rb', line 135

# Builds RTL fragments for text that needs no per-word processing: one
# fragment per line, with a newline fragment between consecutive lines.
#
# @param text [#to_s] text to split on "\n"
# @param size [Numeric] font size stored on every fragment
# @param style [Symbol, Array<Symbol>, nil] style or styles stored under :styles
# @param hebrew_font [String] font stored on every fragment
# @return [Array<Hash>] fragments, each with direction :rtl
def render_pure_hebrew(text, size, style, hebrew_font)
  styles = style.is_a?(Array) ? style : [style].compact
  rtl_fragment = lambda do |content|
    { text: content, font: hebrew_font, size: size, direction: :rtl, styles: styles }
  end

  rows = text.to_s.split("\n")
  final_index = rows.length - 1

  rows.each_with_index.flat_map do |row, position|
    # Every line except the last is followed by an explicit newline fragment.
    if position < final_index
      [rtl_fragment.call(row), rtl_fragment.call("\n")]
    else
      [rtl_fragment.call(row)]
    end
  end
end

#sanitize_text(text) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/prawn_hebrew.rb', line 62

# Returns a copy of +text+ (converted with +to_s+) in which characters matched
# by the module's character-class constants are removed or replaced with
# plain ASCII equivalents. nil is returned unchanged.
#
# @param text [#to_s, nil] text to clean
# @return [String, nil] cleaned string, or nil when +text+ is nil
def sanitize_text(text)
  return text if text.nil?

  # Substitutions are applied top to bottom.
  substitutions = [
    [INVISIBLE_CHARS, ''],        # invisible characters are dropped
    [NBSP_CHARS, ' '],            # special spaces become a regular space
    [DASH_CHARS, '-'],            # every dash variant becomes a hyphen
    [SMART_QUOTES_DOUBLE, '"'],   # typographic double quotes
    [SMART_QUOTES_SINGLE, "'"],   # typographic single quotes
    [ELLIPSIS_CHAR, '...'],       # ellipsis becomes three dots
    [ARROW_CHAR, '->'],           # right arrow
    [LEFT_ARROW, '<-'],           # left arrow
    [UP_ARROW, '^'],              # up arrow
    [DOWN_ARROW, 'v'],            # down arrow
    [BULLET_CHARS, '*'],          # bullets become an asterisk
    [MISC_PROBLEM_CHARS, '']      # remaining problem characters are dropped
  ]

  substitutions.reduce(text.to_s) do |cleaned, (pattern, replacement)|
    cleaned.gsub(pattern, replacement)
  end
end