Module: Polytexnic::Literal

Extended by:
Literal
Included in:
Literal, Preprocessor, Preprocessor::Polytex, String
Defined in:
lib/polytexnic/literal.rb

Defined Under Namespace

Classes: FileNotFound, IncludedFileReader, IncludedSectionReader, SectionNotFound

Constant Summary collapse

LANG_REGEX =

Matches the line for syntax highlighting. %= lang: <language>[, options: …]

/^\s*%=\s+lang:\s*(\w+)(?:,\s*options:(.*))?/
CODE_INCLUSION_REGEX =

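Matches the line for code inclusion: %= <<(/path/to/code.ext). Inside the parentheses the path may be followed by an optional [section name], an optional ", lang: <language>", and an optional ", options: …".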

/^\s*%=\s+<<\s*\(          # opening
\s*([^\s]+?)             # path to file
(?:\[(.+?)\])?           # optional section name
(?:,\s*lang:\s*(\w+))?   # optional lang
(,\s*options:\s*.*)?     # optional options
\s*\)                    # closing paren
/x

Instance Method Summary collapse

Instance Method Details

#cache_display_inline_math(output) ⇒ Object

Caches both display and inline math.



197
198
199
200
201
202
# File 'lib/polytexnic/literal.rb', line 197

# Caches display math and then inline math in +output+.
#
# Both helpers are invoked on the same +output+ object, display math first.
# Returns +output+ itself (not the helpers' return values).
def cache_display_inline_math(output)
  cache_display_math(output)
  cache_inline_math(output)
  output
end

#cache_display_math(output) ⇒ Object

Caches display math. We support both TeX-style $$…$$ and LaTeX-style \[ … \].



206
207
208
209
210
211
# File 'lib/polytexnic/literal.rb', line 206

# Replaces every display-math span in +output+ (in place) with the markup
# produced by +equation_element+.
#
# Both LaTeX-style \[ ... \] and TeX-style $$ ... $$ are recognized; either
# way the content is normalized to the \[ ... \] form before being handed to
# +equation_element+.
#
# Returns the result of +gsub!+: +output+ when at least one substitution was
# made, +nil+ otherwise.
def cache_display_math(output)
  display_math = /\\\[(.*?)\\\]|\$\$(.*?)\$\$/m
  output.gsub!(display_math) do
    # Group 1 is the \[ ... \] body, group 2 the $$ ... $$ body.
    body = Regexp.last_match(1) || Regexp.last_match(2)
    equation_element("\\[ #{body} \\]")
  end
end

#cache_inline_math(output) ⇒ Object

Caches inline math. We support both TeX-style $…$ and LaTeX-style \( … \). There’s an annoying edge case involving literal dollar signs, as in \$. Handling it significantly complicates the regex, and necessitates introducing an additional group to catch the character before the math dollar sign in $2 and prepend it to the inline math element.



232
233
234
235
236
237
238
239
# File 'lib/polytexnic/literal.rb', line 232

# Replaces every inline-math span in +output+ (in place) with an 'inline'
# xmlelement wrapping a digest key, and stores the normalized math under that
# key in +literal_cache+.
#
# Capture groups of the pattern:
#   1 - body of a LaTeX-style \( ... \) span
#   2 - the single character (or line start) preceding a TeX-style opening $;
#       it is consumed by the match, so it is re-emitted in the replacement
#   3 - body of a TeX-style $ ... $ span
#
# Returns the result of +gsub!+ (+nil+ when nothing matched).
def cache_inline_math(output)
  inline_math = /(?:\\\((.*?)\\\)|([^\\]|^)\$(.*?[^\\])\$)/m
  output.gsub!(inline_math) do
    match = Regexp.last_match
    math = "\\( #{match[1] || match[3]} \\)"
    key = digest(math)
    literal_cache[key] = math
    match[2].to_s + xmlelement('inline') { key }
  end
end

#cache_literal(polytex, format = :html) ⇒ Object

Makes the caches for literal environments.



20
21
22
23
24
25
# File 'lib/polytexnic/literal.rb', line 20

# Builds the caches for literal environments found in +polytex+.
#
# The source is split on newlines, handed line-by-line to
# +cache_literal_environments+ (which appends to a fresh output array), and the
# collected output lines are joined back together with newlines.
#
# polytex - the source text as a String.
# format  - output format symbol forwarded unchanged (defaults to :html).
#
# Returns the processed text as a String.
def cache_literal(polytex, format = :html)
  processed = []
  cache_literal_environments(polytex.split("\n"), processed, format)
  processed.join("\n")
end

#cache_literal_environments(lines, output, format, cache = nil) ⇒ Object

Handles environments that should be passed through the pipeline intact. This includes verbatim environments (‘verbatim’, ‘Verbatim’) and all the equation environments handled by MathJax (‘equation’, ‘align’, etc.). We take care to keep count of the number of begins we see so that the code handles nested environments correctly. I.e.,

\begin{verbatim}
  \begin{verbatim}
  \emph{foo bar}
  \end{verbatim}
\end{verbatim}
lorem ipsum

includes the internal literal text without stopping after the first \end{verbatim}.

The control flow here is really nasty, but attempts to refactor it into a multi-pass solution have only resulted in even more complexity, and even then I’ve failed to get it to work. Thus, it shall for now follow the “ball of mud” pattern. (The only saving grace is that it’s very thoroughly tested.)



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/polytexnic/literal.rb', line 62

# Walks +lines+ and replaces each literal environment (verbatim-like, code,
# and math environments) with a digest key, storing the original content in
# +literal_cache+ (plain literals and math) or +code_cache+ (code with a
# language). Everything else is copied to +output+ unchanged.
#
# lines  - Array of source lines. It is consumed destructively with +shift+,
#          and code-inclusion lines push their expansion back onto its front.
# output - Array that receives the processed lines.
# format - :latex switches on LaTeX-mode handling; any other value is treated
#          as HTML.
# cache  - unused by this method; kept so existing callers keep working.
#
# Raises RuntimeError ("Missing \end{<type>}") when the input ends before a
# literal environment is closed.
# Returns the value of the outer +while+ loop (nil).
def cache_literal_environments(lines, output, format, cache = nil)
  latex = (format == :latex)
  language = nil
  in_verbatim = false
  in_codelisting = false
  while (line = lines.shift)
    if line =~ LANG_REGEX && !in_verbatim
      # Remember the language/options; they apply to the next literal block.
      language = $1
      highlight_options = $2
    elsif line =~ /\s*\\begin\{codelisting\}/ && !in_verbatim
      in_codelisting = true
      output << line
    elsif line =~ /\s*\\end\{codelisting\}/ && !in_verbatim
      in_codelisting = false
      output << line
    elsif line =~ CODE_INCLUSION_REGEX && !in_verbatim
      # Reduce to a previously solved problem.
      # We transform
      # %= <<(/path/to/file.rb)
      # to
      # %= lang:rb
      # \begin{code}
      # <content of file or section.rb>
      # \end{code}
      # and then prepend the code to the current `lines` array.
      filename, sectionname, custom_language, highlight_options = $1, $2, $3, $4
      if filename
        lines.unshift(*include_code(filename, sectionname, custom_language, highlight_options))
      end
    elsif line.begin_literal?
      in_verbatim = true
      # Saved now because `line` is reassigned by the inner loop below.
      literal_type = line.literal_type
      skip = line.math_environment? || latex
      if line.math_environment? && !latex
        output << '\begin{xmlelement*}{equation}'
        output << '\begin{equation}'
      end
      math = line.math_environment?
      label = nil
      output << xmlelement(element(literal_type), skip) do
        # Nesting depth of `literal_type` environments; 1 for the one just opened.
        count = 1
        text = []
        # Keep the \begin line itself for math, and for LaTeX output without
        # a highlighting language.
        text << line if line.math_environment? || (latex && !language)
        while (line = lines.shift)
          if line.begin_literal?(literal_type)
            count += 1
          elsif line.end_literal?(literal_type)
            count -= 1
            if count.zero?
              in_verbatim = false
              text << line if line.math_environment? || (latex && !language)
              break
            end
          end
          # A line consisting solely of \label{...} inside a math environment.
          label = line if math && line =~ /^\s*\\label{.*?}\s*$/
          text << line
        end
        # When the input runs out, `line` is nil here, so the message must be
        # built from the saved `literal_type` rather than `line.literal_type`
        # (which would raise NoMethodError instead of this error).
        raise "Missing \\end{#{literal_type}}" if count != 0
        content = text.join("\n")
        if math
          key = digest(content)
          literal_cache[key] = content
        elsif language.nil?
          key = digest(content)
          literal_cache[key] = content
          tag = 'literal'
        else
          format = latex ? 'latex' : 'html'
          # The cache identity covers everything that is stored or affects
          # the highlighted output.
          id = "#{content}--#{language}--#{format}--#{in_codelisting}--#{highlight_options}"
          key = digest(id, salt: code_salt)
          code_cache[key] = [content, language, in_codelisting, highlight_options]
          tag = 'code'
        end
        # `tag` is unset (nil) on the math path; the `|| math` term covers it.
        if latex || tag == 'code' || math
          key
        else
          xmlelement(tag) { key }
        end
      end
      if math && !latex
        unless label.nil?
          key = digest(label)
          math_label_cache[key] = label
          output << key
        end
        output << '\end{equation}'
        unless label.nil?
          # Extract the label name from \label{name} and rewrite ':' and '_'.
          string = label.scan(/\{(.*?)\}/).flatten.first
          string = string.gsub(':', '-').gsub('_', underscore_digest)
          output << "\\xbox{data-label}{#{string}}"
        end
        output << '\end{xmlelement*}'
      end
      # A language directive only applies to a single literal block.
      language = nil
      (output << '') unless latex # Force the next element to be a paragraph
    else
      output << line
    end
  end
end

#cache_unicode(string) ⇒ Object

Handles non-ASCII Unicode characters. The Tralics part of the pipeline doesn’t properly handle Unicode, which is odd since Tralics is a French project. Nevertheless, we can hack around the restriction by treating non-ASCII Unicode characters as literal elements and simply pass them through the pipeline intact.



275
276
277
278
279
280
281
282
# File 'lib/polytexnic/literal.rb', line 275

# Replaces each run of non-ASCII characters in +string+ (in place) with a
# 'unicode' xmlelement wrapping a digest key, and stores the original run
# under that key in +literal_cache+.
#
# Returns the result of +gsub!+ (+nil+ when +string+ had no non-ASCII text).
def cache_unicode(string)
  # The whole pattern is a single capture group, so the block argument (the
  # full match) is the same text as group 1.
  string.gsub!(/([^\x00-\x7F]+)/) do |non_ascii_run|
    key = digest(non_ascii_run)
    literal_cache[key] = non_ascii_run
    xmlelement('unicode') { key }
  end
end

#code_error(details) ⇒ Object



181
182
183
# File 'lib/polytexnic/literal.rb', line 181

# Formats an error message as a LaTeX \verb snippet delimited by '+'.
#
# details - text describing what went wrong.
#
# NOTE(review): a '+' inside +details+ would presumably end the \verb span
# early; confirm whether callers can pass such text.
#
# Returns the String "\verb+ERROR: <details>+".
def code_error(details)
  '\verb+ERROR: ' + details.to_s + '+'
end

#code_language(filename, custom_language) ⇒ Object



185
186
187
188
189
# File 'lib/polytexnic/literal.rb', line 185

# Picks the highlighting language for an included file.
#
# filename        - path of the included file; its extension (without the
#                   leading dot) is the fallback language.
# custom_language - explicitly requested language, or nil.
#
# Returns +custom_language+ when given, otherwise the file extension,
# otherwise 'text'.
def code_language(filename, custom_language)
  # Evaluated first so an invalid +filename+ fails even when a custom
  # language is supplied.
  extension = File.extname(filename)[/\.(.*)/, 1]
  return custom_language if custom_language

  extension || 'text'
end

#code_salt ⇒ Object

Returns a permanent salt for the syntax highlighting cache.



192
193
194
# File 'lib/polytexnic/literal.rb', line 192

# Returns the fixed salt String used for the syntax highlighting cache.
# The value is a hard-coded constant, so it is the same on every call.
def code_salt
  'fbbc13ed4a51e27608037365e1d27a5f992b6339'
end

#element(literal_type) ⇒ Object



284
285
286
287
288
289
290
# File 'lib/polytexnic/literal.rb', line 284

# Maps a literal type to the element name used for it.
#
# Every type listed in +math_environments+ maps to 'equation'; any other type
# is returned unchanged.
def element(literal_type)
  math_environments.include?(literal_type) ? 'equation' : literal_type
end

#equation_element(content) ⇒ Object

Returns an equation element while caching the given content. We use this only for unnumbered display equations, which requires using the `equation*` environment in place of `equation`.



216
217
218
219
220
221
222
223
224
# File 'lib/polytexnic/literal.rb', line 216

# Caches +content+ in +literal_cache+ under its digest and returns the markup
# for an unnumbered display equation that contains only the digest key.
#
# The returned String is five newline-separated lines: an `xmlelement*`
# wrapper of type `equation` around an `equation*` environment around the
# key. Lines after the first carry a four-space indent.
def equation_element(content)
  key = digest(content)
  literal_cache[key] = content
  [
    '\begin{xmlelement*}{equation}',
    '    \begin{equation*}',
    "    #{key}",
    '    \end{equation*}',
    '    \end{xmlelement*}'
  ].join("\n")
end

#hyperrefs(string) ⇒ Object

Converts references to hyperrefs. We want to convert

Chapter~\ref{cha:foo}

to

\hyperref[cha:foo]{Chapter~\ref{cha:foo}}

which is then handled by LaTeX’s hyperref package or by Tralics (where it is converted to a link by the postprocessor). For completeness, we handle the case where the author neglects to use the nonbreaking space ~.



251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# File 'lib/polytexnic/literal.rb', line 251

# Wraps references in +string+ (in place) with \hyperref, e.g.
#
#   Chapter~\ref{cha:foo}  ->  \hyperref[cha:foo]{Chapter~\ref{cha:foo}}
#
# The optional leading word ("Chapter", "Fig.", ...) comes from
# +language_labels+ and may be separated from the reference by either "~" or a
# plain space. A bare \ref{...} / \eqref{...} is wrapped on its own.
#
# Returns the result of +gsub!+ (+nil+ when +string+ contains no references).
def hyperrefs(string)
  chapter  = language_labels["chapter"]["word"]
  section  = language_labels["section"]
  table    = language_labels["table"]
  box      = language_labels["aside"]
  figure   = language_labels["figure"]
  fig      = language_labels["fig"]
  listing  = language_labels["listing"]
  equation = language_labels["equation"]
  eq       = language_labels["eq"]
  # The abbreviations are followed by a literal period. It must be written as
  # "\\." here: in a double-quoted string "\." collapses to ".", which the
  # regex would then treat as "any character".
  # NOTE(review): the labels themselves are interpolated unescaped; confirm
  # they never contain regex metacharacters.
  linked_item = "(#{chapter}|#{section}|#{table}|#{box}|#{figure}" +
                "|#{fig}\\.|#{listing}|#{equation}|#{eq}\\.)"
  # Groups: 1 = linked word, 2 = separator, 3 = \ref or \eqref, 4 = label.
  ref = /(?:#{linked_item}(~| ))*(\\(?:eq)*ref){(.*?)}/i
  string.gsub!(ref) do
    "\\hyperref[#{$4}]{#{$1}#{$2}#{$3}{#{$4}}}"
  end
end

#include_code(filename, sectionname, custom_language, highlight_options) ⇒ Object

Returns the marked up file or section to be included, or an error message if file or section does not exist.



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/polytexnic/literal.rb', line 165

# Builds the lines that stand in for a code-inclusion directive.
#
# filename          - path of the file to include.
# sectionname       - optional section name; when present an
#                     IncludedSectionReader is used, otherwise an
#                     IncludedFileReader.
# custom_language   - optional language overriding the file extension.
# highlight_options - optional options text appended verbatim to the
#                     language on the "%= lang:" line.
#
# Returns an Array of lines: a "%= lang:..." line, '\begin{code}', the lines
# returned by the reader, and '\end{code}'. If the reader raises FileNotFound
# or SectionNotFound, returns the +code_error+ String instead.
def include_code(filename, sectionname, custom_language, highlight_options)
  reader_class = sectionname ? IncludedSectionReader : IncludedFileReader
  reader = reader_class.new
  language = code_language(filename, custom_language)
  header = ["%= lang:#{language}#{highlight_options}", '\begin{code}']
  header + reader.read(filename, sectionname) + ['\end{code}']
rescue FileNotFound => e
  code_error("File '#{e.message}' does not exist")
rescue SectionNotFound => e
  section = e.message
  code_error("Could not find section header '#{section}' in file '#{filename}'")
end

#literal_types ⇒ Object

Returns a list of all literal types.



39
40
41
# File 'lib/polytexnic/literal.rb', line 39

# Returns the names of all literal environment types: the verbatim-style and
# code environments followed by every entry of +math_environments+.
#
# NOTE(review): 'metadcode' looks like it may be a misspelling of another
# environment name; left unchanged pending confirmation.
def literal_types
  # 'Verbatim' (capitalized) was previously misspelled 'Vertatim', so that
  # environment name was never included in the list.
  %w[verbatim Verbatim code metadcode] + math_environments
end

#math_environments ⇒ Object

Returns supported math environments. Note that the custom AMS-TeX environments are supported in addition to the LaTeX defaults.



30
31
32
33
34
35
36
# File 'lib/polytexnic/literal.rb', line 30

# Returns the names of the supported math environments as an Array of
# Strings, including starred variants, grouped here one family per line.
def math_environments
  [
    'align', 'align*',
    'eqnarray', 'eqnarray*',
    'equation', 'equation*',
    'gather', 'gather*', 'gathered',
    'multline', 'multline*'
  ]
end