Module: Polytexnic::Literal

Extended by:
Literal
Included in:
Literal, Preprocessor, Preprocessor::Polytex, String
Defined in:
lib/polytexnic/literal.rb

Constant Summary collapse

LANG_REGEX =

Matches the line for syntax highlighting. %= lang: <language>[, options: …]

/^\s*%=\s+lang:\s*([\w+]+)(?:,\s*options:(.*))?/

Instance Method Summary collapse

Instance Method Details

#cache_display_inline_math(output) ⇒ Object

Caches both display and inline math.



157
158
159
160
161
162
# File 'lib/polytexnic/literal.rb', line 157

# Runs the display-math cache pass and then the inline-math cache pass over
# +output+.
#
# @param output [String] text handed to both caching passes
# @return [String] the very same +output+ object that was passed in
def cache_display_inline_math(output)
  cache_display_math(output)
  cache_inline_math(output)
  output
end

#cache_display_math(output) ⇒ Object

Caches display math. We support both TeX-style $$…$$ and LaTeX-style \[ … \].



166
167
168
169
170
171
# File 'lib/polytexnic/literal.rb', line 166

# Replaces each display-math span in +output+ (either \[ ... \] or $$ ... $$)
# with the result of +equation_element+, normalizing the delimiters to
# "\[ ... \]" before handing the math over.
#
# @param output [String] text to rewrite in place
# @return [String, nil] +output+ if anything was replaced, otherwise nil
#   (the usual +gsub!+ contract)
def cache_display_math(output)
  display_math = /\\\[(.*?)\\\]|\$\$(.*?)\$\$/m
  output.gsub!(display_math) do
    match = Regexp.last_match
    # Group 1 is the LaTeX-style body, group 2 the TeX-style body.
    body = match[1] || match[2]
    equation_element("\\[ #{body} \\]")
  end
end

#cache_inline_math(output) ⇒ Object

Caches inline math. We support both TeX-style $…$ and LaTeX-style \( … \). There’s an annoying edge case involving literal dollar signs, as in \$. Handling it significantly complicates the regex, and necessitates introducing an additional group to catch the character before the math dollar sign in $2 and prepend it to the inline math element.



192
193
194
195
196
197
198
199
# File 'lib/polytexnic/literal.rb', line 192

# Replaces each inline-math span in +output+ (either \( ... \) or $ ... $)
# with an 'inline' xmlelement wrapping a digest key, and stores the
# normalized "\( ... \)" math in +literal_cache+ under that key.
#
# @param output [String] text to rewrite in place
# @return [String, nil] +output+ if anything was replaced, otherwise nil
def cache_inline_math(output)
  inline_math = /(?:\\\((.*?)\\\)|([^\\]|^)\$(.*?[^\\])\$)/m
  output.gsub!(inline_math) do
    match = Regexp.last_match
    # Group 2 is the character consumed just before a TeX-style opening
    # dollar sign; it is not part of the math and must be put back.
    leading = match[2].to_s
    wrapped = "\\( #{match[1] || match[3]} \\)"
    cache_key = digest(wrapped)
    literal_cache[cache_key] = wrapped
    leading + xmlelement('inline') { cache_key }
  end
end

#cache_literal(polytex, format = :html) ⇒ Object

Makes the caches for literal environments.



11
12
13
14
15
16
# File 'lib/polytexnic/literal.rb', line 11

# Splits +polytex+ into lines, lets +cache_literal_environments+ fill an
# accumulator from them, and joins the accumulated lines back together.
#
# @param polytex [String] source text
# @param format [Symbol] passed through to +cache_literal_environments+
# @return [String] the accumulated lines joined with newlines
def cache_literal(polytex, format = :html)
  cached_lines = []
  cache_literal_environments(polytex.split("\n"), cached_lines, format)
  cached_lines.join("\n")
end

#cache_literal_environments(lines, output, format, cache = nil) ⇒ Object

Handles environments that should be passed through the pipeline intact. This includes verbatim environments (‘verbatim’, ‘Verbatim’) and all the equation environments handled by MathJax (‘equation’, ‘align’, etc.). We take care to keep count of the number of begins we see so that the code handles nested environments correctly. I.e.,

\begin{verbatim}
  \begin{verbatim}
  \emph{foo bar}
  \end{verbatim}
\end{verbatim}
lorem ipsum

includes the internal literal text without stopping after the first \end{verbatim}.

The control flow here is really nasty, but attempts to refactor it into a multi-pass solution have only resulted in even more complexity, and even then I’ve failed to get it to work. Thus, it shall for now follow the “ball of mud” pattern. (The only saving grace is that it’s very thoroughly tested.)



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/polytexnic/literal.rb', line 53

# Walks +lines+, moving the body of every literal environment into a cache
# and appending a placeholder for it to +output+. All other lines are
# appended to +output+ unchanged, except "%= lang: ..." lines, which are
# swallowed, and code-inclusion lines, which are expanded in place.
#
# +lines+ is consumed destructively (via +shift+ / +unshift+) and +output+ is
# appended to in place.
#
# @param lines [Array<String>] source lines; emptied as they are processed
# @param output [Array<String>] accumulator receiving the processed lines
# @param format [Symbol] +:latex+ enables the LaTeX-specific handling; any
#   other value is treated as HTML
# @param cache [Object, nil] not used by this method; kept so existing
#   callers keep working
# @return [nil]
# @raise [RuntimeError] if a literal environment is opened but never closed
def cache_literal_environments(lines, output, format, cache = nil)
  latex = (format == :latex)
  language = nil
  in_verbatim = false
  in_codelisting = false
  while (line = lines.shift)
    if line =~ LANG_REGEX && !in_verbatim
      # Remember the language (and options) for the next literal
      # environment; the directive line itself is not emitted.
      language = $1
      highlight_options = $2
    elsif line =~ /\s*\\begin\{codelisting\}/ && !in_verbatim
      in_codelisting = true
      output << line
    elsif line =~ /\s*\\end\{codelisting\}/ && !in_verbatim
      in_codelisting = false
      output << line
    elsif (included_code = CodeInclusion::Code.for(line)) && !in_verbatim
      # Reduce to a previously solved problem.
      # We transform
      # %= <<(/path/to/file.rb)
      # to
      # %= lang:rb
      # \begin{code}
      # <content of file or section.rb>
      # \end{code}
      # and then prepend the code to the current `lines` array.
      lines.unshift(*included_code.to_s)
    elsif line.begin_literal?
      in_verbatim = true
      # Saved now because `line` is reassigned by the inner loop below.
      literal_type = line.literal_type
      skip = line.math_environment? || latex
      if line.math_environment? && !latex
        output << '\begin{xmlelement*}{equation}'
        output << '\begin{equation}'
      end
      math = line.math_environment?
      label = nil
      output << xmlelement(element(literal_type), skip) do
        # Nesting depth of environments of this literal type.
        count = 1
        text = []
        # The \begin line is kept in the cached text for math, and for
        # LaTeX output that has no language set.
        text << line if line.math_environment? || (latex && !language)
        while (line = lines.shift)
          if line.begin_literal?(literal_type)
            count += 1
          elsif line.end_literal?(literal_type)
            count -= 1
            if count.zero?
              in_verbatim = false
              text << line if line.math_environment? || (latex && !language)
              break
            end
          end
          label = line if math && line =~ /^\s*\\label{.*?}\s*$/
          text << line
        end
        # If the loop ran out of lines, `line` is nil here, so the error
        # message must use the environment name saved when it was opened.
        raise "Missing \\end{#{literal_type}}" if count != 0
        content = text.join("\n")
        if math
          key = digest(content)
          literal_cache[key] = content
        elsif language.nil?
          key = digest(content)
          literal_cache[key] = content
          tag = 'literal'
        else
          # A separate local, so the `format` parameter is not clobbered.
          format_name = latex ? 'latex' : 'html'
          id = "#{content}--#{language}--#{format_name}--#{in_codelisting}--#{highlight_options}"
          key = digest(id, salt: code_salt)
          code_cache[key] = [content, language, in_codelisting, highlight_options]
          tag = 'code'
        end
        if latex || tag == 'code' || math
          key
        else
          xmlelement(tag) { key }
        end
      end
      if math && !latex
        unless label.nil?
          key = digest(label)
          math_label_cache[key] = label
          output << key
        end
        output << '\end{equation}'
        unless label.nil?
          string = label.scan(/\{(.*?)\}/).flatten.first
          string = string.gsub(':', '-').gsub('_', underscore_digest)
          output << "\\xbox{data-label}{#{string}}"
        end
        output << '\end{xmlelement*}'
      end
      # The language applies to a single literal environment only.
      language = nil
      (output << '') unless latex # Force the next element to be a paragraph
    else
      output << line
    end
  end
end

#cache_unicode(string) ⇒ Object

Handles non-ASCII Unicode characters. The Tralics part of the pipeline doesn’t properly handle Unicode, which is odd since Tralics is a French project. Nevertheless, we can hack around the restriction by treating non-ASCII Unicode characters as literal elements and simply pass them through the pipeline intact.



235
236
237
238
239
240
241
242
# File 'lib/polytexnic/literal.rb', line 235

# Replaces every run of non-ASCII characters in +string+ with its digest key
# and records the original run in +unicode_cache+ under that key.
#
# @param string [String] text to rewrite in place
# @return [String, nil] +string+ if anything was replaced, otherwise nil
def cache_unicode(string)
  # The capture group spans the whole pattern, so the block argument (the
  # full match) is the same text as the group.
  string.gsub!(/([^\x00-\x7F]+)/) do |run|
    digest(run).tap { |key| unicode_cache[key] = run }
  end
end

#code_salt ⇒ Object

Returns a permanent salt for the syntax highlighting cache.



152
153
154
# File 'lib/polytexnic/literal.rb', line 152

# Fixed salt string; +cache_literal_environments+ passes it to +digest+ when
# computing keys for the code cache.
#
# @return [String] a 40-character hexadecimal string
def code_salt
  "fbbc13ed4a51e27608037365e1d27a5f992b6339"
end

#element(literal_type) ⇒ Object



244
245
246
247
248
249
250
# File 'lib/polytexnic/literal.rb', line 244

# Maps a literal type to its element name: every math environment becomes
# 'equation', and anything else is returned unchanged.
#
# @param literal_type [String] environment name, e.g. 'align' or 'verbatim'
# @return [String] 'equation' or +literal_type+ itself
def element(literal_type)
  return 'equation' if math_environments.include?(literal_type)

  literal_type
end

#equation_element(content) ⇒ Object

Returns an equation element while caching the given content. We use this only for unnumbered, display equations, which requires using the `equation*` environment in place of `equation`.



176
177
178
179
180
181
182
183
184
# File 'lib/polytexnic/literal.rb', line 176

# Stores +content+ in +literal_cache+ under its digest and returns an
# unnumbered equation wrapper (xmlelement* + equation*) around that key.
#
# @param content [String] math to cache
# @return [String] five-line snippet whose middle line is the cache key
def equation_element(content)
  cache_key = digest(content)
  literal_cache[cache_key] = content
  # Every line after the first carries a four-space indent.
  break_and_indent = "\n    "
  "\\begin{xmlelement*}{equation}" \
    "#{break_and_indent}\\begin{equation*}" \
    "#{break_and_indent}#{cache_key}" \
    "#{break_and_indent}\\end{equation*}" \
    "#{break_and_indent}\\end{xmlelement*}"
end

#hyperrefs(string) ⇒ Object

Converts references to hyperrefs. We want to convert

Chapter~\ref{cha:foo}

to

\hyperref[cha:foo]{Chapter~\ref{cha:foo}}

which is then handled by LaTeX’s hyperref package or by Tralics (where it is converted to a link by the postprocessor). For completeness, we handle the case where the author neglects to use the nonbreaking space ~.



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/polytexnic/literal.rb', line 211

# Wraps every \ref{...} / \eqref{...} in +string+ in a \hyperref, pulling a
# directly preceding item word ("Chapter", "Fig.", ...) and its separator
# ("~" or a space) into the link text. The item words come from
# +language_labels+, and matching is case-insensitive.
#
# @param string [String] text to rewrite in place
# @return [String, nil] +string+ if anything was replaced, otherwise nil
def hyperrefs(string)
  chapter  = language_labels["chapter"]["word"]
  section  = language_labels["section"]
  table    = language_labels["table"]
  box      = language_labels["aside"]
  figure   = language_labels["figure"]
  fig      = language_labels["fig"]
  listing  = language_labels["listing"]
  equation = language_labels["equation"]
  eq       = language_labels["eq"]
  # The abbreviations are followed by a literal period. It has to be written
  # as "\\." here: inside a double-quoted string "\." collapses to a bare
  # ".", which the regex would then treat as "any character".
  linked_item = "(#{chapter}|#{section}|#{table}|#{box}|#{figure}" \
                "|#{fig}\\.|#{listing}|#{equation}|#{eq}\\.)"
  ref = /(?:#{linked_item}(~| ))*(\\(?:eq)*ref)\{(.*?)\}/i
  string.gsub!(ref) do
    match = Regexp.last_match
    item, separator, command, target = match[1], match[2], match[3], match[4]
    "\\hyperref[#{target}]{#{item}#{separator}#{command}{#{target}}}"
  end
end

#literal_types ⇒ Object

Returns a list of all literal types.



30
31
32
# File 'lib/polytexnic/literal.rb', line 30

# Returns every literal type: the verbatim/code environments followed by all
# math environments.
#
# @return [Array<String>] environment names
def literal_types
  # 'Verbatim' was previously misspelled 'Vertatim', so that environment was
  # missing from this list.
  %w[verbatim Verbatim code metacode] + math_environments
end

#math_environments ⇒ Object

Returns supported math environments. Note that the custom AMS-TeX environments are supported in addition to the LaTeX defaults.



21
22
23
24
25
26
27
# File 'lib/polytexnic/literal.rb', line 21

# Lists the supported math environments. Most come as a plain/starred pair;
# 'gathered' has no starred form here.
#
# @return [Array<String>] a new array of environment names on each call
def math_environments
  paired = %w[align eqnarray equation gather].flat_map do |name|
    [name, "#{name}*"]
  end
  paired + %w[gathered multline multline*]
end