Class: Interscript::Stdlib

Inherits:
Object
  • Object
show all
Defined in:
lib/interscript/stdlib.rb

Defined Under Namespace

Modules: Functions

Constant Summary collapse

# Maps each alias name (Symbol) to the string it stands for. Most values are
# regexp source fragments; +none+ and +space+ are plain literal strings.
# Frozen so the shared table cannot be modified by accident at runtime.
ALIASES = {
  any_character: '.',
  none: "",
  space: " ",
  whitespace: "[\\b \\t\\0\\r\\n]",
  boundary: "\\b",
  non_word_boundary: "\\B",
  word: "\\w",
  not_word: "\\W",
  alpha: "[a-zA-Z]",
  not_alpha: "[^a-zA-Z]",
  digit: "\\d",
  not_digit: "\\D",
  line_start: "^",
  line_end: "$",
  string_start: "\\A",
  string_end: "\\z"
}.freeze

Class Method Summary collapse

Class Method Details

.available_functions ⇒ Object



173
174
175
# File 'lib/interscript/stdlib.rb', line 173

# Lists the names of the functions this library exposes.
#
# @return [Array<Symbol>] a freshly built array of function names
def self.available_functions
  [
    :title_case, :downcase,
    :compose, :decompose,
    :separate, :unseparate,
    :secryst,
    :rababa, :rababa_reverse
  ]
end

.boundary_like_alias?(a) ⇒ Boolean

Returns:

  • (Boolean)


25
26
27
# File 'lib/interscript/stdlib.rb', line 25

# Tells whether the given alias name is one of the anchor/boundary aliases
# (line and string anchors, word boundary and non-word boundary).
#
# @param a [Symbol] alias name to check
# @return [Boolean]
def self.boundary_like_alias?(a)
  anchors = [
    :line_start, :line_end,
    :string_start, :string_end,
    :boundary, :non_word_boundary
  ]
  anchors.include?(a)
end

.deterministic_sort_by_max_length(ary) ⇒ Object

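Ruby's sort_by is not stable, so elements with equal keys may come back in a different order (observed on Windows at least). This method breaks ties by the element's original index so that the result is deterministic.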



166
167
168
169
170
171
# File 'lib/interscript/stdlib.rb', line 166

# Sorts the elements by descending +max_length+, keeping elements with equal
# +max_length+ in their original relative order.
#
# @param ary [Enumerable] elements responding to +max_length+
# @return [Array] the elements, longest first
def self.deterministic_sort_by_max_length(ary)
  # Deterministic on Linux:
  # ary.sort_by{ |rule| -rule.max_length }

  # A composite [primary, tie-breaker] key is compared element by element, so
  # the index can never bleed into the length ordering. The earlier
  # `-max_length*100000 + idx` key did once the index reached 100000.
  ary.each_with_index.sort_by { |rule, idx| [-rule.max_length, idx] }.map(&:first)
end

.parallel_regexp_compile(subs_hash) ⇒ Object



31
32
33
34
35
36
37
38
# File 'lib/interscript/stdlib.rb', line 31

# Builds a single alternation regexp out of the given entries. The first
# element of every entry is wrapped in a named group +_N+, where N is the
# entry's position, and the groups are joined with "|".
#
# @param subs_hash [Enumerable] entries whose element at index 0 is the pattern source
# @return [Regexp] the compiled alternation
def self.parallel_regexp_compile(subs_hash)
  alternatives = []
  subs_hash.each_with_index do |entry, position|
    alternatives << format("(?<_%d>%s)", position, entry[0])
  end
  Regexp.compile(alternatives.join("|"))
end

.parallel_regexp_gsub(string, subs_regexp, subs_hash) ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/interscript/stdlib.rb', line 40

# Replaces every match of +subs_regexp+ in +string+ with the entry of
# +subs_hash+ selected by the matching named group.
#
# The first named group that captured something is taken; its name minus the
# first character is converted to an integer and used as the lookup index.
# NOTE(review): presumably the regexp comes from parallel_regexp_compile
# (groups named "_0", "_1", ...) - confirm against callers.
#
# @param string [String] input text
# @param subs_regexp [Regexp] alternation with named groups
# @param subs_hash [#[]] replacements, looked up by integer index
# @return [String] a new string with the replacements applied
def self.parallel_regexp_gsub(string, subs_regexp, subs_hash)
  string.gsub(subs_regexp) do
    captured = Regexp.last_match.named_captures.compact
    group_name = captured.keys.first
    # Drop the leading character of the group name to get the index.
    subs_hash[group_name[1..-1].to_i]
  end
end

.parallel_regexp_gsub_debug(string, subs_regexp, subs_array) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/interscript/stdlib.rb', line 49

# Same substitution as a named-group gsub, but also records debug data in
# globals: +$subs_regexp+ receives the regexp and +$subs_matches+ is reset and
# then filled with one hash per match (+:begin+, +:end+, +:idx+, +:result+).
#
# @param string [String] input text
# @param subs_regexp [Regexp] alternation with named groups
# @param subs_array [#[]] replacements, looked up by integer index
# @return [String] a new string with the replacements applied
def self.parallel_regexp_gsub_debug(string, subs_regexp, subs_array)
  # only gathering debug info, test data is available in maps_analyze_staging
  $subs_matches = []
  $subs_regexp = subs_regexp
  string.gsub(subs_regexp) do
    md = Regexp.last_match
    # Name of the first group that actually captured something.
    group = md.named_captures.compact.keys.first
    index = group[1..-1].to_i
    replacement = subs_array[index]
    $subs_matches << {
      begin: md.begin(group),
      end: md.end(group),
      idx: index,
      result: replacement
    }
    replacement
  end
end

.parallel_replace(str, hash) ⇒ Object



159
160
161
162
# File 'lib/interscript/stdlib.rb', line 159

# Applies the replacements described by +hash+ to +str+ by compiling them into
# a tree and running the tree-based replacer over the string.
#
# @param str [String] input text
# @param hash [Hash, Array] replacement pairs, passed to parallel_replace_compile_tree
# @return [Object] whatever parallel_replace_tree returns
def self.parallel_replace(str, hash)
  parallel_replace_tree(str, parallel_replace_compile_tree(hash))
end

.parallel_replace_compile_hash(a) ⇒ Object



69
70
71
72
73
74
75
# File 'lib/interscript/stdlib.rb', line 69

# Turns a collection of (from, to) pairs into a Hash. When the same +from+
# occurs more than once, the last pair wins.
#
# @param a [Enumerable] pairs of source and replacement
# @return [Hash] a new hash mapping each source to its replacement
def self.parallel_replace_compile_hash(a)
  a.each_with_object({}) do |(from, to), table|
    table[from] = to
  end
end

.parallel_replace_compile_tree(hash) ⇒ Object

hash can be either a hash or a hash-like array



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/interscript/stdlib.rb', line 98

# Compiles replacement pairs into a lookup tree (trie).
#
# Every level of the tree is a Hash keyed by the character's codepoint
# (String#ord). A +nil+ key in a node holds the replacement for the source
# string that ends at that node. A pair's source may be a single string or an
# array of strings that all share one replacement.
#
# Compiled trees are memoised in +@treecache+ under +hash.hash+.
# NOTE(review): two different inputs with colliding #hash values would share
# a cache entry - confirm this is acceptable.
#
# @param hash [Hash, Array] replacement pairs (a hash or a hash-like array)
# @return [Hash] the compiled (possibly cached) tree
def self.parallel_replace_compile_tree(hash)
  # Initialise lazily so the method does not depend on the cache having been
  # set up elsewhere.
  @treecache ||= {}
  cache_key = hash.hash

  @treecache[cache_key] ||= begin
    tree = {}
    hash.each do |from, to|
      Array(from).each do |source|
        # An empty source can never match anything; skip it rather than
        # failing on a missing last character.
        next if source.empty?

        # Walk down the tree, creating missing nodes on the way.
        leaf = source.each_char.reduce(tree) do |branch, char|
          branch[char.ord] ||= {}
        end
        leaf[nil] = to
      end
    end
    tree
  end
end

.parallel_replace_hash(str, h) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/interscript/stdlib.rb', line 77

# Replaces substrings of +str+ using the table +h+, scanning left to right and
# always preferring the longest key that matches at the current position.
# Characters not covered by any key are copied unchanged.
#
# @param str [String] input text
# @param h [Hash{String=>String}] source substring => replacement
# @return [String] a new string with the replacements applied
def self.parallel_replace_hash(str, h)
  newstr = ""
  len = str.length
  # With an empty table there is nothing to look up; 0 makes the search below
  # a no-op instead of failing on nil.
  max_key_len = h.keys.map(&:length).max || 0
  i = 0
  while i < len
    # Try the longest candidate first and stop at the first hit, so that every
    # position gets the full range of lengths.
    hit_len = max_key_len.downto(1).find { |checked_len| h[str[i, checked_len]] }
    if hit_len
      substr = str[i, hit_len]
      newstr << h[substr]
      # Near the end of the string the slice may be shorter than hit_len.
      i += substr.length
    else
      newstr << str[i, 1]
      i += 1
    end
  end
  newstr
end

.parallel_replace_tree(str, tree) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/interscript/stdlib.rb', line 121

# Runs a compiled replacement tree over +str+.
#
# At each position the tree is followed character by character (nodes are
# keyed by codepoint). The deepest node that carries a +nil+ key along that
# path supplies the replacement, so the longest match wins. When no node on
# the path carries one, the current character is copied as is.
#
# @param str [String] input text
# @param tree [Hash] nested hashes keyed by codepoint, with nil => replacement
# @return [String] a new string with the replacements applied
def self.parallel_replace_tree(str, tree)
  output = ""
  total = str.length
  pos = 0

  until pos >= total
    node = tree
    matched_len = 0
    replacement = nil
    offset = 0

    # Descend as long as the next character has a branch.
    while pos + offset < total
      ch = str[pos + offset]
      break unless node.include?(ch.ord)

      node = node[ch.ord]
      offset += 1
      next unless node.include?(nil)

      # Remember the longest complete match seen so far.
      matched_len = offset
      replacement = node[nil]
    end

    if matched_len > 0
      output << replacement
      pos += matched_len
    else
      output << str[pos]
      pos += 1
    end
  end

  output
end

.re_only_alias?(a) ⇒ Boolean

Returns:

  • (Boolean)


21
22
23
# File 'lib/interscript/stdlib.rb', line 21

# Tells whether the given alias name is anything other than +:none+ or
# +:space+.
#
# @param a [Symbol] alias name to check
# @return [Boolean] false for :none and :space, true otherwise
def self.re_only_alias?(a)
  plain_aliases = [:none, :space]
  plain_aliases.none? { |plain| plain == a }
end

.reverse_function ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/interscript/stdlib.rb', line 177

# Maps each function name to the name of the function that undoes it.
# The mapping is symmetric: every pair is listed in both directions.
#
# @return [Hash{Symbol=>Symbol}] a freshly built lookup table
def self.reverse_function
  counterparts = [
    # Those two are best-effort, but probably wrong.
    [:title_case, :downcase],
    [:compose, :decompose],
    [:separate, :unseparate],
    [:rababa, :rababa_reverse]
  ]

  counterparts.each_with_object({}) do |(forward, backward), table|
    table[forward] = backward
    table[backward] = forward
  end
end