Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/searchlink/semver.rb,
lib/searchlink/string.rb,
lib/searchlink/curl/html.rb,
lib/searchlink/searches/hook.rb

Overview

Hookmark String helpers

Instance Method Summary collapse

Instance Method Details

#add_query_stringString

Format and append a query string

Returns:

  • (String)

    The formatted query string



86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/searchlink/string.rb', line 86

# Append the globally configured query parameters (SL.query) to this URL.
#
# The parameters are joined as "key=value" pairs with "&". They are attached
# with "&" when the string already contains a "?key=value" query, otherwise
# with "?".
#
# @return [String] the receiver itself when SL.query is empty, otherwise a
#   new string with the query appended
def add_query_string
  params = SL.query
  return self if params.empty?

  pairs = params.map { |k, v| "#{k}=#{v}" }.join("&")
  joiner = self =~ /\?[^= ]+=\S+/ ? "&" : "?"

  "#{self}#{joiner}#{pairs}"
end

#add_query_string!String

Destructive version of #add_query_string

Returns:

  • (String)

    The formatted query string

See Also:



103
104
105
# File 'lib/searchlink/string.rb', line 103

# In-place variant of #add_query_string.
#
# @return [String] the receiver, whose contents have been replaced with the
#   result of #add_query_string
def add_query_string!
  with_query = add_query_string
  replace(with_query)
end

#append_affiliate_string(aff_string) ⇒ String

Append an affiliate string to a URL

Parameters:

  • aff_string (String)

    The affiliate string

Returns:

  • (String)

    The URL with the affiliate string

See Also:



237
238
239
240
# File 'lib/searchlink/string.rb', line 237

# Append an affiliate string to this URL.
#
# Any leading "?" or "&" on +aff_string+ is swapped for the separator that
# fits the receiver: "&" when the URL already contains a "?", else "?".
#
# @param aff_string [String] the affiliate parameter(s), with or without a
#   leading "?"/"&"
# @return [String] a new string with the affiliate string attached
def append_affiliate_string(aff_string)
  joiner = if self =~ /\?/
             "&"
           else
             "?"
           end
  suffix = aff_string.sub(/^[?&]?/, joiner)
  "#{self}#{suffix}"
end

#append_affiliate_string!(aff_string) ⇒ String

Destructively append an affiliate string to a URL

Parameters:

  • aff_string (String)

    The affiliate string

Returns:

  • (String)

    The URL with the affiliate string

See Also:



249
250
251
# File 'lib/searchlink/string.rb', line 249

# In-place variant of #append_affiliate_string.
#
# @param aff_string [String] the affiliate string to attach
# @return [String] the receiver, whose contents have been replaced with the
#   result of #append_affiliate_string
def append_affiliate_string!(aff_string)
  tagged = append_affiliate_string(aff_string)
  replace(tagged)
end

#cleanString

Remove newlines, escape quotes, and remove Google Analytics strings

Returns:

  • (String)

    cleaned URL/String



211
212
213
214
215
216
217
# File 'lib/searchlink/string.rb', line 211

# Clean a URL/title string for output.
#
# Steps, in order: collapse runs of newlines into a single space, replace
# double quotes with the HTML entity, replace pipes with hyphens, strip
# utm_* tracking parameters that are followed by further parameters, remove
# a leftover "?&", and trim surrounding whitespace.
#
# @return [String] the cleaned string
def clean
  gsub(/\n+/, " ")
    # The entity needs its terminating semicolon; a bare "&quot" is not
    # decoded reliably when followed by letters, digits or "=".
    .gsub(/"/, "&quot;")
    .gsub(/\|/, "-")
    .gsub(/([&?]utm_[scm].+=[^&\s!,.)\]]++?)+(&.*)/, '\2')
    .sub(/\?&/, "").strip
end

#close_punctuationString

Complete incomplete punctuation pairs

Returns:

  • (String)

    string with all punctuation properly paired



303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# File 'lib/searchlink/string.rb', line 303

# Complete unbalanced punctuation pairs.
#
# Walks the whitespace-separated words, tracking openers (“ ‘ [ ( <) that
# have not yet been matched by their closer, then appends the missing
# closers in reverse order. Trailing characters other than letters, closers,
# periods and ellipses are replaced with "..." before the closers are added.
#
# Each opener/closer is counted at most once per word.
#
# @return [String] the receiver itself when it contains no opener, otherwise
#   a new string with the pending closers appended
def close_punctuation
  return self unless self =~ /[“‘\[(<]/

  punct_chars = {
    "“" => "”",
    "‘" => "’",
    "[" => "]",
    "(" => ")",
    "<" => ">"
  }

  left_punct = []

  split(/\s+/).each do |w|
    punct_chars.each do |opener, closer|
      left_punct.push(opener) if w.include?(opener)
      next unless w.include?(closer)

      # A closer with no pending opener (e.g. "foo) (bar" or "a > b (c")
      # is ignored; rindex returns nil then and delete_at(nil) would raise.
      idx = left_punct.rindex(opener)
      left_punct.delete_at(idx) if idx
    end
  end

  tail = left_punct.reverse.map { |c| punct_chars[c] }.join

  gsub(/[^a-z)\]’”.…]+$/i, "...").strip + tail
end

#close_punctuation!Object

Destructive punctuation close

See Also:



293
294
295
# File 'lib/searchlink/string.rb', line 293

# In-place variant of #close_punctuation.
#
# @return [String] the receiver, whose contents have been replaced with the
#   result of #close_punctuation
def close_punctuation!
  balanced = close_punctuation
  replace(balanced)
end

#code_indentString

Indent each line of string with 4 spaces

Returns:

  • (String)

    indented string



603
604
605
# File 'lib/searchlink/string.rb', line 603

# Indent every line of the string with four spaces.
#
# Lines are obtained with split(/\n/), so trailing empty lines are dropped.
#
# @return [String] the indented text, lines joined with "\n"
def code_indent
  indented = split(/\n/).map do |line|
    "    #{line}"
  end
  indented.join("\n")
end

#distance(t) ⇒ Object



515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
# File 'lib/searchlink/string.rb', line 515

# Edit distance between the receiver and +t+, counting single-character
# deletions, insertions and substitutions (each costs 1).
#
# @param t [String] the string to compare against
# @return [Integer] the number of edits needed to turn the receiver into +t+
def distance(t)
  source_len = length
  target_len = t.length
  return source_len if target_len.zero?
  return target_len if source_len.zero?

  # grid[row][col] holds the distance between the first +row+ characters of
  # the receiver and the first +col+ characters of t.
  grid = Array.new(source_len + 1) { |row| [row] + Array.new(target_len) }
  grid[0] = (0..target_len).to_a

  1.upto(target_len) do |col|
    1.upto(source_len) do |row|
      grid[row][col] =
        if self[row - 1] == t[col - 1]
          # characters agree, nothing to pay
          grid[row - 1][col - 1]
        else
          cheapest = [
            grid[row - 1][col],    # deletion
            grid[row][col - 1],    # insertion
            grid[row - 1][col - 1] # substitution
          ].min
          cheapest + 1
        end
    end
  end

  grid[source_len][target_len]
end

#extract_query(known_queries = {}) ⇒ Object

Extract query string from search string



41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/searchlink/string.rb', line 41

# Pull "?key=value&key2=value2" style query strings out of a search string.
#
# Every key/value pair found is stored in +known_queries+ (later pairs
# overwrite earlier ones with the same key) and the matched text is removed
# from the string. Runs of spaces left behind are collapsed.
#
# @param known_queries [Hash] hash to merge the extracted pairs into
# @return [Array(Hash, String)] the updated hash and the remaining string
def extract_query(known_queries = {})
  remainder = gsub(/\?((\S+?)=(\S+?)(?=&|$|\s))+/) do |mtch|
    mtch.sub(/^\?/, "").split("&").each do |token|
      # "a=b&&c=d" produces an empty token; skip it rather than store a nil key
      next if token.empty?

      # Split on the first "=" only so values containing "=" stay intact
      key, value = token.split("=", 2)
      known_queries[key] = value
    end

    ""
  end.gsub(/ +/, " ").strip

  [known_queries, remainder]
end

#extract_shortenerObject

Extract a shortener from a string



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/searchlink/string.rb', line 57

# Detect a trailing shortener suffix (_i, _b or _t, case-insensitive),
# record the matching service in SL.shortener and strip the suffix.
#
# The text after the last underscore is tested for "i" (:isgd), then "b"
# (:bitly), then "t" (:tinyurl); :none is the fallback.
#
# @return [String] the receiver itself when no suffix is present, otherwise
#   a new string without the suffix
def extract_shortener
  return self unless self =~ /_[ibt]$/i

  code = split(/_/).last
  SL.shortener = if code =~ /i/i
                   :isgd
                 elsif code =~ /b/i
                   :bitly
                 elsif code =~ /t/i
                   :tinyurl
                 else
                   :none
                 end

  sub(/_[ibt]$/i, "")
end

#extract_shortener!String

Destructive version of #extract_shortener

Returns:

  • (String)

    The string without the shortener

See Also:



78
79
80
# File 'lib/searchlink/string.rb', line 78

# In-place variant of #extract_shortener.
#
# @return [String] the receiver, whose contents have been replaced with the
#   result of #extract_shortener
def extract_shortener!
  stripped = extract_shortener
  replace(stripped)
end

#fix_gist_file ⇒ String

Convert file-myfile-rb to myfile.rb

Returns:

  • (String)

    The filename with its extension restored


186
187
188
# File 'lib/searchlink/string.rb', line 186

# Convert a gist anchor such as "file-myfile-rb" into a filename such as
# "myfile.rb": the leading "file-" is dropped and the last hyphen-separated
# segment becomes the extension.
#
# @return [String] the reconstructed filename
def fix_gist_file
  without_prefix = sub(/^file-/, "")
  without_prefix.sub(/-([^-]+)$/, '.\1')
end

#matches_all(terms) ⇒ Object

Test that self matches every word in terms

Parameters:

  • terms (String)

    The terms to test



578
579
580
581
582
# File 'lib/searchlink/string.rb', line 578

# Test that the receiver matches every one of +terms+.
#
# The receiver is compared with everything except letters, digits and
# spaces removed.
#
# @param terms [String, Array<Regexp>] a string (converted with
#   #to_rx_array) or a ready-made list of patterns
# @return [Boolean] true when every pattern matches (also for an empty list)
def matches_all(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  comparable = gsub(/[^a-z0-9 ]/i, "")
  patterns.all? { |rx| comparable =~ rx }
end

#matches_any(terms) ⇒ Object

Test if self contains any of terms

Parameters:

  • terms (String)

    The terms to test



567
568
569
570
571
# File 'lib/searchlink/string.rb', line 567

# Test whether the receiver matches at least one of +terms+.
#
# The receiver is compared with everything except letters, digits and
# spaces removed.
#
# @param terms [String, Array<Regexp>] a string (converted with
#   #to_rx_array) or a ready-made list of patterns
# @return [Boolean] true when any pattern matches (false for an empty list)
def matches_any(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  comparable = gsub(/[^a-z0-9 ]/i, "")
  patterns.any? { |rx| comparable =~ rx }
end

#matches_exact(string) ⇒ Object

Test if self contains an exact match for string (case insensitive)

Parameters:

  • string (String)

    The string to match



546
547
548
549
# File 'lib/searchlink/string.rb', line 546

# Test whether the receiver contains +string+ as a phrase, ignoring case,
# punctuation and the amount of spacing between words.
#
# Both sides are reduced to letters, digits and spaces before comparing;
# the phrase must start at a word boundary.
#
# @param string [String] the phrase to look for
# @return [Integer, nil] index of the match in the stripped receiver, or nil
def matches_exact(string)
  haystack = gsub(/[^a-z0-9 ]/i, "")
  phrase_words = string.gsub(/[^a-z0-9 ]/i, '').split(/ +/).map { |s| Regexp.escape(s) }
  haystack =~ /\b#{phrase_words.join(' +')}/i
end

#matches_fuzzy(terms, separator: " ", start_word: true, threshhold: 5) ⇒ Object



501
502
503
504
505
506
507
508
509
510
511
512
513
# File 'lib/searchlink/string.rb', line 501

# Score how closely the words of the receiver match the words of +terms+
# using edit distance.
#
# Every (receiver word, term word) pair whose #distance is at most
# +threshhold+ counts as one match; the count is divided by the number of
# term words and scaled by 10.
#
# @param terms [String] the terms to compare against
# @param separator [String] word separator, interpolated into a regular
#   expression as-is
# @param start_word [Boolean] accepted for signature parity with
#   #matches_score; not used by this method
# @param threshhold [Integer] maximum edit distance that counts as a match
# @return [Numeric] the score rounded to 3 places; 0 when +terms+ has no words
def matches_fuzzy(terms, separator: " ", start_word: true, threshhold: 5)
  # Non-capturing group: with a capturing group String#split also returns the
  # separators themselves, which would then be scored as if they were words.
  splitter = /(?:#{separator})+/
  sources = split(splitter)
  words = terms.split(splitter)
  # Avoid 0 / 0.0 (NaN) when there is nothing to compare against
  return 0 if words.empty?

  matches = sources.sum do |src|
    words.count { |term| src.distance(term) <= threshhold }
  end

  ((matches / words.count.to_f) * 10).round(3)
end

#matches_none(terms) ⇒ Object

Test that self does not contain any of terms

Parameters:

  • terms (String)

    The terms to test



556
557
558
559
560
# File 'lib/searchlink/string.rb', line 556

# Test that the receiver matches none of +terms+.
#
# The receiver is compared with everything except letters, digits and
# spaces removed.
#
# @param terms [String, Array<Regexp>] a string (converted with
#   #to_rx_array) or a ready-made list of patterns
# @return [Boolean] true when no pattern matches (also for an empty list)
def matches_none(terms)
  patterns = terms.is_a?(String) ? terms.to_rx_array : terms
  comparable = gsub(/[^a-z0-9 ]/i, "")
  patterns.none? { |rx| comparable =~ rx }
end

#matches_score(terms, separator: " ", start_word: true) ⇒ Object

Score string based on number of matches, 0 - 10

Parameters:

  • terms (String)

    The terms to match

  • separator (String) (defaults to: " ")

    The word separator

  • start_word (Boolean) (defaults to: true)

    Require match to be at beginning of word



488
489
490
491
492
493
494
495
496
497
498
499
# File 'lib/searchlink/string.rb', line 488

# Score the receiver by the share of +terms+ it matches, on a 0-10 scale.
#
# @param terms [String] the terms to match, converted with #to_rx_array
# @param separator [String] the word separator passed to #to_rx_array
# @param start_word [Boolean] require matches at the beginning of a word
# @return [Numeric] Integer 0 when nothing matches, otherwise the fraction
#   of matching terms times 10, rounded to 3 places
def matches_score(terms, separator: " ", start_word: true)
  patterns = terms.to_rx_array(separator: separator, start_word: start_word)
  hits = patterns.count { |rx| self =~ rx }

  return 0 if hits.zero?

  ((hits / patterns.count.to_f) * 10).round(3)
end

#nil_if_missingNil, String

Test an AppleScript response, substituting nil for ‘Missing Value’

Returns:

  • (Nil, String)

    nil if string is “missing value”



473
474
475
476
477
# File 'lib/searchlink/string.rb', line 473

# Map an AppleScript "missing value" response to nil.
#
# @return [String, nil] nil when the string contains "missing value",
#   otherwise the receiver itself
def nil_if_missing
  self =~ /missing value/ ? nil : self
end

#normalize_triggerString

Adds ?: to any parentheticals in a regular expression to avoid match groups

Returns:

  • (String)

    modified regular expression



125
126
127
# File 'lib/searchlink/string.rb', line 125

# Prepare a trigger pattern for embedding in a larger regular expression.
#
# Opening parentheses not already followed by "?:" are made non-capturing,
# a leading "^" or "\A" and a trailing "$" or "\Z" are removed, and the
# result is lowercased.
#
# @return [String] the modified regular expression source
def normalize_trigger
  non_capturing = gsub(/\((?!\?:)/, "(?:")
  unanchored = non_capturing.gsub(/(^(\^|\\A)|(\$|\\Z)$)/, "")
  unanchored.downcase
end

#parse_flagsObject

Parse command line flags into long options



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/searchlink/string.rb', line 147

# Expand short flag clusters into long command line options.
#
# A cluster is "++" (enable) or "--" (disable) followed by one or more of
# the letters c, d, i, r, t, v, s. Each letter becomes "--name" or
# "--no-name". Runs of spaces in the result are collapsed.
#
# @return [String] the string with every flag cluster expanded
def parse_flags
  long_names = {
    "c" => "confirm",
    "d" => "debug",
    "i" => "inline",
    "r" => "prefix_random",
    "t" => "include_titles",
    "v" => "validate_links",
    "s" => "remove_seo"
  }

  gsub(/(\+\+|--)([dirtvsc]+)\b/) do
    m = Regexp.last_match
    bool = m[1] == "++" ? "" : "no-"
    # Every option carries a trailing space so combined flags such as "++cd"
    # stay separate ("--confirm --debug", not "--confirm--debug").
    expanded = m[2].chars.map { |flag| "--#{bool}#{long_names[flag]} " }

    " #{expanded.join}"
  end.gsub(/ +/, " ")
end

#parse_flags!Object



177
178
179
# File 'lib/searchlink/string.rb', line 177

# In-place variant of #parse_flags.
#
# @return [String] the receiver, whose contents have been replaced with the
#   result of #parse_flags
def parse_flags!
  expanded = parse_flags
  replace(expanded)
end

#path_elementsArray

Extract the most relevant portions from a URL path

Returns:

  • (Array)

    array of relevant path elements



275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/searchlink/string.rb', line 275

# Extract the most relevant segments from the path of a URL.
#
# @return [Array<String>] path segments that are at least 5 characters long
#   and not purely numeric
def path_elements
  trimmed = url_path
            # make sure the path ends with a slash
            .sub(%r{/?$}, "/")
            # drop the final segment when it contains a "." or "-"
            .sub(%r{/[^/]+[.-][^/]+/$}, "")
            # strip the leading and trailing slash
            .gsub(%r{(^/|/$)}, "")

  # discard numeric-only segments and those shorter than 5 characters
  trimmed.split(%r{/}).reject { |section| section =~ /^\d+$/ || section.length < 5 }
end

#remove_entitiesObject



6
7
8
# File 'lib/searchlink/curl/html.rb', line 6

# Replace every "&nbsp;" entity with a regular space.
#
# @return [String] a new string without non-breaking-space entities
def remove_entities
  gsub(/&nbsp;/) { " " }
end

#remove_protocolString

Remove the protocol from a URL

Returns:

  • (String)

    just hostname and path of URL



258
259
260
# File 'lib/searchlink/string.rb', line 258

# Strip a leading http, https, ftp, sftp or file scheme (including "://")
# from a URL.
#
# @return [String] the URL without its scheme
def remove_protocol
  without_scheme = sub(%r{^(https?|s?ftp|file)://}, "")
  without_scheme
end

#remove_seo(url) ⇒ String

Remove SEO elements from a title

Parameters:

  • url

    The url of the page from which the title came

Returns:



350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
# File 'lib/searchlink/string.rb', line 350

# Remove SEO elements (site name, taglines) from a page title.
#
# The title is split on common separators (| » « — – - · :) and the parts
# that look like the site's host name are dropped, unless the URL points at
# the site root. If separators remain afterwards, the longest part wins.
#
# When the URL cannot be parsed or has no host, or when processing raises,
# the receiver is returned unchanged; with SL.config["debug"] set the
# problem is also reported through SL.add_error.
#
# @param url [String] the URL of the page the title came from
# @return [String] the cleaned title, or the HTML-unescaped original when
#   the cleaned title would be 5 characters or shorter
def remove_seo(url)
  title = dup
  begin
    url = URI.parse(url)
    host = url.hostname
  rescue URI::InvalidURIError
    # An unparseable URL is handled like one without a host
    host = nil
  end
  unless host
    return self unless SL.config["debug"]

    SL.add_error("Invalid URL", "Could not remove SEO for #{url}")
    return self
  end

  path = url.path
  root_page = path =~ %r{^/?$} ? true : false

  # Normalize typographic entities before generic HTML unescaping
  title.gsub!(/\s*(&ndash;|&mdash;)\s*/, " - ")
  title.gsub!(/&[lr]dquo;/, '"')
  # Single curly quotes; this line previously repeated the dquo pattern and
  # therefore never matched anything.
  title.gsub!(/&[lr]squo;/, "'")
  title.gsub!(/&#8211;/, " — ")
  title = CGI.unescapeHTML(title)
  title.gsub!(/ +/, " ")

  seo_title_separators = %w[| » « — – - · :]

  begin
    re_parts = []

    # Host name without "www." and the TLD, split into labels of 3+ characters;
    # each label is matched loosely (any single character allowed between letters)
    host_parts = host.sub(/(?:www\.)?(.*?)\.[^.]+$/, '\1').split(/\./).delete_if { |p| p.length < 3 }
    h_re = !host_parts.empty? ? host_parts.map { |seg| seg.downcase.split(//).join(".?") }.join("|") : ""
    re_parts.push(h_re) unless h_re.empty?

    # p_re = path.path_elements.map{|seg| seg.downcase.split(//).join('.?') }.join('|')
    # re_parts.push(p_re) if p_re.length > 0

    site_re = "(#{re_parts.join('|')})"

    # Guards against looping forever when the site name cannot be removed
    dead_switch = 0

    while title.downcase.gsub(/[^a-z]/i, "") =~ /#{site_re}/i
      break if dead_switch > 5

      seo_title_separators.each_with_index do |sep, i|
        parts = title.split(/ *#{Regexp.escape(sep)} +/)

        next if parts.length == 1

        remaining_separators = seo_title_separators[i..].map { |s| Regexp.escape(s) }.join("")
        seps = Regexp.new("^[^#{remaining_separators}]+$")

        longest = parts.longest_element.strip

        unless parts.empty?
          # Drop parts that resemble the host name and contain no further
          # separators; nothing is dropped for a site's root page
          parts.delete_if do |pt|
            compressed = pt.strip.downcase.gsub(/[^a-z]/i, "")
            compressed =~ /#{site_re}/ && pt =~ seps ? !root_page : false
          end
        end

        title = if parts.empty?
                  longest
                elsif parts.length < 2
                  parts.join(sep)
                elsif parts.length > 2
                  parts.longest_element.strip
                else
                  parts.join(sep)
                end
      end
      dead_switch += 1
    end
  rescue StandardError => e
    return self unless SL.config["debug"]

    SL.add_error("Error SEO processing title for #{url}", e)
    return self
  end

  # If any separator is still present, keep only the longest part
  seps = Regexp.new(" *[#{seo_title_separators.map { |s| Regexp.escape(s) }.join('')}] +")
  if title =~ seps
    seo_parts = title.split(seps)
    title = seo_parts.longest_element.strip if seo_parts.length.positive?
  end

  title && title.length > 5 ? title.gsub(/\s+/, " ") : CGI.unescapeHTML(self)
end

#remove_seo!(url) ⇒ Object

Destructively remove SEO elements from a title

Parameters:

  • url

    The url of the page from which the title came

See Also:



339
340
341
# File 'lib/searchlink/string.rb', line 339

# In-place variant of #remove_seo.
#
# @param url [String] the URL of the page the title came from
# @return [String] the receiver, whose contents have been replaced with the
#   result of #remove_seo
def remove_seo!(url)
  cleaned = remove_seo(url)
  replace(cleaned)
end

#scrubupObject

Scrub invalid characters from string



31
32
33
# File 'lib/searchlink/string.rb', line 31

# Scrub invalid characters from the string.
#
# The text is transcoded to UTF-16 (replacing invalid byte sequences) and
# back to UTF-8, then non-breaking spaces (U+00A0) become regular spaces.
#
# @return [String] the scrubbed UTF-8 string
def scrubup
  utf16 = encode("utf-16", invalid: :replace)
  utf16.encode("utf-8").gsub(/\u00A0/, " ")
end

#scrubup!Object

See Also:

  • #scrub


36
37
38
# File 'lib/searchlink/string.rb', line 36

# In-place variant of #scrubup.
#
# Delegates to #scrubup, like the other bang methods delegate to their
# non-bang counterparts. The built-in String#scrub that was called here
# before leaves non-breaking spaces in place.
#
# @return [String] the receiver, whose contents have been replaced with the
#   result of #scrubup
def scrubup!
  replace scrubup
end

#shorten_pathObject

Shorten path by adding ~ for home directory



610
611
612
613
# File 'lib/searchlink/string.rb', line 610

# Shorten a path by replacing the first occurrence of the home directory
# (ENV["HOME"]) with "~".
#
# @return [String] the shortened path; the receiver itself when HOME is
#   unset or empty
def shorten_path
  home_directory = ENV["HOME"]
  # sub(nil, ...) raises TypeError and sub("", ...) would just prepend "~"
  return self if home_directory.nil? || home_directory.empty?

  sub(home_directory, "~")
end

#slugifyString

Turn a string into a slug, removing spaces and non-alphanumeric characters

Returns:

  • (String)

    slugified string



195
196
197
# File 'lib/searchlink/string.rb', line 195

# Turn a string into a slug.
#
# The text is lowercased, every character other than a-z, 0-9 and "_"
# becomes a hyphen, runs of hyphens are collapsed, and one trailing hyphen
# is removed.
#
# @return [String] the slugified string
def slugify
  hyphenated = downcase.gsub(/[^a-z0-9_]/i, "-")
  collapsed = hyphenated.gsub(/-+/, "-")
  collapsed.sub(/-?$/, "")
end

#slugify!Object

Destructive slugify

See Also:



201
202
203
# File 'lib/searchlink/string.rb', line 201

# In-place variant of #slugify.
#
# @return [String] the receiver, whose contents have been replaced with the
#   result of #slugify
def slugify!
  slug = slugify
  replace(slug)
end

#spacerString

Generate a spacer based on character widths for help dialog display

Returns:

  • (String)

    string containing tabs



134
135
136
137
138
139
140
141
142
143
144
# File 'lib/searchlink/string.rb', line 134

# Pick the spacer to print after this string in the help dialog, based on an
# estimated display width: the character count, plus one for every m, w or
# v, minus one for every t.
#
# @return [String, nil] two tabs for widths 0-3, a space and a tab for
#   widths 4-12, nil for any other width
def spacer
  width = length + scan(/[mwv]/).length - scan(/t/).length

  if (0..3).cover?(width)
    "\t\t"
  elsif (4..12).cover?(width)
    " \t"
  end
end

#split_hookObject



8
9
10
11
12
13
14
15
# File 'lib/searchlink/searches/hook.rb', line 8

# Split a "name||url||path" record into a hash.
#
# Each field is passed through #nil_if_missing. Fields that are absent map
# to nil: String#split drops trailing empty fields, so a record with an
# empty path yields fewer than three elements.
#
# @return [Hash{Symbol => String, nil}] hash with :name, :url and :path
def split_hook
  name, url, path = split(/\|\|/)
  {
    name: name&.nil_if_missing,
    url: url&.nil_if_missing,
    path: path&.nil_if_missing
  }
end

#split_hooksObject



17
18
19
# File 'lib/searchlink/searches/hook.rb', line 17

# Split a "^^"-separated list of records and convert each one with
# #split_hook.
#
# @return [Array] one #split_hook result per record
def split_hooks
  records = split(/\^\^/)
  records.map { |record| record.split_hook }
end

#to_amString

Convert an iTunes link to an Apple Music link

Returns:

  • (String)

    apple music link



222
223
224
225
226
227
# File 'lib/searchlink/string.rb', line 222

# Convert an iTunes link into an Apple Music link.
#
# The host "itunes.apple.com" becomes "geo.itunes.apple.com" and an
# "app=music" parameter is appended, using "&" when the URL already has a
# query string and "?" otherwise.
#
# @return [String] the Apple Music link
def to_am
  # The pattern consumes the "/" before the host, so the replacement has to
  # put it back; otherwise "https://" turns into "https:/".
  input = sub(%r{/itunes\.apple\.com}, "/geo.itunes.apple.com")
  append = input =~ %r{\?[^/]+=} ? "&app=music" : "?app=music"
  input + append
end

#to_rx_array(separator: " ", start_word: true) ⇒ Array

Break a string into an array of Regexps

Parameters:

  • separator (String) (defaults to: " ")

    The word separator

  • start_word (Boolean) (defaults to: true)

    Require matches at start of word

Returns:

  • (Array)

    array of regular expressions



593
594
595
596
597
# File 'lib/searchlink/string.rb', line 593

# Break a string into an array of case-insensitive regular expressions, one
# per word.
#
# Within each word every character other than a letter or digit is turned
# into ".?" (an optional wildcard).
#
# @param separator [String] the word separator, interpolated into a regular
#   expression as-is
# @param start_word [Boolean] prefix every pattern with \b so it only
#   matches at the start of a word
# @return [Array<Regexp>] one pattern per word
def to_rx_array(separator: " ", start_word: true)
  prefix = start_word ? '\b' : ""
  squeezed = gsub(/(#{separator})+/, separator)
  words = squeezed.split(/#{separator}/)

  words.map do |word|
    loosened = word.gsub(/[^a-z0-9]/i, '.?')
    /#{prefix}#{loosened}/i
  end
end

#truncate(max) ⇒ Object

Truncate string to given length, preserving words

Parameters:

  • max (Number)

    The maximum length



451
452
453
454
455
456
457
458
459
460
461
462
463
464
# File 'lib/searchlink/string.rb', line 451

# Truncate the string to at most +max+ characters without cutting words.
#
# Words (split on whitespace) are kept, joined by single spaces, for as long
# as the result fits into +max+. When not even the first word fits, that
# word is returned whole.
#
# @param max [Integer] the maximum length
# @return [String] the receiver itself when it already fits, otherwise the
#   truncated string
def truncate(max)
  return self if length <= max

  words = split(/\s+/)
  kept = []
  used = 0

  words.each do |word|
    # Account for the joining space once something has been kept
    needed = kept.empty? ? word.length : used + 1 + word.length
    break if needed > max

    kept << word
    used = needed
  end

  # words is empty for a whitespace-only string; to_s keeps the return a String
  kept.empty? ? words.first.to_s : kept.join(" ")
end

#truncate!(max) ⇒ Object

Truncate in place

Parameters:

  • max (Number)

    The maximum length

See Also:



442
443
444
# File 'lib/searchlink/string.rb', line 442

# In-place variant of #truncate.
#
# @param max [Integer] the maximum length
# @return [String] the receiver, whose contents have been replaced with the
#   result of #truncate
def truncate!(max)
  shortened = truncate(max)
  replace(shortened)
end

#url_decodeObject



115
116
117
# File 'lib/searchlink/string.rb', line 115

# Decode a URL-encoded string with CGI.unescape.
#
# @return [String] the decoded string
def url_decode
  decoded = CGI.unescape(self)
  decoded
end

#url_encodeString

URL Encode string

Returns:

  • (String)

    url encoded string



111
112
113
# File 'lib/searchlink/string.rb', line 111

# URL-encode the string with ERB::Util.url_encode.
#
# Literal "%22" sequences are turned back into double quotes before
# encoding.
#
# @return [String] the URL-encoded string
def url_encode
  with_quotes = gsub(/%22/, '"')
  ERB::Util.url_encode(with_quotes)
end

#url_pathString

Return just the path of a URL

Returns:



267
268
269
# File 'lib/searchlink/string.rb', line 267

# Return just the path component of a URL, as parsed by URI.parse.
#
# @return [String] the path of the URL
def url_path
  parsed = URI.parse(self)
  parsed.path
end

#valid_version?Boolean

Test if given string is a valid semantic version number with major, minor and patch (and optionally pre)

Returns:

  • (Boolean)

    string is semantic version number



39
40
41
42
# File 'lib/searchlink/semver.rb', line 39

def valid_version?
  pattern = /^\d+\.\d+\.\d+(-?([^0-9]+\d*))?$/
  self =~ pattern ? true : false
end

#word_wrap(col_width = 60, prefix = "") ⇒ Object

Word wrap a string so that no line exceeds the given column width, returning a new string.

CREDIT: Gavin Kistner, Dayne Broderson



23
24
25
26
27
28
# File 'lib/searchlink/string.rb', line 23

# Word wrap the string to lines of at most +col_width+ characters, returning
# a new string.
#
# Every wrapped line starts with +prefix+ and ends with a newline.
#
# CREDIT: Gavin Kistner, Dayne Broderson
#
# @param col_width [Integer] maximum line width
# @param prefix [String] text put in front of every line
# @return [String] the wrapped text
def word_wrap(col_width = 60, prefix = "")
  long_run = /(\S{#{col_width}})(?=\S)/
  line = /(.{1,#{col_width}})(?:\s+|$)/

  gsub(long_run, "#{prefix}\\1").gsub(line, "#{prefix}\\1\n")
end

#word_wrap!(col_width = 60, prefix = "") ⇒ Object

As with #word_wrap, but modifies the string in place. CREDIT: Gavin Kistner, Dayne Broderson



15
16
17
# File 'lib/searchlink/string.rb', line 15

# In-place variant of #word_wrap.
#
# @param col_width [Integer] maximum line width
# @param prefix [String] text put in front of every line
# @return [String] the receiver, whose contents have been replaced with the
#   wrapped text
def word_wrap!(col_width = 60, prefix = "")
  wrapped = dup.word_wrap(col_width, prefix)
  replace(wrapped)
end

#yaml_valObject

Quote a YAML value if needed



7
8
9
10
# File 'lib/searchlink/string.rb', line 7

# Render the string as a YAML scalar, quoted only when YAML requires it.
#
# The value is parsed as a single-quoted scalar and dumped again, so the
# YAML emitter decides on the quoting. Only the first line of the dumped
# value is returned.
#
# @return [String] the value as it should appear after "key: " in YAML
def yaml_val
  # Inside a single-quoted YAML scalar a literal quote is written as two
  # quotes; without this, values such as "it's" fail to parse.
  escaped = gsub("'", "''")
  yaml = YAML.safe_load("key: '#{escaped}'")
  YAML.dump(yaml).match(/key: (.*?)$/)[1]
end