Class: String

Inherits:
Object show all
Defined in:
lib/sup/util.rb

Direct Known Subclasses

Ncurses::CharCode

Defined Under Namespace

Classes: CheckError

Instance Method Summary collapse

Instance Method Details

#asciiObject



465
466
467
468
469
470
471
472
473
474
475
476
# File 'lib/sup/util.rb', line 465

# Returns a copy of the string in which every byte with the high bit set
# is spelled out as a literal "\x.." hex escape, while 7-bit bytes are
# copied through unchanged. The result is then passed through
# fix_encoding!, so it ends up as a string of ascii-compatible chars.
def ascii
  escaped = each_byte.each_with_object("") do |byte, buf|
    # bytes 0..127 are plain ascii; anything above gets hex-escaped
    buf << (byte < 128 ? byte.chr : "\\x#{byte.to_s(16)}")
  end
  escaped.fix_encoding!
end

#ascii_only?Boolean

Returns:

  • (Boolean)


479
480
481
482
# File 'lib/sup/util.rb', line 479

# Returns true when every byte of the string is 7-bit ascii (high bit
# clear), false otherwise. An empty string is ascii-only.
#
# The bytes are inspected via each_byte rather than self[i]: on Ruby 1.9
# and later self[i] is a one-character String, which has no `&` operator,
# so the index-based test raised NoMethodError.
def ascii_only?
  each_byte.all? { |byte| byte < 128 }
end

#camel_to_hyphyObject



287
288
289
# File 'lib/sup/util.rb', line 287

# Converts camelCase to hyphen-separated lower case: a hyphen is inserted
# wherever a lowercase letter is directly followed by an uppercase letter
# or a digit, and the whole result is downcased.
def camel_to_hyphy
  hyphenated = gsub(/([a-z])([A-Z0-9])/) { "#{$1}-#{$2}" }
  hyphenated.downcase
end

#checkObject



456
457
458
459
460
461
462
463
# File 'lib/sup/util.rb', line 456

# Sanity-checks the string's encoding. Raises CheckError when the encoding
# is neither UTF-8 nor ASCII, or when the bytes are not valid for the
# string's encoding. Any error raised while checking is re-raised as a
# CheckError carrying the same message. Returns nil when all is well.
def check
  if respond_to?(:encoding)
    acceptable = [Encoding::UTF_8, Encoding::ASCII].include?(encoding)
    fail "unexpected encoding #{encoding}" unless acceptable
  end
  fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding?
rescue
  raise CheckError.new($!.message)
end

#display_lengthObject



268
269
270
271
272
273
274
275
276
277
278
# File 'lib/sup/util.rb', line 268

# Display width of the string as reported by Unicode.width (called on the
# string after fix_encoding!). The value is memoized in @display_length,
# so it is computed at most once per string object.
#
# If Unicode.width fails and returns -1, fall back to regular
# String#length, see pull-request: #256.
def display_length
  width = (@display_length ||= Unicode.width(fix_encoding!, false))
  return width unless width < 0

  @display_length = length
end

#each(&b) ⇒ Object



443
444
445
# File 'lib/sup/util.rb', line 443

# Iterates over the string line by line; simply forwards the given block
# (if any) to each_line and returns whatever each_line returns.
def each(&block)
  each_line(&block)
end

#find_all_positions(x) ⇒ Object



291
292
293
294
295
296
297
298
299
300
301
# File 'lib/sup/util.rb', line 291

# Returns an array with the index of every occurrence of x (anything
# String#index accepts) in the string, in ascending order. The search
# resumes one position after each hit, so overlapping matches are all
# reported.
def find_all_positions x
  positions = []
  cursor = 0
  while cursor < length && (hit = index(x, cursor))
    positions << hit
    cursor = hit + 1
  end
  positions
end

#fix_encoding!Object

Fix the damn string! Make sure it is valid UTF-8, then convert it to the user's encoding.

Not Ruby 1.8 compatible



387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/sup/util.rb', line 387

# Forces the string, in place, into a valid string in $encoding and
# returns self. Raises if a valid string cannot be produced.
def fix_encoding!
  # 1. encode to utf-8 from whatever the current encoding is;
  # 2./3. round-trip through UTF-16 (which can fully represent UTF-8)
  #       anyway, in case the string already claims to be UTF-8: going
  #       to another encoding and back ensures invalid chars are replaced.
  [['UTF-8'], ['UTF-16', 'UTF-8'], ['UTF-8', 'UTF-16']].each do |conversion|
    encode!(*conversion, :invalid => :replace, :undef => :replace)
  end

  unless valid_encoding?
    fail "Could not create valid UTF-8 string out of: '#{self.to_s}'."
  end

  # finally convert to the user's encoding
  encode!($encoding, :invalid => :replace, :undef => :replace)

  unless valid_encoding?
    fail "Could not create valid #{$encoding.inspect} string out of: '#{self.to_s}'."
  end

  self
end

#normalize_whitespaceObject



431
432
433
434
# File 'lib/sup/util.rb', line 431

# Repairs the encoding in place (fix_encoding!) and returns a new string
# in which every tab is replaced by four spaces and every carriage return
# is removed.
def normalize_whitespace
  fix_encoding!
  expanded = gsub("\t", "    ")
  expanded.delete("\r")
end

#ordObject



437
438
439
# File 'lib/sup/util.rb', line 437

# Returns the Integer code of the first character, or nil for an empty
# string.
#
# Indexing with self[0] only yields an Integer on Ruby 1.8; on Ruby 1.9
# and later it yields a one-character String, which is not what callers
# of #ord expect. Reading the first codepoint gives an Integer there.
# Like the built-in String#ord, this raises ArgumentError when the string
# starts with a byte sequence that is invalid in its encoding.
def ord
  each_codepoint.first
end

#slice_by_display_length(len) ⇒ Object



280
281
282
283
284
285
# File 'lib/sup/util.rb', line 280

# Returns the leading characters of the string whose display widths (per
# character, via display_length) fit within a budget of len columns.
# Each character's width is subtracted from the remaining budget, and the
# character is kept only while that budget has not gone negative.
def slice_by_display_length len
  remaining = len
  kept = ""
  each_char do |ch|
    remaining -= ch.display_length
    kept << ch if remaining >= 0
  end
  kept
end

#split_on_commasObject

A very complicated regex, found on the internet, that splits on commas unless they occur within double quotes.



305
306
307
# File 'lib/sup/util.rb', line 305

# Splits the whitespace-normalized string on commas (plus any whitespace
# following them), except for commas that sit inside double quotes: the
# lookahead only accepts a comma that is followed by an even number of
# double quotes.
def split_on_commas
  cleaned = normalize_whitespace
  cleaned.split(/,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/)
end

#split_on_commas_with_remainderObject

ok, here we do it the hard way. got to have a remainder for purposes of tab-completing full email addresses



311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# File 'lib/sup/util.rb', line 311

# Splits the string on commas that are outside double quotes, honouring
# backslash escapes, and returns a two-element array [parts, remainder]:
#
# * parts is the array of completed, whitespace-trimmed fields;
# * remainder is the unfinished tail (leading whitespace removed) when the
#   string ends inside an unterminated double-quoted section, nil
#   otherwise.
#
# The scanner is a small state machine over four states: :outstring,
# :instring, and their "just saw a backslash" variants :escaped_outstring
# and :escaped_instring. While unescaped it jumps straight to the next
# comma, double quote or backslash; while escaped it inspects exactly the
# next character.
def split_on_commas_with_remainder
  ret = []
  state = :outstring
  pos = 0
  region_start = 0
  # `<=` so that the end of the string (char == nil) gets one final pass,
  # which flushes the last field.
  while pos <= length
    newpos = case state
      when :escaped_instring, :escaped_outstring then pos
      else index(/[,"\\]/, pos)
    end

    if newpos
      char = self[newpos]
    else
      char = nil
      newpos = length
    end

    case char
    when ?"
      state = case state
        when :outstring then :instring
        when :instring then :outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    when ?,, nil
      state = case state
        when :outstring, :escaped_outstring then
          ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
          region_start = newpos + 1
          :outstring
        when :instring then :instring
        when :escaped_instring then :instring
      end
    when ?\\
      state = case state
        when :instring then :escaped_instring
        when :outstring then :escaped_outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    else
      # Any other character is only ever examined in an escaped state. A
      # backslash escapes just the one character after it, so drop back to
      # the unescaped state; otherwise the escape would linger and swallow
      # a later quote (e.g. the closing quote in "C:\dir").
      state = case state
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
        else state
      end
    end
    pos = newpos + 1
  end

  remainder = case state
    when :instring
      self[region_start .. -1].gsub(/^\s+/, "")
    else
      nil
    end

  [ret, remainder]
end

#to_set_of_symbols(split_on = nil) ⇒ Object

Takes a list of words and returns a set of symbols. Typically used in Sup for translating Xapian’s representation of a list of labels (a string) to a set of label symbols.

split_on will be passed to String#split, so you can leave this nil for space.



453
# File 'lib/sup/util.rb', line 453

# Splits the string with String#split (split_on is handed through
# unchanged; nil splits on whitespace), strips each piece, converts it to
# a symbol and returns the symbols as a Set.
def to_set_of_symbols split_on=nil
  labels = split(split_on).map { |word| word.strip.to_sym }
  Set.new(labels)
end

#transcode(to_encoding, from_encoding) ⇒ Object

Transcode the string if the original encoding is known; fix it if it is broken.

Not Ruby 1.8 compatible



411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
# File 'lib/sup/util.rb', line 411

# Converts the string in place from from_encoding to to_encoding,
# replacing invalid and undefined characters, and returns self.
#
# If the direct conversion leaves the string invalid, it is converted
# again via UTF-16. If no converter exists for the requested encodings,
# the problem is logged with debug and the string is repaired with
# fix_encoding! instead. Raises if the string is still not valid.
def transcode to_encoding, from_encoding
  begin
    encode!(to_encoding, from_encoding, :invalid => :replace, :undef => :replace)

    if !valid_encoding?
      # fix encoding (through UTF-16)
      [['UTF-16', from_encoding], [to_encoding, 'UTF-16']].each do |target, source|
        encode!(target, source, :invalid => :replace, :undef => :replace)
      end
    end

  rescue Encoding::ConverterNotFoundError
    debug "Encoding converter not found for #{from_encoding.inspect} or #{to_encoding.inspect}, fixing string: '#{self.to_s}', but expect weird characters."
    fix_encoding!
  end

  return self if valid_encoding?

  fail "Could not create valid #{to_encoding.inspect} string out of: '#{self.to_s}'."
end

#wrap(len) ⇒ Object



367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'lib/sup/util.rb', line 367

# Breaks the string into an array of lines, each at most len display
# columns wide where possible. A line is broken at the last whitespace
# character that fits within len columns (that whitespace character is
# dropped); when there is no such whitespace the line is cut hard at the
# width limit.
#
# If not even a single character fits within len (a character wider than
# len, or len <= 0), one character is emitted on its own line anyway.
# Without this the remaining text would never shrink and the loop would
# spin forever while appending empty strings to the result.
def wrap len
  ret = []
  s = self
  while s.display_length > len
    head = s.slice_by_display_length(len)
    cut = head.rindex(/\s/)
    if cut
      ret << s[0 ... cut]
      s = s[(cut + 1) .. -1]
    else
      if head.empty?
        # nothing fits: take one character so we always make progress
        head = s[0, 1]
        # that character is all that is left; it is appended below
        break if head.length == s.length
      end
      ret << head
      s = s[head.length .. -1]
    end
  end
  ret << s
end