Module: MaRuKu::Strings

Overview

These are string utilities.

Constant Summary collapse

TabSize =
4
AttributeDefinitionList =

$1 = id $2 = attribute list

/^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
Definition =

Example:

^:blah blah
^: blah blah
^   : blah blah
%r{ 
	^ # begin of line
	[ ]{0,3} # up to 3 spaces
	: # colon
	\s* # whitespace
	(\S.*) # the text    = $1
	$ # end of line
}x
Abbreviation =

Example:

*[HTML]: Hyper Text Markup Language
%r{
	^  # begin of line
	\* # one asterisk
	\[ # opening bracket
	([^\]]+) # any non-closing bracket:  id = $1
	\] # closing bracket
	:  # colon
	\s* # whitespace
	(\S.*\S)* #           definition=$2
	\s* # strip this whitespace
	$   # end of line
}x
FootnoteText =
%r{
	^\s*\[(\^.+)\]: # id = $1 (including '^')
	\s*(\S.*)?$    # text = $2 (not obb.)
}x
LinkRegex =

This regex is taken from the BlueCloth sources. Link definitions are in the form: ^[id]: \n? url "optional title"

%r{
		^[ ]*\[([^\]]+)\]:		# id = $1
 [ ]*
		<?(\S+)>?				# url = $2
 [ ]*
		(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?)			# title = $3
[")']			# Matching ) or "
\s*(.+)?   # stuff = $4
		)?	# title is optional
}x
IncompleteLink =
%r{^\s*\[(.+)\]:\s*$}
HeaderWithId =
/^(.*)\{\#([\w_-]+)\}\s*$/
HeaderWithAttributes =
/^(.*)\{(.*)\}\s*$/
MightBeTableHeader =

If the line contains a pipe, it could be a table header.

%r{\|}
Sep =

-------------:

/\s*(\:)?\s*-+\s*(\:)?\s*/
TableSeparator =

| -------------:| ------------------------------ |

%r{^(\|?#{Sep}\|?)+\s*$}
EMailAddress =
/<([^:]+@[^:]+)>/
URL =
/^<http:/

Instance Method Summary collapse

Instance Method Details

#add_tabs(s, n = 1, char = "\t") ⇒ Object



25
26
27
# File 'lib/maruku/string_utils.rb', line 25

# Prefixes every line of +s+ with +n+ copies of +char+ and returns the
# re-joined string.
#
# The input is split on "\n", so trailing empty lines are dropped and an
# empty string is returned unchanged.
def add_tabs(s, n = 1, char = "\t")
	indented = s.split("\n").map do |line|
		"#{char * n}#{line}"
	end
	indented.join("\n")
end

#dbg_describe_ary(a, prefix = '') ⇒ Object



170
171
172
173
174
175
# File 'lib/maruku/string_utils.rb', line 170

# Debug helper: prints one line per element of +a+ to standard output,
# in the form "<prefix> (<1-based position>)# <element.inspect>".
# Returns whatever the iteration over +a+ returns (the array itself for
# an Array).
def dbg_describe_ary(a, prefix = '')
	a.each_with_index do |item, index|
		puts "#{prefix} (#{index + 1})# #{item.inspect}"
	end
end

#force_linebreak?(l) ⇒ Boolean

Returns:

  • (Boolean)


177
178
179
# File 'lib/maruku/string_utils.rb', line 177

# Tells whether the line +l+ ends with (at least) two spaces, which is the
# Markdown marker for a forced line break.
#
# The result is the raw outcome of the regexp match: the index of the
# match (truthy, possibly 0) or nil, not a strict true/false.
def force_linebreak?(l)
	two_trailing_spaces = /  $/
	l =~ two_trailing_spaces
end

#line_md_type(l) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/maruku/input/type_detection.rb', line 36

# Classifies a single line of Markdown source and returns a symbol naming
# its type (:text, :code, :empty, :footnote_text, :ref_definition,
# :abbreviation, :definition, :raw_html, :ulist, :olist, :header1,
# :header2, :header3, :hrule, :quote, :metadata, :ald or :ial).
#
# The checks run top to bottom and the first match wins, so their order is
# significant. :text is both a fast path for lines starting with an ASCII
# letter and the final catch-all.
def line_md_type(l)
	# The order of evaluation is important (:text is a catch-all)
	return :text             if l =~ /^[a-zA-Z]/
	return :code             if number_of_leading_spaces(l) >= 4
	return :empty            if l =~ /^\s*$/
	return :footnote_text    if l =~ FootnoteText
	return :ref_definition   if l =~ LinkRegex || l =~ IncompleteLink
	return :abbreviation     if l =~ Abbreviation
	return :definition       if l =~ Definition
	# Emails and URLs at the beginning of the line used to be mistaken
	# for raw_html, so they are recognised as text first.
	return :text             if l =~ EMailAddress || l =~ URL
	# raw html is like PHP Markdown Extra: at most three spaces before
	return :raw_html         if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
	# Anchored at the start of the line: without the anchor any line that
	# merely contains "<!--" somewhere (e.g. inside a list item or a quote)
	# was classified as raw HTML.
	return :raw_html         if l =~ %r{^[ ]{0,3}<\!\-\-}
	return :ulist            if l =~ /^\s?([\*\-\+])\s+.*\w+/
	return :olist            if l =~ /^\s?\d+\..*\w+/
	return :header1          if l =~ /^(=)+/
	return :header2          if l =~ /^([-\s])+$/
	return :header3          if l =~ /^(#)+\s*\S+/
	# at least three asterisks on a line, and only whitespace
	return :hrule            if l =~ /^(\s*\*\s*){3,1000}$/
	# or hyphens. NOTE(review): for single-line input a line made only of
	# hyphens and whitespace already matched :header2 above, so this check
	# looks unreachable; left in place to keep the ordering untouched.
	return :hrule            if l =~ /^(\s*-\s*){3,1000}$/
	return :hrule            if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
	return :quote            if l =~ /^>/
	return :metadata         if l =~ /^@/
	return :ald              if l =~ AttributeDefinitionList
	return :ial              if l =~ /^\s{0,3}\{.*\}/
	:text # else, it's just text
end

#normalize_key_and_value(k, v) ⇒ Object

`.xyz` => class: xyz; `#xyz` => id: xyz



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/maruku/string_utils.rb', line 57

# Normalizes one key/value pair and returns it as a two-element array.
#
# * A key of the form `.xyz` becomes [:class, "xyz"].
# * A key of the form `#xyz` becomes [:id, "xyz"].
# * Any other key is stripped, downcased and has its spaces replaced by
#   underscores; the value is stripped, a missing value becomes true, and
#   the words yes/true and no/false (case-insensitive) become the
#   corresponding booleans.
def normalize_key_and_value(k, v)
	value = v ? v.strip : true # no value defaults to true
	key = k.strip

	case key
	when /^\.([\w\d]+)/
		# `.xyz` => class: xyz
		return :class, $1
	when /^\#([\w\d]+)/
		# `#xyz` => id: xyz
		return :id, $1
	end

	# check synonyms
	lowered = value.to_s.downcase
	value = true if %w[yes true].include?(lowered)
	value = false if %w[no false].include?(lowered)

	[key.downcase.gsub(' ', '_'), value]
end

#num_leading_hashes(s) ⇒ Object

Counts the number of leading ‘#’ in the string



133
134
135
136
137
# File 'lib/maruku/string_utils.rb', line 133

# Counts the '#' characters at the start of +s+.
#
# The count never reaches the last character of the string: it is capped
# at s.size - 1, so "###" yields 2 and both "#" and "" yield 0.
def num_leading_hashes(s)
	limit = s.size - 1
	count = 0
	count += 1 while count < limit && s[count, 1] == '#'
	count
end

#number_of_leading_spaces(s) ⇒ Object

Returns the number of leading spaces, considering that a tab counts as `TabSize` spaces.



79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/maruku/string_utils.rb', line 79

# Measures the leading whitespace of +s+: every leading space counts as 1
# and every leading tab counts as TabSize. Scanning stops at the first
# character that is neither a space nor a tab.
def number_of_leading_spaces(s)
	width = 0
	s.each_char do |ch|
		case ch
		when ' '
			width += 1
		when "\t"
			width += TabSize
		else
			break
		end
	end
	width
end

#parse_email_headers(s) ⇒ Object

This parses email-style headers. Returns a hash; hash[:data] is the message.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/maruku/string_utils.rb', line 36

# Parses an optional block of email-style headers ("Key: value" lines
# followed by an empty line) at the very beginning of +s+.
#
# Returns a hash. hash[:data] holds the text after the header block, or
# all of +s+ when the string does not start with a header block. Every
# header is added under its normalized key (see normalize_key_and_value),
# converted to a symbol.
def parse_email_headers(s)
	keys = {}
	match = (s =~ /((\w+: .*\n)+)\n/)
	if match != 0
		# No header block at position 0: the whole string is the message.
		keys[:data] = s
	else
		keys[:data] = $'
		headers = $1
		headers.split("\n").each do |l|
			# Split on the first colon only, so that values which contain
			# a colon themselves (times, URLs, ...) are kept whole.
			k, v = l.split(':', 2)
			k, v = normalize_key_and_value(k, v)
			keys[k.to_sym] = v
		end
	end
	keys
end

#spaces_before_first_char(s) ⇒ Object

This returns the position of the first real char in a list item

For example:

'*Hello' # => 1
'* Hello' # => 2
' * Hello' # => 3
' *   Hello' # => 5
'1.Hello' # => 2
' 1.  Hello' # => 5


104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/maruku/string_utils.rb', line 104

# Returns the position of the first real character of a list item, i.e.
# the number of characters taken by the leading whitespace, the list
# marker and the whitespace after it.
#
# The line type is read from s.md_type. For :ulist the marker is a single
# character; for :olist it is a run of digits followed by one character
# (the dot). For any other type the problem is reported through tell_user
# and 0 is returned.
def spaces_before_first_char(s)
	# Advances from +pos+ for as long as the character there matches +pattern+.
	skip = lambda do |pos, pattern|
		pos += 1 while s[pos, 1] =~ pattern
		pos
	end

	case s.md_type
	when :ulist
		# leading whitespace, if present
		pos = skip.call(0, /\s/)
		# step over the indicator (+, -, *), then the optional whitespace
		skip.call(pos + 1, /\s/)
	when :olist
		# leading whitespace
		pos = skip.call(0, /\s/)
		# the digits of the number
		pos = skip.call(pos, /\d/)
		# step over the dot, then the whitespace
		skip.call(pos + 1, /\s/)
	else
		tell_user "BUG (my bad): '#{s}' is not a list"
		0
	end
end

#split_lines(s) ⇒ Object



31
32
33
# File 'lib/maruku/string_utils.rb', line 31

# Splits +s+ into an array of lines, using "\n" as the separator.
# The separators are not kept and trailing empty lines are dropped.
def split_lines(s)
	line_separator = "\n"
	s.split(line_separator)
end

#strip_hashes(s) ⇒ Object

Strips initial and final hashes



140
141
142
143
144
145
# File 'lib/maruku/string_utils.rb', line 140

# Strips the leading hashes (as counted by num_leading_hashes) and any
# trailing run of hashes and whitespace from +s+, then strips the
# surrounding whitespace of what is left.
#
# The backward scan never goes below index 0, so the first remaining
# character is always kept.
def strip_hashes(s)
	body = s[num_leading_hashes(s)..-1]
	last = body.size - 1
	last -= 1 while last > 0 && body[last, 1] =~ /(#|\s)/
	body[0..last].strip
end

#strip_indent(s, n) ⇒ Object

Removes at most n columns of leading indentation (a tab counts as `TabSize` columns).



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/maruku/string_utils.rb', line 154

# Removes at most +n+ columns of leading indentation from +s+ and returns
# the rest of the string. A space counts as one column and a tab as
# TabSize columns; scanning stops at the first character that is neither.
#
# A string with nothing to strip is returned whole, and an empty string
# yields an empty string.
def strip_indent(s, n)
	i = 0
	while i < s.size && n > 0
		case s[i, 1]
		when ' '
			n -= 1
		when "\t"
			n -= TabSize
		else
			break
		end
		i += 1
	end
	# Take everything from i to the end. The former s[i, s.size-1] cut the
	# last character whenever i was 0 and returned nil for an empty string.
	s[i..-1]
end

#unquote(s) ⇒ Object

Removes the initial quote marker (`>` and one optional whitespace character) from each line.



149
150
151
# File 'lib/maruku/string_utils.rb', line 149

# Returns a copy of +s+ in which the quote marker at the beginning of
# each line (a '>' followed by at most one whitespace character) has been
# removed.
def unquote(s)
	quote_marker = /^>\s?/
	s.gsub(quote_marker, '')
end