Module: MaRuKu::Strings

Overview

These are string utilities.

Constant Summary collapse

TabSize =
4
AttributeDefinitionList =

$1 = id $2 = attribute list

/^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
InlineAttributeList =
/^\s{0,3}\{([:#\.].*)\}\s*$/
Definition =

Example:

^:blah blah
^: blah blah
^   : blah blah
%r{ 
	^ # begin of line
	[ ]{0,3} # up to 3 spaces
	: # colon
	\s* # whitespace
	(\S.*) # the text    = $1
	$ # end of line
}x
Abbreviation =

Example:

*[HTML]: Hyper Text Markup Language
%r{
	^  # begin of line
	[ ]{0,3} # up to 3 spaces
	\* # one asterisk
	\[ # opening bracket
	([^\]]+) # any non-closing bracket:  id = $1
	\] # closing bracket
	:  # colon
	\s* # whitespace
	(\S.*\S)* #           definition=$2
	\s* # strip this whitespace
	$   # end of line
}x
FootnoteText =
%r{
	^  # begin of line
	[ ]{0,3} # up to 3 spaces
	\[(\^.+)\]: # id = $1 (including '^')
	\s*(\S.*)?$    # text = $2 (not obb.)
}x
LinkRegex =

This regex is taken from the BlueCloth sources. Link definitions are in the form: ^[id]: n? url “optional title”

%r{
		^[ ]{0,3}\[([^\[\]]+)\]:		# id = $1
 [ ]*
		<?([^>\s]+)>?				# url = $2
 [ ]*
		(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?)			# title = $3
[")']			# Matching ) or "
\s*(.+)?   # stuff = $4
		)?	# title is optional
}x
IncompleteLink =
%r{^[ ]{0,3}\[([^\[\]]+)\]:\s*$}
HeaderWithId =
/^(.*)\{\#([\w_-]+)\}\s*$/
HeaderWithAttributes =
/^(.*)\{(.*)\}\s*$/
MightBeTableHeader =

If the line contains a pipe, it could be a table header.

%r{\|}
Sep =

-------------:

/\s*(\:)?\s*-+\s*(\:)?\s*/
TableSeparator =

| -------------:| ------------------------------ |

%r{^(\|?#{Sep}\|?)+\s*$}
EMailAddress =
/<([^:]+@[^:]+)>/

Instance Method Summary collapse

Instance Method Details

#add_tabs(s, n = 1, char = "\t") ⇒ Object



25
26
27
# File 'lib/maruku/string_utils.rb', line 25

# Prefixes every line of +s+ with +n+ copies of +char+ (a tab by default).
#
# The input is split on "\n" and re-joined with "\n", so a trailing
# newline in +s+ is not preserved in the result.
def add_tabs(s, n = 1, char = "\t")
	lines = s.split("\n")
	indented = lines.map { |line| (char * n) + line }
	indented.join("\n")
end

#dbg_describe_ary(a, prefix = '') ⇒ Object



179
180
181
182
183
184
# File 'lib/maruku/string_utils.rb', line 179

# Debug helper: prints each element of +a+ on its own line, preceded by
# +prefix+ and a 1-based counter, using the element's #inspect form.
# Returns +a+.
def dbg_describe_ary(a, prefix='')
	a.each_with_index do |item, idx|
		puts "#{prefix} (#{idx + 1})# #{item.inspect}"
	end
end

#force_linebreak?(l) ⇒ Boolean

Returns:

  • (Boolean)


186
187
188
# File 'lib/maruku/string_utils.rb', line 186

# Tells whether line +l+ ends with two spaces.
#
# Returns the index of the match (truthy) or nil, not a strict boolean.
def force_linebreak?(l)
	two_trailing_spaces = /  $/
	l =~ two_trailing_spaces
end

#line_md_type(l) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/maruku/input/type_detection.rb', line 36

# Classifies a single line of Markdown source and returns a symbol naming
# its type (:text, :code, :empty, :footnote_text, :ref_definition,
# :abbreviation, :definition, :xml_instr, :raw_html, :ulist, :olist,
# :header1, :header2, :header3, :hrule, :quote, :metadata, :ald or :ial).
#
# The checks are tried from top to bottom and the first one that matches
# wins, so their order is significant; :text is the fallback.
def line_md_type(l)
	# The order of evaluation is important (:text is a catch-all)
	return :text   if l =~ /^[a-zA-Z]/
	return :code             if number_of_leading_spaces(l)>=4
	return :empty    if l =~ /^\s*$/
	return :footnote_text    if l =~ FootnoteText
	return :ref_definition   if l =~ LinkRegex or l=~ IncompleteLink
	return :abbreviation     if l =~ Abbreviation
	return :definition       if l =~ Definition
	# I had a bug with emails and urls at the beginning of the 
	# line that were mistaken for raw_html
	return :text if l=~ /^[ ]{0,3}#{EMailAddress}/
	# Accept both http and https autolinks; otherwise "<https://...>" would
	# fall through to the raw_html check below.
	return :text if l=~ /^[ ]{0,3}<https?:/
	# raw html is like PHP Markdown Extra: at most three spaces before
	return :xml_instr if l =~ %r{^\s*<\?}
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-}
	# Something is wrong with how we parse lists! :-(
	#return :ulist    if l =~ /^[ ]{0,3}([\*\-\+])\s+.*\w+/
	#return :olist    if l =~ /^[ ]{0,3}\d+\..*\w+/
	return :ulist    if l =~ /^[ ]{0,1}([\*\-\+])\s+.*\w+/
	return :olist    if l =~ /^[ ]{0,1}\d+\..*\w+/
	return :header1  if l =~ /^(=)+/ 
	return :header2  if l =~ /^([-\s])+$/ 
	return :header3  if l =~ /^(#)+\s*\S+/ 
	# at least three asterisks on a line, and only whitespace
	return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/ 
	return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
	return :hrule    if l =~ /^(\s*_\s*){3,1000}$/ # or underscores	
	return :quote    if l =~ /^>/
	return :metadata if l =~ /^@/
#		if @@new_meta_data?
		return :ald   if l =~ AttributeDefinitionList
		return :ial   if l =~ InlineAttributeList
#		end
#		return :equation_end if l =~ EquationEnd
	return :text # else, it's just text
end

#normalize_key_and_value(k, v) ⇒ Object

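Keys are stripped and downcased, and spaces become underscores. The key is returned as a string; this method does not convert it to a symbol. Values "yes"/"true" and "no"/"false" become booleans, and a missing value defaults to true.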



69
70
71
72
73
74
75
76
77
78
79
# File 'lib/maruku/string_utils.rb', line 69

# Normalizes a header key/value pair and returns it as [key, value].
#
# The value is stripped; nil/false becomes true. The synonyms "yes"/"true"
# and "no"/"false" (case-insensitive) are mapped to booleans. The key is
# stripped, downcased, and its spaces are replaced by underscores; it is
# returned as a String.
def normalize_key_and_value(k,v)
	value = v ? v.strip : true # no value defaults to true
	key = k.strip

	# check synonyms
	case value.to_s.downcase
	when 'yes', 'true' then value = true
	when 'no', 'false' then value = false
	end

	return key.downcase.tr(' ', '_'), value
end

#num_leading_hashes(s) ⇒ Object

Counts the number of leading ‘#’ in the string



137
138
139
140
141
# File 'lib/maruku/string_utils.rb', line 137

# Counts the number of leading '#' characters in +s+.
#
# The whole leading run is counted, including the case where the string
# consists only of hashes ("###" => 3, "#" => 1). The previous loop stopped
# one character before the end and under-counted those inputs by one.
#
# Returns an Integer (0 when +s+ is empty or does not start with '#').
def num_leading_hashes(s)
	# \A anchors at the start of the string; #* always matches (possibly empty).
	s[/\A#*/].size
end

#number_of_leading_spaces(s) ⇒ Object

Returns the number of leading spaces, considering that a tab counts as `TabSize` spaces.



83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/maruku/string_utils.rb', line 83

# Measures the leading whitespace of +s+: each space counts as 1 and each
# tab counts as TabSize. Counting stops at the first character that is
# neither a space nor a tab.
def number_of_leading_spaces(s)
	s.each_char.inject(0) do |width, c|
		case c
		when ' '  then width + 1
		when "\t" then width + TabSize
		else break width
		end
	end
end

#parse_email_headers(s) ⇒ Object

This parses email headers. Returns a hash.

hash[:data] is the message (the text that follows the headers, or the whole input when no headers are found).

Keys are downcased, space becomes underscore, converted to symbols.

My key: true

becomes:

{:my_key => true}


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/maruku/string_utils.rb', line 47

# Parses an optional block of "Key: value" headers at the very start of +s+.
#
# Returns a Hash. :data holds the text after the header block (or all of
# +s+ when there is no header block). Each header is added under its
# normalized key, converted to a symbol, with its normalized value
# (see normalize_key_and_value).
def parse_email_headers(s)
	header_block = /\A((\w[\w\s\_\-]+: .*\n)+)\s*\n/.match(s)
	return { :data => s } unless header_block

	result = { :data => header_block.post_match }
	header_block[1].split("\n").each do |line|
		# Split on the first ':' only, so values may contain colons.
		raw_key, raw_value = line.split(':', 2)
		key, value = normalize_key_and_value(raw_key, raw_value)
		result[key.to_sym] = value
	end
	result
end

#sanitize_ref_id(x) ⇒ Object

change space to “_” and remove any non-word character



152
153
154
# File 'lib/maruku/string_utils.rb', line 152

# Builds a reference id from +x+: strips surrounding whitespace, downcases,
# turns each space into "_", then drops every remaining non-word character.
def sanitize_ref_id(x)
	normalized = x.strip.downcase
	underscored = normalized.gsub(' ','_')
	underscored.gsub(/[^\w]/,'')
end

#spaces_before_first_char(s) ⇒ Object

This returns the position of the first real char in a list item

For example:

'*Hello' # => 1
'* Hello' # => 2
' * Hello' # => 3
' *   Hello' # => 5
'1.Hello' # => 2
' 1.  Hello' # => 5


108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/maruku/string_utils.rb', line 108

# Returns the index of the first content character of list item +s+.
#
# For a :ulist line it skips leading whitespace, one marker character, and
# any whitespace after it. For an :olist line it skips leading whitespace,
# the digits, one character for the dot, and any whitespace after it.
# For any other md_type it reports the problem via tell_user and returns 0.
def spaces_before_first_char(s)
	# Advances +pos+ while the character at that position matches +pattern+.
	skip = lambda do |pos, pattern|
		pos += 1 while s[pos,1] =~ pattern
		pos
	end

	case s.md_type
	when :ulist
		marker_at = skip.call(0, /\s/)
		# step over the indicator (+, -, *), then the optional whitespace
		skip.call(marker_at + 1, /\s/)
	when :olist
		digits_at = skip.call(0, /\s/)
		dot_at = skip.call(digits_at, /\d/)
		# step over the dot, then the whitespace
		skip.call(dot_at + 1, /\s/)
	else
		tell_user "BUG (my bad): '#{s}' is not a list"
		0
	end
end

#split_lines(s) ⇒ Object



31
32
33
# File 'lib/maruku/string_utils.rb', line 31

# Splits +s+ into an array of lines: every carriage return is removed
# first, then the text is split on "\n".
def split_lines(s)
	without_cr = s.delete("\r")
	without_cr.split("\n")
end

#strip_hashes(s) ⇒ Object

Strips initial and final hashes



144
145
146
147
148
149
# File 'lib/maruku/string_utils.rb', line 144

# Removes the leading hashes from +s+ (as counted by num_leading_hashes),
# then removes any trailing run of '#' and whitespace, and finally strips
# surrounding whitespace from what is left.
def strip_hashes(s)
	body = s[num_leading_hashes(s), s.size]
	last = body.size - 1
	# Walk back over trailing hashes/whitespace; index 0 is never dropped here.
	last -= 1 while last > 0 && (body[last,1] =~ /(#|\s)/)
	body[0, last + 1].strip
end

#strip_indent(s, n) ⇒ Object

Removes at most n columns of leading indentation (a space counts as 1, a tab as TabSize).



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/maruku/string_utils.rb', line 163

# Removes leading indentation from +s+ until +n+ columns have been
# consumed. A space consumes 1 column and a tab consumes TabSize columns;
# scanning stops early at the first character that is neither.
# Returns the remainder of the string.
def strip_indent(s, n)
	budget = n
	consumed = 0
	s.each_char do |c|
		break unless budget > 0
		case c
		when ' '  then budget -= 1
		when "\t" then budget -= TabSize
		else break
		end
		consumed += 1
	end
	s[consumed, s.size]
end

#unquote(s) ⇒ Object

removes initial quote



158
159
160
# File 'lib/maruku/string_utils.rb', line 158

# Removes the quote marker from the beginning of each line of +s+: a '>'
# at the start of a line together with at most one whitespace character
# that follows it.
def unquote(s)
	quote_marker = /^>\s?/
	s.gsub(quote_marker, '')
end