Module: MaRuKu::Strings

Overview

These are string utilities.

Constant Summary collapse

TabSize =
4
AttributeDefinitionList =

$1 = id $2 = attribute list

/^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
InlineAttributeList =
/^\s{0,3}\{(.*)\}\s*$/
Definition =

Example:

^:blah blah
^: blah blah
^   : blah blah
%r{ 
	^ # begin of line
	[ ]{0,3} # up to 3 spaces
	: # colon
	\s* # whitespace
	(\S.*) # the text    = $1
	$ # end of line
}x
Abbreviation =

Example:

*[HTML]: Hyper Text Markup Language
%r{
	^  # begin of line
	\* # one asterisk
	\[ # opening bracket
	([^\]]+) # any non-closing bracket:  id = $1
	\] # closing bracket
	:  # colon
	\s* # whitespace
	(\S.*\S)* #           definition=$2
	\s* # strip this whitespace
	$   # end of line
}x
FootnoteText =
%r{
	^\s*\[(\^.+)\]: # id = $1 (including '^')
	\s*(\S.*)?$    # text = $2 (not obb.)
}x
LinkRegex =

This regex is taken from the BlueCloth sources. Link definitions are in the form: ^[id]: n? url “optional title”

%r{
		^[ ]*\[([^\]]+)\]:		# id = $1
 [ ]*
		<?(\S+)>?				# url = $2
 [ ]*
		(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?)			# title = $3
[")']			# Matching ) or "
\s*(.+)?   # stuff = $4
		)?	# title is optional
}x
IncompleteLink =
%r{^\s*\[(.+)\]:\s*$}
HeaderWithId =
/^(.*)\{\#([\w_-]+)\}\s*$/
HeaderWithAttributes =
/^(.*)\{(.*)\}\s*$/
MightBeTableHeader =

If the line contains a pipe, it could be a table header.

%r{\|}
Sep =

————-:

/\s*(\:)?\s*-+\s*(\:)?\s*/
TableSeparator =

| ————-:| —————————— |

%r{^(\|?#{Sep}\|?)+\s*$}
EMailAddress =
/<([^:]+@[^:]+)>/

Instance Method Summary collapse

Instance Method Details

#add_tabs(s, n = 1, char = "\t") ⇒ Object



25
26
27
# File 'lib/maruku/string_utils.rb', line 25

# Indents a block of text: every line of +s+ (split on "\n") gets +n+
# copies of +char+ prepended, and the lines are joined with "\n" again.
#
# Because the text is split and re-joined, trailing newlines of +s+ are
# not present in the result.
def add_tabs(s, n = 1, char = "\t")
	indented = []
	s.split("\n").each do |line|
		# The prefix is built per line, exactly as many times as there are lines.
		indented << (char * n) + line
	end
	indented.join("\n")
end

#dbg_describe_ary(a, prefix = '') ⇒ Object



172
173
174
175
176
177
# File 'lib/maruku/string_utils.rb', line 172

# Debugging aid: prints each element of +a+ on its own line as
# "<prefix> (<1-based position>)# <element.inspect>".
#
# Returns +a+ (the value of the iteration itself).
def dbg_describe_ary(a, prefix = '')
	a.each_with_index do |item, index|
		position = index + 1
		puts "#{prefix} (#{position})# #{item.inspect}"
	end
end

#force_linebreak?(l) ⇒ Boolean

Returns:

  • (Boolean)


179
180
181
# File 'lib/maruku/string_utils.rb', line 179

def force_linebreak?(l)
	l =~ /  $/
end

#line_md_type(l) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/maruku/input/type_detection.rb', line 36

# Classifies a single line +l+ of Markdown source and returns a Symbol
# naming its type (:text, :code, :empty, :footnote_text, :ref_definition,
# :abbreviation, :definition, :xml_instr, :raw_html, :ulist, :olist,
# :header1, :header2, :header3, :hrule, :quote, :metadata, :ald, :ial).
#
# The checks run top to bottom and the first one that matches wins, so
# the order of the +return+ lines below is part of the behaviour.
# :text is returned when nothing else matches.
def line_md_type(l)
	# The order of evaluation is important (:text is a catch-all)
	# Fast path: a line starting with an ASCII letter is plain text.
	return :text   if l =~ /^[a-zA-Z]/
	# Checked before :empty, so a line of four or more blanks is :code.
	return :code             if number_of_leading_spaces(l)>=4
	return :empty    if l =~ /^\s*$/
	# Bracketed definitions: footnotes first, then link references
	# (complete or with the URL missing), then abbreviations.
	return :footnote_text    if l =~ FootnoteText
	return :ref_definition   if l =~ LinkRegex or l=~ IncompleteLink
	return :abbreviation     if l =~ Abbreviation
	return :definition       if l =~ Definition
	# I had a bug with emails and urls at the beginning of the
	# line that were mistaken for raw_html
	return :text if l=~ /^#{EMailAddress}/
	return :text if l=~ /^<http:/
	# raw html is like PHP Markdown Extra: at most three spaces before
	# "<?" opens an XML processing instruction (any leading whitespace).
	return :xml_instr if l =~ %r{^\s*<\?}
	# An opening or closing tag, or the start of an HTML comment.
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-}
	# List items: a bullet (*, - or +) followed by whitespace, or digits
	# followed by a dot; both need a word character later on the line.
	return :ulist    if l =~ /^\s?([\*\-\+])\s+.*\w+/
	return :olist    if l =~ /^\s?\d+\..*\w+/
	# Setext underlines ("===" and "---") and ATX ("#") headers.
	return :header1  if l =~ /^(=)+/ 
	return :header2  if l =~ /^([-\s])+$/ 
	return :header3  if l =~ /^(#)+\s*\S+/ 
	# at least three asterisks on a line, and only whitespace
	return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/ 
	# NOTE(review): a line made only of hyphens and whitespace already
	# matches the :header2 pattern above, so the hyphen rule below looks
	# unreachable for single-line input -- confirm how callers handle it.
	return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
	return :hrule    if l =~ /^(\s*_\s*){3,1000}$/ # or underscores	
	return :quote    if l =~ /^>/
	return :metadata if l =~ /^@/
#		if @@new_meta_data?
		# Attribute definition list ("{id}: attrs") and inline attribute
		# list ("{attrs}"); ALD is tested first, before the IAL pattern.
		return :ald   if l =~ AttributeDefinitionList
		return :ial   if l =~ InlineAttributeList
#		end
#		return :equation_end if l =~ EquationEnd
	return :text # else, it's just text
end

#normalize_key_and_value(k, v) ⇒ Object

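Keys are stripped and downcased, and spaces become underscores (conversion to symbols is left to the caller). Values are stripped; a missing value defaults to true, and “yes”/“true”/“no”/“false” (any case) become booleans.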



67
68
69
70
71
72
73
74
75
76
77
# File 'lib/maruku/string_utils.rb', line 67

# Normalizes one header key/value pair.
#
# The value is stripped; a missing (nil/false) value becomes true, and the
# words yes/true and no/false (any case) become the booleans true/false.
# The key is stripped, downcased, and its spaces are replaced by
# underscores. The key is returned as a String, not a Symbol.
#
# Returns the two-element array [key, value].
def normalize_key_and_value(k, v)
	value = v ? v.strip : true # no value defaults to true

	# map the textual synonyms onto real booleans
	value =
		case value.to_s.downcase
		when 'yes', 'true' then true
		when 'no', 'false' then false
		else value
		end

	key = k.strip.downcase.gsub(' ', '_')
	[key, value]
end

#num_leading_hashes(s) ⇒ Object

Counts the number of leading ‘#’ in the string



135
136
137
138
139
# File 'lib/maruku/string_utils.rb', line 135

# Counts the number of leading '#' characters in +s+.
#
# Every leading hash is counted, including the last character of the
# string, so "###" yields 3 and "#" yields 1. A string that does not
# start with '#' (or is empty) yields 0.
def num_leading_hashes(s)
	# /\A#*/ always matches at the start (possibly the empty string),
	# so the slice is never nil.
	s[/\A#*/].size
end

#number_of_leading_spaces(s) ⇒ Object

Returns the number of leading spaces, considering that a tab counts as ‘TabSize` spaces.



81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/maruku/string_utils.rb', line 81

# Measures the indentation of +s+: walks the leading characters and adds
# 1 for every space and TabSize for every tab, stopping at the first
# character that is neither.
#
# Returns the accumulated width (0 when +s+ is empty or not indented).
def number_of_leading_spaces(s)
	width = 0
	s.each_char do |ch|
		case ch
		when ' '  then width += 1
		when "\t" then width += TabSize
		else break
		end
	end
	width
end

#parse_email_headers(s) ⇒ Object

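Parses email-style headers at the beginning of the string. Returns a hash.

hash[:data] is the message: the text that follows the headers, or the whole string when it does not start with a header block.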

Keys are downcased, space becomes underscore, converted to symbols.

My key: true

becomes:

{:my_key => true}


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/maruku/string_utils.rb', line 47

def parse_email_headers(s)
	keys={}
	match = (s =~ /((\w[\w\s]+: .*\n)+)\n/)
	if match != 0
		keys[:data] = s
	else
		keys[:data] = $'
		headers = $1
		headers.split("\n").each do |l| 
			k, v = l.split(':')
			k, v = normalize_key_and_value(k, v)
			k = k.to_sym
#				puts "K = #{k}, V=#{v}"
			keys[k] = v
		end
	end
	keys
end

#spaces_before_first_char(s) ⇒ Object

This returns the position of the first real char in a list item

For example:

'*Hello' # => 1
'* Hello' # => 2
' * Hello' # => 3
' *   Hello' # => 5
'1.Hello' # => 2
' 1.  Hello' # => 5


106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/maruku/string_utils.rb', line 106

# Returns the position of the first character of the text of a list item,
# i.e. the width of "indentation + marker + following whitespace".
#
# For an unordered item (+s.md_type+ is :ulist) the marker is one
# character; for an ordered item (:olist) it is a run of digits plus one
# more character (the dot). For any other type the problem is reported
# through +tell_user+ and 0 is returned.
def spaces_before_first_char(s)
	# Advances +pos+ past every consecutive character matching +pattern+.
	skip = lambda do |pos, pattern|
		pos += 1 while s[pos, 1] =~ pattern
		pos
	end

	case s.md_type
	when :ulist
		# leading whitespace, then the one-character indicator (+, -, *)
		after_marker = skip.call(0, /\s/) + 1
		# optional whitespace between the indicator and the text
		skip.call(after_marker, /\s/)
	when :olist
		# leading whitespace, then the digits
		after_digits = skip.call(skip.call(0, /\s/), /\d/)
		# the dot, then the whitespace before the text
		skip.call(after_digits + 1, /\s/)
	else
		tell_user "BUG (my bad): '#{s}' is not a list"
		0
	end
end

#split_lines(s) ⇒ Object



31
32
33
# File 'lib/maruku/string_utils.rb', line 31

# Breaks +s+ into an array of lines, using "\n" as the separator.
# The separators are not part of the returned lines, and empty lines at
# the end of +s+ are not returned.
def split_lines(s)
	line_break = "\n"
	s.split(line_break)
end

#strip_hashes(s) ⇒ Object

Strips initial and final hashes



142
143
144
145
146
147
# File 'lib/maruku/string_utils.rb', line 142

# Strips the initial and final hashes of a header line and returns the
# remaining title with surrounding whitespace removed.
#
# The leading hashes are those counted by num_leading_hashes. From the
# end, every '#' or whitespace character is dropped, except that the
# first remaining character is never dropped by that backward scan.
def strip_hashes(s)
	title = s[num_leading_hashes(s), s.size]
	# Index of the last character that is neither '#' nor whitespace;
	# falls back to 0 so the first character is always kept.
	last_kept = title.rindex(/[^#\s]/) || 0
	title[0, last_kept + 1].strip
end

#strip_indent(s, n) ⇒ Object

Removes at most n columns of leading indentation (a space counts as 1, a tab as TabSize).



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/maruku/string_utils.rb', line 156

# Removes up to +n+ columns of leading indentation from +s+.
#
# Leading characters are consumed while the remaining budget +n+ is
# positive: a space uses 1 column and a tab uses TabSize columns (a tab
# is removed whole even when fewer than TabSize columns are left).
# Scanning stops at the first character that is neither a space nor a tab.
#
# Returns the rest of the string, always a String: the full text when
# nothing is stripped, and "" for an empty or fully consumed input.
def strip_indent(s, n)
	i = 0
	while i < s.size && n > 0
		case s[i, 1]
		when ' '  then n -= 1
		when "\t" then n -= TabSize
		else break
		end
		i += 1
	end
	# Take everything from +i+ to the end. A fixed length of s.size-1
	# would cut the last character when i == 0 and give nil for "".
	s[i..-1]
end

#unquote(s) ⇒ Object

removes initial quote



151
152
153
# File 'lib/maruku/string_utils.rb', line 151

# Removes the quote marker from +s+: at the beginning of every line, a
# '>' together with at most one whitespace character after it is deleted.
# Returns a new string; +s+ itself is left untouched.
def unquote(s)
	quote_marker = /^>\s?/
	s.gsub(quote_marker, '')
end