Module: MaRuKu::Strings

Included in:: In::Markdown::BlockLevelParser, In::Markdown::BlockLevelParser::LineSource, In::Markdown::SpanLevelParser::CharSourceManual, In::Markdown::SpanLevelParser::CharSourceStrscan, In::Markdown::SpanLevelParser::HTMLHelper, In::Markdown::SpanLevelParser::SpanContext, MDElement, String

Defined in:: lib/maruku.rb,
lib/maruku/string_utils.rb,
lib/maruku/input/type_detection.rb

Overview

These are string utilities.

Constant Summary collapse

TabSize =

AttributeDefinitionList = $1 = id $2 = attribute list

/^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/

InlineAttributeList =

/^\s{0,3}\{(.*)\}\s*$/

Definition = Example: ^:blah blah ^: blah blah ^ : blah blah

%r{ 
	^ # begin of line
	[ ]{0,3} # up to 3 spaces
	: # colon
	\s* # whitespace
	(\S.*) # the text    = $1
	$ # end of line
}x

Abbreviation = Example: *[HTML]: Hyper Text Markup Language

%r{
	^  # begin of line
	\* # one asterisk
	\[ # opening bracket
	([^\]]+) # any non-closing bracket:  id = $1
	\] # closing bracket
	:  # colon
	\s* # whitespace
	(\S.*\S)* #           definition=$2
	\s* # strip this whitespace
	$   # end of line
}x

FootnoteText =

%r{
	^\s*\[(\^.+)\]: # id = $1 (including '^')
	\s*(\S.*)?$    # text = $2 (optional)
}x

LinkRegex = This regex is taken from BlueCloth sources Link defs are in the form: ^[id]: n? url “optional title”

%r{
		^[ ]*\[([^\]]+)\]:		# id = $1
 [ ]*
		<?(\S+)>?				# url = $2
 [ ]*
		(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?)			# title = $3
[")']			# Matching ) or "
\s*(.+)?   # stuff = $4
		)?	# title is optional
}x

IncompleteLink =

%r{^\s*\[(.+)\]:\s*$}

HeaderWithId =

/^(.*)\{\#([\w_-]+)\}\s*$/

HeaderWithAttributes =

/^(.*)\{(.*)\}\s*$/

MightBeTableHeader = if contains a pipe, it could be a table header

%r{\|}

Sep = ————-:

/\s*(\:)?\s*-+\s*(\:)?\s*/

TableSeparator = | ————-:| —————————— |

%r{^(\|?#{Sep}\|?)+\s*$}

EMailAddress =

/<([^:]+@[^:]+)>/

Instance Method Summary collapse

#add_tabs(s, n = 1, char = "\t") ⇒ Object
#dbg_describe_ary(a, prefix = '') ⇒ Object
#force_linebreak?(l) ⇒ Boolean
#line_md_type(l) ⇒ Object
#normalize_key_and_value(k, v) ⇒ Object

Keys are downcased, space becomes underscore, converted to symbols.
#num_leading_hashes(s) ⇒ Object

Counts the number of leading ‘#’ in the string.
#number_of_leading_spaces(s) ⇒ Object

Returns the number of leading spaces, considering that a tab counts as ‘TabSize` spaces.
#parse_email_headers(s) ⇒ Object

This parses email headers.
#spaces_before_first_char(s) ⇒ Object

This returns the position of the first real char in a list item.
#split_lines(s) ⇒ Object
#strip_hashes(s) ⇒ Object

Strips initial and final hashes.
#strip_indent(s, n) ⇒ Object

Removes at most n characters of leading indentation.
#unquote(s) ⇒ Object

removes initial quote.

Instance Method Details

#add_tabs(s, n = 1, char = "\t") ⇒ `Object`



25
26
27

# File 'lib/maruku/string_utils.rb', line 25

# Prefixes every line of +s+ with +n+ copies of +char+ (a tab by default).
#
# The text is cut into lines with String#split on "\n", so trailing empty
# lines (and therefore a trailing newline) do not survive in the result.
#
# s    - the text to indent
# n    - how many times +char+ is repeated in front of each line
# char - the string used as indentation unit
#
# Returns the indented text as a new String.
def add_tabs(s,n=1,char="\t")
	indented = s.split("\n").map do |line|
		char*n + line
	end
	indented.join("\n")
end

#dbg_describe_ary(a, prefix = '') ⇒ `Object`

# File 'lib/maruku/string_utils.rb', line 172

# Debugging aid: writes one line per element of +a+ to standard output
# (through +puts+), each made of +prefix+, a 1-based counter in
# parentheses and the element's +inspect+ representation.
#
# a      - the collection to dump
# prefix - text placed at the start of every printed line
#
# Returns +a+.
def dbg_describe_ary(a, prefix='')
	a.each_with_index do |item, index|
		puts "#{prefix} (#{index + 1})# #{item.inspect}"
	end
end

#force_linebreak?(l) ⇒ `Boolean`

Returns:

(Boolean)



179
180
181

# File 'lib/maruku/string_utils.rb', line 179

# Tells whether +l+ contains two spaces immediately before an end of line,
# which is how Markdown marks a forced line break.
#
# The result is meant to be used for its truthiness only: it is the index
# of the first match (an Integer) or nil when there is none, not a strict
# true/false.
def force_linebreak?(l)
	/  $/ =~ l
end

#line_md_type(l) ⇒ `Object`

# File 'lib/maruku/input/type_detection.rb', line 36

# Classifies a single line of Markdown source and returns a Symbol naming
# its type.
#
# The checks below run top to bottom and the first one that matches
# returns, so their relative order is part of the behaviour. Possible
# results are :text, :code, :empty, :footnote_text, :ref_definition,
# :abbreviation, :definition, :xml_instr, :raw_html, :ulist, :olist,
# :header1, :header2, :header3, :hrule, :quote, :metadata, :ald and :ial.
#
# l - the line to classify (presumably a String without its trailing
#     newline; TODO confirm against callers)
def line_md_type(l)
	# The order of evaluation is important (:text is a catch-all)
	# A line starting with an ASCII letter is decided as text before any
	# other rule gets a chance to look at it.
	return :text   if l =~ /^[a-zA-Z]/
	# Four or more columns of leading indentation (tabs count as TabSize).
	return :code             if number_of_leading_spaces(l)>=4
	return :empty    if l =~ /^\s*$/
	return :footnote_text    if l =~ FootnoteText
	return :ref_definition   if l =~ LinkRegex or l=~ IncompleteLink
	return :abbreviation     if l =~ Abbreviation
	return :definition       if l =~ Definition
	# I had a bug with emails and urls at the beginning of the 
	# line that were mistaken for raw_html
	return :text if l=~ /^#{EMailAddress}/
	# NOTE(review): only "<http:" is special-cased; a line starting with
	# "<https:" is not caught here and goes on to match the :raw_html
	# pattern below.
	return :text if l=~ /^<http:/
	# raw html is like PHP Markdown Extra: at most three spaces before
	return :xml_instr if l =~ %r{^\s*<\?}
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-}
	return :ulist    if l =~ /^\s?([\*\-\+])\s+.*\w+/
	return :olist    if l =~ /^\s?\d+\..*\w+/
	return :header1  if l =~ /^(=)+/ 
	# NOTE(review): any non-empty line made only of hyphens and whitespace
	# matches here, so the hyphen variant of :hrule further down can never
	# be reached.
	return :header2  if l =~ /^([-\s])+$/ 
	return :header3  if l =~ /^(#)+\s*\S+/ 
	# at least three asterisks on a line, and only whitespace
	return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/ 
	return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
	return :hrule    if l =~ /^(\s*_\s*){3,1000}$/ # or underscores	
	return :quote    if l =~ /^>/
	return :metadata if l =~ /^@/
#		if @@new_meta_data?
		return :ald   if l =~ AttributeDefinitionList
		return :ial   if l =~ InlineAttributeList
#		end
#		return :equation_end if l =~ EquationEnd
	return :text # else, it's just text
end

#normalize_key_and_value(k, v) ⇒ `Object`

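Keys are stripped and downcased, and spaces become underscores; the key is returned as a String (callers such as parse_email_headers convert it to a symbol). Values are stripped, and "yes"/"true" and "no"/"false" become the booleans true and false.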

# File 'lib/maruku/string_utils.rb', line 67

# Normalizes one key/value pair of metadata.
#
# The value is stripped; a missing (nil/false) value counts as +true+.
# The words "yes"/"true" and "no"/"false" (in any letter case) are turned
# into the booleans +true+ and +false+. The key is stripped, downcased and
# has its spaces replaced by underscores; it is returned as a String.
#
# k - the raw key (String)
# v - the raw value (String) or nil
#
# Returns a two-element Array: [normalized_key, normalized_value].
def normalize_key_and_value(k,v)
	value = v ? v.strip : true # no value defaults to true
	key = k.strip.downcase.tr(' ', '_')

	# map the boolean synonyms, leave anything else untouched
	value =
		case value.to_s.downcase
		when 'yes', 'true' then true
		when 'no', 'false' then false
		else value
		end

	[key, value]
end

#num_leading_hashes(s) ⇒ `Object`

Counts the number of leading ‘#’ in the string

# File 'lib/maruku/string_utils.rb', line 135

# Counts the number of leading '#' characters in +s+.
#
# The previous loop stopped one character before the end of the string, so
# a string made only of hashes was undercounted ("###" gave 2, "#" gave 0).
# Every leading hash is now counted, including the last character.
#
# s - the String to inspect
#
# Returns the count as an Integer (0 when +s+ does not start with '#').
def num_leading_hashes(s)
	# \A#* always matches (possibly the empty string), so the slice is never nil
	s[/\A#*/].size
end

#number_of_leading_spaces(s) ⇒ `Object`

Returns the number of leading spaces, considering that a tab counts as ‘TabSize` spaces.

# File 'lib/maruku/string_utils.rb', line 81

# Measures the indentation of +s+: every leading space counts as 1 and
# every leading tab counts as +TabSize+. Counting stops at the first
# character that is neither a space nor a tab.
#
# s - the String to inspect
#
# Returns the indentation width as an Integer.
def number_of_leading_spaces(s)
	width = 0
	s.each_char do |ch|
		case ch
		when ' '  then width += 1
		when "\t" then width += TabSize
		else break
		end
	end
	width
end

#parse_email_headers(s) ⇒ `Object`

This parses email headers. Returns a hash.

hash[:data] is the message.

Keys are downcased, space becomes underscore, converted to symbols.

My key: true

becomes:

{:my_key => true}

# File 'lib/maruku/string_utils.rb', line 47

def parse_email_headers(s)
	keys={}
	match = (s =~ /((\w[\w\s]+: .*\n)+)\n/)
	if match != 0
		keys[:data] = s
	else
		keys[:data] = $'
		headers = $1
		headers.split("\n").each do |l| 
			k, v = l.split(':')
			k, v = normalize_key_and_value(k, v)
			k = k.to_sym
#				puts "K = #{k}, V=#{v}"
			keys[k] = v
		end
	end
	keys
end

#spaces_before_first_char(s) ⇒ `Object`

This returns the position of the first real char in a list item

For example:

'*Hello' # => 1
'* Hello' # => 2
' * Hello' # => 3
' *   Hello' # => 5
'1.Hello' # => 2
' 1.  Hello' # => 5

# File 'lib/maruku/string_utils.rb', line 106

# Returns the position of the first real character of a list item, i.e.
# the offset just past the list marker and the whitespace around it.
#
#   '* Hello'    # => 2
#   ' *   Hello' # => 5
#   '1.Hello'    # => 2
#   ' 1.  Hello' # => 5
#
# s - the line; it must respond to +md_type+. For +:ulist+ the marker is a
#     single character, for +:olist+ it is a run of digits followed by one
#     character (the dot).
#
# For any other line type the problem is reported through +tell_user+ and
# 0 is returned.
def spaces_before_first_char(s)
	# Moves +pos+ forward past every consecutive character matching +pattern+.
	skip = lambda do |pos, pattern|
		pos += 1 while s[pos,1] =~ pattern
		pos
	end

	case s.md_type
	when :ulist
		# optional whitespace, then the one-character indicator (+, -, *)
		after_marker = skip.call(0, /\s/) + 1
		# optional whitespace after the indicator
		skip.call(after_marker, /\s/)
	when :olist
		# optional whitespace, the digits, then the dot
		after_digits = skip.call(skip.call(0, /\s/), /\d/)
		after_dot = after_digits + 1
		# whitespace after the dot
		skip.call(after_dot, /\s/)
	else
		tell_user "BUG (my bad): '#{s}' is not a list"
		0
	end
end

#split_lines(s) ⇒ `Object`



31
32
33

# File 'lib/maruku/string_utils.rb', line 31

# Breaks +s+ into its lines, using "\n" as the separator.
#
# This is a plain String#split, so the newline characters are not part of
# the returned lines and trailing empty lines are dropped.
#
# Returns an Array of Strings.
def split_lines(s)
	separator = "\n"
	s.split(separator)
end

#strip_hashes(s) ⇒ `Object`

Strips initial and final hashes

# File 'lib/maruku/string_utils.rb', line 142

# Strips the leading hashes (as counted by num_leading_hashes) and any
# trailing run of hashes and whitespace from +s+, then trims surrounding
# whitespace from what is left.
#
# The backwards scan never goes past index 0, so the first remaining
# character is always kept by the scan itself (the final +strip+ may still
# remove it when it is whitespace).
#
# Returns the cleaned-up String.
def strip_hashes(s)
	rest = s[num_leading_hashes(s), s.size]
	last = rest.size - 1
	last -= 1 while last > 0 && (rest[last,1] =~ /(#|\s)/)
	rest[0, last+1].strip
end

#strip_indent(s, n) ⇒ `Object`

Removes at most n characters of leading indentation.

# File 'lib/maruku/string_utils.rb', line 156

# Removes at most +n+ columns of leading indentation from +s+.
#
# A space uses up one column and a tab uses up +TabSize+ columns; scanning
# stops at the first character that is neither, or once +n+ is used up.
#
# The remainder is now returned in full. The previous slice used a length
# of +s.size-1+, which lost the last character whenever nothing was
# stripped (e.g. "abc" came back as "ab") and returned nil for an empty
# string.
#
# s - the line to de-indent
# n - the maximum number of columns to remove
#
# Returns the rest of the line as a String.
def strip_indent(s, n) 
	i = 0
	while i < s.size && n > 0
		case s[i,1]
		when ' '  then n -= 1
		when "\t" then n -= TabSize
		else break
		end
		i += 1
	end
	# from i to the end: s.size is always long enough to cover the remainder
	s[i, s.size]
end

#unquote(s) ⇒ `Object`

removes initial quote



151
152
153

# File 'lib/maruku/string_utils.rb', line 151

# Removes the blockquote marker from +s+: on every line that starts with
# '>', that character and at most one whitespace character after it are
# deleted. Lines that do not start with '>' are left untouched.
#
# Returns a new String.
def unquote(s)
	quote_marker = /^>\s?/
	s.gsub(quote_marker, '')
end

Module: MaRuKu::Strings

Overview

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#add_tabs(s, n = 1, char = "\t") ⇒ Object

#dbg_describe_ary(a, prefix = '') ⇒ Object

#force_linebreak?(l) ⇒ Boolean

#line_md_type(l) ⇒ Object

#normalize_key_and_value(k, v) ⇒ Object

#num_leading_hashes(s) ⇒ Object

#number_of_leading_spaces(s) ⇒ Object

#parse_email_headers(s) ⇒ Object

#spaces_before_first_char(s) ⇒ Object

#split_lines(s) ⇒ Object

#strip_hashes(s) ⇒ Object

#strip_indent(s, n) ⇒ Object

#unquote(s) ⇒ Object