Module: Linguistics::EN

Defined in:
lib/linguistics/en.rb,
lib/linguistics/en/wordnet.rb,
lib/linguistics/en/infinitive.rb,
lib/linguistics/en/linkparser.rb

Overview

This file contains functions for deriving the infinitive forms of conjugated English words. Requiring this file adds functions and constants to the Linguistics::EN module.

Authors

Copyright © 2003-2005 The FaerieMUD Consortium. All rights reserved.

This module is free software. You may use, modify, and/or redistribute this software under the terms of the Perl Artistic License. (See language.perl.com/misc/Artistic.html)

This code was ported from the excellent ‘Lingua::EN::Infinitive’ Perl module by Ron Savage, which is distributed under the following license:

  Australian copyright (c) 1999-2002 Ron Savage.

  	All Programs of mine are 'OSI Certified Open Source Software';
  	you can redistribute them and/or modify them under the terms of
  	The Artistic License, a copy of which is available at:
  	http://www.opensource.org/licenses/index.html

Version

$Id: infinitive.rb,v 1.2 2003/09/14 10:35:32 deveiant Exp $

Defined Under Namespace

Classes: Infinitive

Constant Summary collapse

SVNRev =

Subversion revision

%q$Rev$
SVNId =

Subversion revision tag

%q$Id: en.rb,v 1.8 2003/09/14 10:47:12 deveiant Exp $
PL_sb_irregular_s =

Plurals

{
	"ephemeris"	=> "ephemerides",
	"iris"		=> "irises|irides",
	"clitoris"	=> "clitorises|clitorides",
	"corpus"	=> "corpuses|corpora",
	"opus"		=> "opuses|opera",
	"genus"		=> "genera",
	"mythos"	=> "mythoi",
	"penis"		=> "penises|penes",
	"testis"	=> "testes",
}
PL_sb_irregular_h =
{
	"child"		=> "children",
	"brother"	=> "brothers|brethren",
	"loaf"		=> "loaves",
	"hoof"		=> "hoofs|hooves",
	"beef"		=> "beefs|beeves",
	"money"		=> "monies",
	"mongoose"	=> "mongooses",
	"ox"		=> "oxen",
	"cow"		=> "cows|kine",
	"soliloquy"	=> "soliloquies",
	"graffito"	=> "graffiti",
	"prima donna"	=> "prima donnas|prime donne",
	"octopus"	=> "octopuses|octopodes",
	"genie"		=> "genies|genii",
	"ganglion"	=> "ganglions|ganglia",
	"trilby"	=> "trilbys",
	"turf"		=> "turfs|turves",
}.update( PL_sb_irregular_s )
PL_sb_irregular =
matchgroup PL_sb_irregular_h.keys
PL_sb_C_a_ata =

Classical “..a” -> “..ata”

matchgroup %w[
	anathema bema carcinoma charisma diploma
	dogma drama edema enema enigma lemma
	lymphoma magma melisma miasma oedema
	sarcoma schema soma stigma stoma trauma
	gumma pragma
].collect {|word| word[0...-1]}
PL_sb_U_a_ae =

Unconditional “..a” -> “..ae”

matchgroup %w[
	alumna alga vertebra persona
]
PL_sb_C_a_ae =

Classical “..a” -> “..ae”

matchgroup %w[
	amoeba antenna formula hyperbola
	medusa nebula parabola abscissa
	hydra nova lacuna aurora .*umbra
	flora fauna
]
PL_sb_C_en_ina =

Classical “..en” -> “..ina”

matchgroup %w[
	stamen	foramen	lumen
].collect {|word| word[0...-2] }
PL_sb_U_um_a =

Unconditional “..um” -> “..a”

matchgroup %w[
	bacterium	agendum	desideratum	erratum
	stratum	datum	ovum		extremum
	candelabrum
].collect {|word| word[0...-2] }
PL_sb_C_um_a =

Classical “..um” -> “..a”

# Nouns ending in "..um" whose classical plural ends in "..a". The final
# "um" is stripped from each word before the list is handed to matchgroup
# (defined elsewhere in this module).
#
# "enconium" and "millenium" are historical misspellings; they are kept so
# existing behaviour is unchanged, and the correct spellings "encomium" and
# "millennium" are added at the end of the list so the real words match too.
matchgroup %w[
	maximum	minimum	momentum	optimum
	quantum	cranium	curriculum	dictum
	phylum	aquarium	compendium	emporium
	enconium	gymnasium	honorarium	interregnum
	lustrum	memorandum	millenium	rostrum
	spectrum	speculum	stadium	trapezium
	ultimatum	medium	vacuum	velum
	consortium	encomium	millennium
].collect {|word| word[0...-2]}
PL_sb_U_us_i =

Unconditional “..us” -> “..i”

matchgroup %w[
	alumnus	alveolus	bacillus	bronchus
	locus	nucleus	stimulus	meniscus
].collect {|word| word[0...-2]}
PL_sb_C_us_i =

Classical “..us” -> “..i”

matchgroup %w[
	focus	radius	genius
	incubus	succubus	nimbus
	fungus	nucleolus	stylus
	torus	umbilicus	uterus
	hippopotamus
].collect {|word| word[0...-2]}
PL_sb_C_us_us =

Classical “..us” -> “..us” (assimilated 4th declension latin nouns)

matchgroup %w[
	status apparatus prospectus sinus
	hiatus impetus plexus
]
PL_sb_U_on_a =

Unconditional “..on” -> “..a”

matchgroup %w[
	criterion	perihelion	aphelion
	phenomenon	prolegomenon	noumenon
	organon	asyndeton	hyperbaton
].collect {|word| word[0...-2]}
PL_sb_C_on_a =

Classical “..on” -> “..a”

matchgroup %w[
	oxymoron
].collect {|word| word[0...-2]}
PL_sb_C_o_i_a =

Classical “..o” -> “..i” (but normally -> “..os”)

%w[
	solo		soprano	basso	alto
	contralto	tempo	piano
]
PL_sb_C_o_i =
matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
PL_sb_U_o_os =

Always “..o” -> “..os”

matchgroup( %w[
	albino	archipelago	armadillo
	commando	crescendo	fiasco
	ditto	dynamo	embryo
	ghetto	guano	inferno
	jumbo	lumbago	magneto
	manifesto	medico	octavo
	photo	pro		quarto	
	canto	lingo	generalissimo
	stylo	rhino
] | PL_sb_C_o_i_a )
PL_sb_U_ex_ices =

Unconditional “..[ei]x” -> “..ices”

matchgroup %w[
	codex	murex	silex
].collect {|word| word[0...-2]}
PL_sb_U_ix_ices =
matchgroup %w[
	radix	helix
].collect {|word| word[0...-2]}
PL_sb_C_ex_ices =

Classical “..[ei]x” -> “..ices”

matchgroup %w[
	vortex	vertex	cortex	latex
	pontifex	apex		index	simplex
].collect {|word| word[0...-2]}
PL_sb_C_ix_ices =
matchgroup %w[
	appendix
].collect {|word| word[0...-2]}
PL_sb_C_i =

Arabic: “..” -> “..i”

matchgroup %w[
	afrit	afreet	efreet
]
PL_sb_C_im =

Hebrew: “..” -> “..im”

matchgroup %w[
	goy		seraph	cherub
]
PL_sb_U_man_mans =

Unconditional “..man” -> “..mans”

matchgroup %w[
	human
	Alabaman Bahaman Burman German
	Hiroshiman Liman Nakayaman Oklahoman
	Panaman Selman Sonaman Tacoman Yakiman
	Yokohaman Yuman
]
PL_sb_uninflected_s =
[
	# Pairs or groups that are treated as a single thing
	%w[
		breeches britches clippers gallows hijinks
		headquarters pliers scissors testes herpes
		pincers shears proceedings trousers
	],

	# Latin 4th declension nouns that were never assimilated
	%w[cantus coitus nexus],

	# Recent imports (".*ois" is a pattern fragment, not a literal word)
	%w[contretemps corps debris .*ois],

	# Diseases (".*measles" is likewise a pattern fragment)
	%w[.*measles mumps],

	# Everything else
	%w[diabetes jackanapes series species rabies chassis innings news mews],
].flatten
PL_sb_uninflected_herd =

Don’t inflect in classical mode, otherwise normal inflection

matchgroup %w[
	wildebeest swine eland bison buffalo
	elk moose rhinoceros
]
PL_sb_uninflected =
matchgroup [

	# Some fish and herd animals
	".*fish", "tuna", "salmon", "mackerel", "trout",
	"bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting", 

	".*deer", ".*sheep", 

	# All nationals ending in -ese
	"Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
	"Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
	"Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
	"Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
	"Shavese", "Vermontese", "Wenchowese", "Yengeese",
	".*[nrlm]ese",

	# Some words ending in ...s (often pairs taken as a whole)
	PL_sb_uninflected_s,

	# Diseases
	".*pox",

	# Other oddities
	"graffiti", "djinn"
]
PL_sb_singular_s =

Singular words ending in …s (all inflect with …es)

matchgroup %w[
	.*ss
	acropolis aegis alias arthritis asbestos atlas
	bathos bias bronchitis bursitis caddis cannabis
	canvas chaos cosmos dais digitalis encephalitis
	epidermis ethos eyas gas glottis hepatitis
	hubris ibis lens mantis marquis metropolis
	neuritis pathos pelvis polis rhinoceros
	sassafras tonsillitis trellis .*us
]
PL_v_special_s =
matchgroup [
	PL_sb_singular_s,
	PL_sb_uninflected_s,
	PL_sb_irregular_s.keys,
	'(.*[csx])is',
	'(.*)ceps',
	'[A-Z].*s',
]
PL_sb_postfix_adj =
'(' + {

	'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
	'martial' => ["court"],

}.collect {|key,val|
	matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
}.join("|") + ")(.*)"
PL_sb_military =
%r'major|lieutenant|brigadier|adjutant|quartermaster'
PL_sb_general =
%r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
PL_prep =
matchgroup %w[
	about above across after among around at athwart before behind
	below beneath beside besides between betwixt beyond but by
	during except for from in into near of off on onto out over
	since till to under until unto upon with
]
PL_sb_prep_dual_compound =
%r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
PL_sb_prep_compound =
%r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
PL_pron_nom_h =
{
	# Singular => plural. Each nominative pronoun is followed directly by
	# its reflexive form; the insertion order is significant because the
	# keys are later turned into a pattern (see PL_pron_nom).
	"i"			=> "we",
	"myself"	=> "ourselves",
	"you"		=> "you",
	"yourself"	=> "yourselves",
	"she"		=> "they",
	"herself"	=> "themselves",
	"he"		=> "they",
	"himself"	=> "themselves",
	"it"		=> "they",
	"itself"	=> "themselves",
	"they"		=> "they",
	"themself"	=> "themselves",

	# Possessive pronouns
	"mine"		=> "ours",
	"yours"		=> "yours",
	"hers"		=> "theirs",
	"his"		=> "theirs",
	"its"		=> "theirs",
	"theirs"	=> "theirs",
}
PL_pron_nom =
matchgroup PL_pron_nom_h.keys
PL_pron_acc_h =
{
	# Singular => plural. Each accusative pronoun is followed directly by
	# its reflexive form; insertion order is preserved for PL_pron_acc.
	"me"		=> "us",
	"myself"	=> "ourselves",
	"you"		=> "you",
	"yourself"	=> "yourselves",
	"her"		=> "them",
	"herself"	=> "themselves",
	"him"		=> "them",
	"himself"	=> "themselves",
	"it"		=> "them",
	"itself"	=> "themselves",
	"them"		=> "them",
	"themself"	=> "themselves",
}
PL_pron_acc =
matchgroup PL_pron_acc_h.keys
# Singular forms of the irregular verbs "be" and "have" mapped to their
# plural forms.
#
# Every key is listed exactly once. The earlier three-column layout
# (1st / 2nd / 3rd person) repeated "was" and "have", which Ruby reports as
# duplicated hash keys and silently collapses. Keys are kept in the order of
# their first appearance in that layout, so PL_v_irregular_pres_h.keys (and
# anything built from it) is unchanged.
PL_v_irregular_pres_h =
{
	# "be", present: 1st, 2nd and 3rd pers. singular
	"am"	=> "are",
	"are"	=> "are",
	"is"	=> "are",

	# "be", past: 1st/3rd and 2nd pers. singular
	"was"	=> "were",
	"were"	=> "were",

	# "have": 1st/2nd and 3rd pers. singular
	"have"	=> "have",
	"has"	=> "have",
}
PL_v_irregular_pres =
matchgroup PL_v_irregular_pres_h.keys
# Present-tense verb forms that could also be read as nouns, mapped to their
# plural (base) form.
#
# Every key is listed exactly once. The earlier three-column layout
# (1st / 2nd / 3rd person) wrote each base form twice ("can" and "must"
# three times), which Ruby reports as duplicated hash keys and silently
# collapses. Keys are kept in the order of their first appearance in that
# layout, so PL_v_ambiguous_pres_h.keys (and anything built from it) is
# unchanged.
PL_v_ambiguous_pres_h =
{
	#	1st/2nd pers. sing.		3rd pers. singular
	"act"	=> "act",		"acts"	  => "act",
	"blame"	=> "blame",		"blames"  => "blame",

	# Modal verbs: all persons share one form
	"can"	=> "can",
	"must"	=> "must",

	"fly"	=> "fly",		"flies"	  => "fly",
	"copy"	=> "copy",		"copies"  => "copy",
	"drink"	=> "drink",		"drinks"  => "drink",
	"fight"	=> "fight",		"fights"  => "fight",
	"fire"	=> "fire",		"fires"   => "fire",
	"like"	=> "like",		"likes"   => "like",
	"look"	=> "look",		"looks"   => "look",
	"make"	=> "make",		"makes"   => "make",
	"reach"	=> "reach",		"reaches" => "reach",
	"run"	=> "run",		"runs"    => "run",
	"sink"	=> "sink",		"sinks"   => "sink",
	"sleep"	=> "sleep",		"sleeps"  => "sleep",
	"view"	=> "view",		"views"   => "view",
}
PL_v_ambiguous_pres =
matchgroup PL_v_ambiguous_pres_h.keys
PL_v_irregular_non_pres =
matchgroup %w[
	did had ate made put 
	spent fought sank gave sought
	shall could ought should
]
PL_v_ambiguous_non_pres =
matchgroup %w[
	thought saw bent will might cut
]
PL_count_zero =
matchgroup %w[
	0 no zero nil
]
PL_count_one =
matchgroup %w[
	1 a an one each every this that
]
PL_adj_special_h =
{
	# Indefinite articles
	"a"		=> "some",
	"an"	=> "some",

	# Demonstratives
	"this"	=> "these",
	"that"	=> "those",
}
PL_adj_special =
matchgroup PL_adj_special_h.keys
PL_adj_poss_h =
{
	# Singular possessive adjective => plural possessive adjective
	"my" => "our",			"your" => "your",
	"its" => "their",		"her" => "their",
	"his" => "their",		"their" => "their",
}
PL_adj_poss =
matchgroup PL_adj_poss_h.keys
Nth =

Numerical inflections

{
	0 => 'th',
	1 => 'st',
	2 => 'nd',
	3 => 'rd',
	4 => 'th',
	5 => 'th',
	6 => 'th',
	7 => 'th',
	8 => 'th',
	9 => 'th',
	11 => 'th',
	12 => 'th',
	13 => 'th',
}
Ordinals =

Ordinal word parts

{
	'ty' => 'tieth',
	'one' => 'first',
	'two' => 'second',
	'three' => 'third',
	'five' => 'fifth',
	'eight' => 'eighth',
	'nine' => 'ninth',
	'twelve' => 'twelfth',
}
OrdinalSuffixes =
Ordinals.keys.join("|") + "|"
Units =

Numeral names

[''] + %w[one two three four five six seven eight nine]
# Names of the numbers 10 through 19, indexed by their last digit.
Teens =
%w[ten eleven twelve] +
	%w[thir four fif six seven eigh nine].map {|stem| "#{stem}teen" }
# Names of the multiples of ten, indexed by the tens digit. Indexes 0 and 1
# are empty-string placeholders.
Tens =
[''] * 2 + %w[twenty thirty forty fifty sixty seventy eighty ninety]
# Suffixes for successive powers of one thousand. Index 0 is a single space,
# index 1 is ' thousand', and every later entry is a space followed by an
# "-illion" name built from its stem.
Thousands =
[' ', ' thousand'] + %w[
	m b tr quadr quint sext sept oct non dec
	undec duodec tredec quattuordec quindec
	sexdec septemdec octodec novemdec vigint
].map {|stem| " #{stem}illion" }
NumberToWordsFunctions =

A collection of functions for transforming digits into word phrases. Indexed by the number of digits being transformed; e.g., NumberToWordsFunctions[2] is the function for transforming double-digit numbers.

[
	# Index 0 -- there is no zero-digit case: calling this entry always
	# raises, including the arguments it was given in the message.
	proc {|*args| raise "No digits (#{args.inspect})"},

	# Single-digits
	# zero:: the word to use for a zero digit
	# x::    the digit
	# A non-zero digit is passed to to_units (defined outside this list);
	# a zero digit yields the zero word followed by one space.
	proc {|zero,x|
		(x.nonzero? ? to_units(x) : "#{zero} ")
	},

	# Double-digits
	# zero:: the word to use for a zero digit
	# x, y:: the first and second digit
	# * first digit non-zero: both digits are passed to to_tens (defined
	#   outside this list)
	# * only the second digit non-zero: the zero word, a space, then the
	#   single-digit result for y
	# * both digits zero: the zero word twice, joined by a single space
	#   (no trailing space, unlike the single-digit zero case)
	proc {|zero,x,y|
		if x.nonzero?
			to_tens( x, y )
		elsif y.nonzero?
			"#{zero} " + NumberToWordsFunctions[1].call( zero, y )
		else
			([zero] * 2).join(" ")
		end
	},

	# Triple-digits
	# zero::    the word to use for a zero digit
	# x, y, z:: the three digits
	# Concatenates the single-digit result for x with the double-digit
	# result for y and z. No separator is added here, so spacing depends on
	# what the smaller entries return.
	proc {|zero,x,y,z|
		NumberToWordsFunctions[1].call(zero,x) + 
		NumberToWordsFunctions[2].call(zero,y,z)
	}
]
A_abbrev =

This pattern matches strings of capitals starting with a “vowel-sound” consonant followed by another consonant, and which are not likely to be real words (oh, all right then, it’s just magic!)

%{
	(?! FJO | [HLMNS]Y.  | RY[EO] | SQU
	  | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
	[FHLMNRSX][A-Z]
}
A_y_cons =

This pattern codes the beginnings of all English words beginning with a ‘y’ followed by a consonant. Any other y-consonant prefix therefore implies an abbreviation.

'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
A_explicit_an =

Exceptions to exceptions

matchgroup(	"euler", "hour(?!i)", "heir", "honest", "hono" )
NumwordDefaults =

Default configuration arguments for the #numwords function

{
	:group		=> 0,
	:comma		=> ', ',
	:and		=> ' and ',
	:zero		=> 'zero',
	:decimal	=> 'point',
	:asArray	=> false,
}
SeveralRange =

Default ranges for #quantify

2..5
NumberRange =
6..19
NumerousRange =
20..45
ManyRange =
46..99
QuantifyDefaults =

Default configuration arguments for the #quantify function

{
	:joinword	=> " of ",
}
ConjunctionDefaults =

Default configuration arguments for the #conjunction (junction, what’s your) function.

{
	:separator		=> ', ',
	:altsep			=> '; ',
	:penultimate	=> true,
	:conjunctive	=> 'and',
	:combine		=> true,
	:casefold		=> true,
	:generalize		=> false,
	:quantsort		=> true,
}
Articles =

Build the list of exceptions to title-capitalization

%w[a and the]
# Short prepositions; one of the word lists that are unioned into
# TitleCaseExceptions. Each word appears exactly once -- the list used to
# contain "with" twice. Removing the repeat does not change
# TitleCaseExceptions, because Array#| already drops duplicates.
ShortPrepositions =
%w[
	amid at but by down from in
	into like near of off on onto out over
	past save with till to unto up upon
]
CoordConjunctions =
%w[and but as]
TitleCaseExceptions =
Articles | ShortPrepositions | CoordConjunctions
IrregularInfinitives =

Irregular words => infinitive forms

{
	# Lookup table: irregular inflected verb form => its infinitive.
	# Entries are kept in alphabetical order of the inflected form.
	#
	# 'foreseen' was missing because the table only carried the misspelt key
	# 'forseen'. The correct spelling has been added next to 'foresaw'; the
	# misspelt key is kept so existing lookups keep working.
	'abided'			=> 'abide',
	'abode'				=> 'abide',
	'am'				=> 'be',
	'are'				=> 'be',
	'arisen'			=> 'arise',
	'arose'				=> 'arise',
	'ate'				=> 'eat',
	'awaked'			=> 'awake',
	'awoke'				=> 'awake',
	'bade'				=> 'bid',
	'beaten'			=> 'beat',
	'became'			=> 'become',
	'been'				=> 'be',
	'befallen'			=> 'befall',
	'befell'			=> 'befall',
	'began'				=> 'begin',
	'begat'				=> 'beget',
	'begot'				=> 'beget',
	'begotten'			=> 'beget',
	'begun'				=> 'begin',
	'beheld'			=> 'behold',
	'bent'				=> 'bend',
	'bereaved'			=> 'bereave',
	'bereft'			=> 'bereave',
	'beseeched'			=> 'beseech',
	'besought'			=> 'beseech',
	'bespoke'			=> 'bespeak',
	'bespoken'			=> 'bespeak',
	'bestrewed'			=> 'bestrew',
	'bestrewn'			=> 'bestrew',
	'bestrid'			=> 'bestride',
	'bestridden'		=> 'bestride',
	'bestrode'			=> 'bestride',
	'betaken'			=> 'betake',
	'bethought'			=> 'bethink',
	'betook'			=> 'betake',
	'betted'			=> 'bet',
	'bidden'			=> 'bid',
	'bided'				=> 'bide',
	'bit'				=> 'bite',
	'bitten'			=> 'bite',
	'bled'				=> 'bleed',
	'blended'			=> 'blend',
	'blent'				=> 'blend',
	'blessed'			=> 'bless',
	'blest'				=> 'bless',
	'blew'				=> 'blow',
	'blown'				=> 'blow',
	'bode'				=> 'bide',
	'bore'				=> 'bear',
	'born'				=> 'bear',
	'borne'				=> 'bear',
	'bought'			=> 'buy',
	'bound'				=> 'bind',
	'bred'				=> 'breed',
	'broadcasted'		=> 'broadcast',
	'broke'				=> 'break',
	'broken'			=> 'break',
	'brought'			=> 'bring',
	'browbeaten'		=> 'browbeat',
	'built'				=> 'build',
	'burned'			=> 'burn',
	'burnt'				=> 'burn',
	'came'				=> 'come',
	'caught'			=> 'catch',
	'chid'				=> 'chide',
	'chidden'			=> 'chide',
	'chided'			=> 'chide',
	'chose'				=> 'choose',
	'chosen'			=> 'choose',
	'clad'				=> 'clothe',
	'clave'				=> 'cleave',
	'cleaved'			=> 'cleave',
	'cleft'				=> 'cleave',
	'clothed'			=> 'clothe',
	'clove'				=> 'cleave',
	'cloven'			=> 'cleave',
	'clung'				=> 'cling',
	'costed'			=> 'cost',
	'could'				=> 'can',
	'crept'				=> 'creep',
	'crew'				=> 'crow',
	'crowed'			=> 'crow',
	'dealt'				=> 'deal',
	'did'				=> 'do',
	'done'				=> 'do',
	'dove'				=> 'dive',
	'drank'				=> 'drink',
	'drawn'				=> 'draw',
	'dreamed'			=> 'dream',
	'dreamt'			=> 'dream',
	'drew'				=> 'draw',
	'driven'			=> 'drive',
	'drove'				=> 'drive',
	'drunk'				=> 'drink',
	'dug'				=> 'dig',
	'dwelled'			=> 'dwell',
	'dwelt'				=> 'dwell',
	'eaten'				=> 'eat',
	'fallen'			=> 'fall',
	'fed'				=> 'feed',
	'fell'				=> 'fall',
	'felt'				=> 'feel',
	'fled'				=> 'flee',
	'flew'				=> 'fly',
	'flown'				=> 'fly',
	'flung'				=> 'fling',
	'forbad'			=> 'forbid',
	'forbade'			=> 'forbid',
	'forbidden'			=> 'forbid',
	'forbore'			=> 'forbear',
	'forborne'			=> 'forbear',
	'fordid'			=> 'fordo',
	'fordone'			=> 'fordo',
	'forecasted'		=> 'forecast',
	'foregone'			=> 'forego',
	'foreknew'			=> 'foreknow',
	'foreknown'			=> 'foreknow',
	'foreran'			=> 'forerun',
	'foresaw'			=> 'foresee',
	'foreseen'			=> 'foresee',
	'foreshowed'		=> 'foreshow',
	'foreshown'			=> 'foreshow',
	'foretold'			=> 'foretell',
	'forewent'			=> 'forego',
	'forgave'			=> 'forgive',
	'forgiven'			=> 'forgive',
	'forgot'			=> 'forget',
	'forgotten'			=> 'forget',
	'forsaken'			=> 'forsake',
	'forseen'			=> 'foresee',	# misspelling of 'foreseen', kept for compatibility
	'forsook'			=> 'forsake',
	'forswore'			=> 'forswear',
	'forsworn'			=> 'forswear',
	'fought'			=> 'fight',
	'found'				=> 'find',
	'froze'				=> 'freeze',
	'frozen'			=> 'freeze',
	'gainsaid'			=> 'gainsay',
	'gave'				=> 'give',
	'gilded'			=> 'gild',
	'gilt'				=> 'gild',
	'girded'			=> 'gird',
	'girt'				=> 'gird',
	'given'				=> 'give',
	'gone'				=> 'go',
	'got'				=> 'get',
	'gotten'			=> 'get',
	'graved'			=> 'grave',
	'graven'			=> 'grave',
	'grew'				=> 'grow',
	'ground'			=> 'grind',
	'grown'				=> 'grow',
	'had'				=> 'have',
	'hamstringed'		=> 'hamstring',
	'hamstrung'			=> 'hamstring',
	'hanged'			=> 'hang',
	'heard'				=> 'hear',
	'heaved'			=> 'heave',
	'held'				=> 'hold',
	'hewed'				=> 'hew',
	'hewn'				=> 'hew',
	'hid'				=> 'hide',
	'hidden'			=> 'hide',
	'hove'				=> 'heave',
	'hung'				=> 'hang',
	'inlaid'			=> 'inlay',
	'is'				=> 'be',
	'kept'				=> 'keep',
	'kneeled'			=> 'kneel',
	'knelt'				=> 'kneel',
	'knew'				=> 'know',
	'knitted'			=> 'knit',
	'known'				=> 'know',
	'laded'				=> 'lade',
	'laden'				=> 'lade',
	'laid'				=> 'lay',
	'lain'				=> 'lie',
	'lay'				=> 'lie',
	'leaned'			=> 'lean',
	'leant'				=> 'lean',
	'leaped'			=> 'leap',
	'leapt'				=> 'leap',
	'learned'			=> 'learn',
	'learnt'			=> 'learn',
	'led'				=> 'lead',
	'left'				=> 'leave',
	'lent'				=> 'lend',
	'lighted'			=> 'light',
	'lit'				=> 'light',
	'lost'				=> 'lose',
	'made'				=> 'make',
	'meant'				=> 'mean',
	'melted'			=> 'melt',
	'met'				=> 'meet',
	'might'				=> 'may',
	'misdealt'			=> 'misdeal',
	'misgave'			=> 'misgive',
	'misgiven'			=> 'misgive',
	'mislaid'			=> 'mislay',
	'misled'			=> 'mislead',
	'mistaken'			=> 'mistake',
	'mistook'			=> 'mistake',
	'misunderstood'		=> 'misunderstand',
	'molten'			=> 'melt',
	'mowed'				=> 'mow',
	'mown'				=> 'mow',
	'outate'			=> 'outeat',
	'outbade'			=> 'outbid',
	'outbidden'			=> 'outbid',
	'outbred'			=> 'outbreed',
	'outdid'			=> 'outdo',
	'outdone'			=> 'outdo',
	'outeaten'			=> 'outeat',
	'outfought'			=> 'outfight',
	'outgone'			=> 'outgo',
	'outgrew'			=> 'outgrow',
	'outgrown'			=> 'outgrow',
	'outlaid'			=> 'outlay',
	'outran'			=> 'outrun',
	'outridden'			=> 'outride',
	'outrode'			=> 'outride',
	'outsat'			=> 'outsit',
	'outshone'			=> 'outshine',
	'outshot'			=> 'outshoot',
	'outsold'			=> 'outsell',
	'outspent'			=> 'outspend',
	'outthrew'			=> 'outthrow',
	'outthrown'			=> 'outthrow',
	'outwent'			=> 'outgo',
	'outwore'			=> 'outwear',
	'outworn'			=> 'outwear',
	'overate'			=> 'overeat',
	'overbade'			=> 'overbid',
	'overbidden'		=> 'overbid',
	'overblew'			=> 'overblow',
	'overblown'			=> 'overblow',
	'overbore'			=> 'overbear',
	'overborn'			=> 'overbear',
	'overborne'			=> 'overbear',
	'overbought'		=> 'overbuy',
	'overbuilt'			=> 'overbuild',
	'overcame'			=> 'overcome',
	'overdid'			=> 'overdo',
	'overdone'			=> 'overdo',
	'overdrawn'			=> 'overdraw',
	'overdrew'			=> 'overdraw',
	'overdriven'		=> 'overdrive',
	'overdrove'			=> 'overdrive',
	'overeaten'			=> 'overeat',
	'overfed'			=> 'overfeed',
	'overflew'			=> 'overfly',
	'overflown'			=> 'overfly',
	'overgrew'			=> 'overgrow',
	'overgrown'			=> 'overgrow',
	'overhanged'		=> 'overhang',
	'overheard'			=> 'overhear',
	'overhung'			=> 'overhang',
	'overladed'			=> 'overlade',
	'overladen'			=> 'overlade',
	'overlaid'			=> 'overlay',
	'overlain'			=> 'overlie',
	'overlay'			=> 'overlie',
	'overleaped'		=> 'overleap',
	'overleapt'			=> 'overleap',
	'overpaid'			=> 'overpay',
	'overran'			=> 'overrun',
	'overridden'		=> 'override',
	'overrode'			=> 'override',
	'oversaw'			=> 'oversee',
	'overseen'			=> 'oversee',
	'oversewed'			=> 'oversew',
	'oversewn'			=> 'oversew',
	'overshot'			=> 'overshoot',
	'overslept'			=> 'oversleep',
	'overspent'			=> 'overspend',
	'overtaken'			=> 'overtake',
	'overthrew'			=> 'overthrow',
	'overthrown'		=> 'overthrow',
	'overtook'			=> 'overtake',
	'overwinded'		=> 'overwind',
	'overwound'			=> 'overwind',
	'overwritten'		=> 'overwrite',
	'overwrote'			=> 'overwrite',
	'paid'				=> 'pay',
	'partaken'			=> 'partake',
	'partook'			=> 'partake',
	'prechose'			=> 'prechoose',
	'prechosen'			=> 'prechoose',
	'proved'			=> 'prove',
	'proven'			=> 'prove',
	'quitted'			=> 'quit',
	'ran'				=> 'run',
	'rang'				=> 'ring',
	'reaved'			=> 'reave',
	'rebuilt'			=> 'rebuild',
	'reeved'			=> 'reeve',
	'reft'				=> 'reave',
	'relaid'			=> 'relay',
	'rent'				=> 'rend',
	'repaid'			=> 'repay',
	'retold'			=> 'retell',
	'ridded'			=> 'rid',
	'ridden'			=> 'ride',
	'risen'				=> 'rise',
	'rived'				=> 'rive',
	'riven'				=> 'rive',
	'rode'				=> 'ride',
	'rose'				=> 'rise',
	'rove'				=> 'reeve',
	'rung'				=> 'ring',
	'said'				=> 'say',
	'sang'				=> 'sing',
	'sank'				=> 'sink',
	'sat'				=> 'sit',
	'saw'				=> 'see',
	'sawed'				=> 'saw',
	'sawn'				=> 'saw',
	'seen'				=> 'see',
	'sent'				=> 'send',
	'sewed'				=> 'sew',
	'sewn'				=> 'sew',
	'shaken'			=> 'shake',
	'shaved'			=> 'shave',
	'shaven'			=> 'shave',
	'sheared'			=> 'shear',
	'shined'			=> 'shine',
	'shod'				=> 'shoe',
	'shoed'				=> 'shoe',
	'shone'				=> 'shine',
	'shook'				=> 'shake',
	'shorn'				=> 'shear',
	'shot'				=> 'shoot',
	'showed'			=> 'show',
	'shown'				=> 'show',
	'shrank'			=> 'shrink',
	'shredded'			=> 'shred',
	'shrived'			=> 'shrive',
	'shriven'			=> 'shrive',
	'shrove'			=> 'shrive',
	'shrunk'			=> 'shrink',
	'shrunken'			=> 'shrink',
	'slain'				=> 'slay',
	'slept'				=> 'sleep',
	'slew'				=> 'slay',
	'slid'				=> 'slide',
	'slidden'			=> 'slide',
	'slung'				=> 'sling',
	'slunk'				=> 'slink',
	'smelled'			=> 'smell',
	'smelt'				=> 'smell',
	'smitten'			=> 'smite',
	'smote'				=> 'smite',
	'snuck'				=> 'sneak',
	'sold'				=> 'sell',
	'sought'			=> 'seek',
	'sowed'				=> 'sow',
	'sown'				=> 'sow',
	'span'				=> 'spin',
	'spat'				=> 'spit',
	'sped'				=> 'speed',
	'speeded'			=> 'speed',
	'spelled'			=> 'spell',
	'spelt'				=> 'spell',
	'spent'				=> 'spend',
	'spilled'			=> 'spill',
	'spilt'				=> 'spill',
	'spoiled'			=> 'spoil',
	'spoilt'			=> 'spoil',
	'spoke'				=> 'speak',
	'spoken'			=> 'speak',
	'sprang'			=> 'spring',
	'sprung'			=> 'spring',
	'spun'				=> 'spin',
	'stank'				=> 'stink',
	'staved'			=> 'stave',
	'stole'				=> 'steal',
	'stolen'			=> 'steal',
	'stood'				=> 'stand',
	'stove'				=> 'stave',
	'strewed'			=> 'strew',
	'strewn'			=> 'strew',
	'stricken'			=> 'strike',
	'strid'				=> 'stride',
	'stridden'			=> 'stride',
	'strived'			=> 'strive',
	'striven'			=> 'strive',
	'strode'			=> 'stride',
	'strove'			=> 'strive',
	'struck'			=> 'strike',
	'strung'			=> 'string',
	'stuck'				=> 'stick',
	'stung'				=> 'sting',
	'stunk'				=> 'stink',
	'sung'				=> 'sing',
	'sunk'				=> 'sink',
	'sunken'			=> 'sink',
	'swam'				=> 'swim',
	'sweated'			=> 'sweat',
	'swelled'			=> 'swell',
	'swept'				=> 'sweep',
	'swollen'			=> 'swell',
	'swore'				=> 'swear',
	'sworn'				=> 'swear',
	'swum'				=> 'swim',
	'swung'				=> 'swing',
	'taken'				=> 'take',
	'taught'			=> 'teach',
	'thought'			=> 'think',
	'threw'				=> 'throw',
	'thrived'			=> 'thrive',
	'thriven'			=> 'thrive',
	'throve'			=> 'thrive',
	'thrown'			=> 'throw',
	'told'				=> 'tell',
	'took'				=> 'take',
	'tore'				=> 'tear',
	'torn'				=> 'tear',
	'trod'				=> 'tread',
	'trodden'			=> 'tread',
	'unbent'			=> 'unbend',
	'unbound'			=> 'unbind',
	'unbuilt'			=> 'unbuild',
	'underbought'		=> 'underbuy',
	'underfed'			=> 'underfeed',
	'undergone'			=> 'undergo',
	'underlaid'			=> 'underlay',
	'underlain'			=> 'underlie',
	'underlay'			=> 'underlie',
	'underpaid'			=> 'underpay',
	'underran'			=> 'underrun',
	'undershot'			=> 'undershoot',
	'undersold'			=> 'undersell',
	'understood'		=> 'understand',
	'undertaken'		=> 'undertake',
	'undertook'			=> 'undertake',
	'underwent'			=> 'undergo',
	'underwritten'		=> 'underwrite',
	'underwrote'		=> 'underwrite',
	'undid'				=> 'undo',
	'undone'			=> 'undo',
	'undrawn'			=> 'undraw',
	'undrew'			=> 'undraw',
	'unfroze'			=> 'unfreeze',
	'unfrozen'			=> 'unfreeze',
	'ungirded'			=> 'ungird',
	'ungirt'			=> 'ungird',
	'unhanged'			=> 'unhang',
	'unhung'			=> 'unhang',
	'unknitted'			=> 'unknit',
	'unladed'			=> 'unlade',
	'unladen'			=> 'unlade',
	'unlaid'			=> 'unlay',
	'unlearned'			=> 'unlearn',
	'unlearnt'			=> 'unlearn',
	'unmade'			=> 'unmake',
	'unreeved'			=> 'unreeve',
	'unrove'			=> 'unreeve',
	'unsaid'			=> 'unsay',
	'unslung'			=> 'unsling',
	'unspoke'			=> 'unspeak',
	'unspoken'			=> 'unspeak',
	'unstrung'			=> 'unstring',
	'unstuck'			=> 'unstick',
	'unswore'			=> 'unswear',
	'unsworn'			=> 'unswear',
	'untaught'			=> 'unteach',
	'unthought'			=> 'unthink',
	'untrod'			=> 'untread',
	'untrodden'			=> 'untread',
	'unwinded'			=> 'unwind',
	'unwound'			=> 'unwind',
	'unwove'			=> 'unweave',
	'unwoven'			=> 'unweave',
	'upbuilt'			=> 'upbuild',
	'upheld'			=> 'uphold',
	'uprisen'			=> 'uprise',
	'uprose'			=> 'uprise',
	'upswept'			=> 'upsweep',
	'upswung'			=> 'upswing',
	'waked'				=> 'wake',
	'was'				=> 'be',
	'waylaid'			=> 'waylay',
	'wedded'			=> 'wed',
	'went'				=> 'go',
	'wept'				=> 'weep',
	'were'				=> 'be',
	'wetted'			=> 'wet',
	'winded'			=> 'wind',
	'wist'				=> 'wit',
	'wot'				=> 'wit',
	'withdrawn'			=> 'withdraw',
	'withdrew'			=> 'withdraw',
	'withheld'			=> 'withhold',
	'withstood'			=> 'withstand',
	'woke'				=> 'wake',
	'woken'				=> 'wake',
	'won'				=> 'win',
	'wore'				=> 'wear',
	'worked'			=> 'work',
	'worn'				=> 'wear',
	'wound'				=> 'wind',
	'wove'				=> 'weave',
	'woven'				=> 'weave',
	'written'			=> 'write',
	'wrote'				=> 'write',
	'wrought'			=> 'work',
	'wrung'				=> 'wring',
}
InfSuffixRules =

Mapping of word suffixes to infinitive rules.

{
	# '<suffix>' => {
	#	:order => <sort order>,
	#	:rule  => <rule number>,

	# :word1 == 0 => Use 0, the index of the longest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below.

	# :word1 == 1 => Use 1, the index of the 2nd longest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below.

	# :word1 == -1 => Use the index of the shortest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below + a letter.

	# :word1 == -2 => Use the index of the shortest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below + a letter,
	#	and use the shortest prefix as well.

	# :word1 == -3 => Use the index of the shortest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below + meter,
	#	and use the shortest prefix + metre as well.

	# :word1 == -4 => Use the original string.
	'hes' => {
		:order		=> 1011,
		:rule		=> '1',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ses' => {
		:order		=> 1021,
		:rule		=> '2',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'xes' => {
		:order		=> 1031,
		:rule		=> '3',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'zes' => {
		:order		=> 1041,
		:rule		=> '4',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'iless' => {
		:order		=> 1051,
		:rule		=> '43a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'less' => {
		:order		=> 1052,
		:rule		=> '43b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'iness' => {
		:order		=> 1053,
		:rule		=> '44a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'ness' => {
		:order		=> 1054,
		:rule		=> '44b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	"'s" => {
		:order		=> 1055,
		:rule		=> '7',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ies' => {
		:order		=> 1056,
		:rule		=> '13a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'es' => {
		:order		=> 1057,
		:rule		=> '13b',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ss' => {
		:order		=> 1061,
		:rule		=> '6a',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	's'	 => {
		:order		=> 1062,
		:rule		=> '6b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ater' => {
		:order		=> 1081,
		:rule		=> '8',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'cter' => {
		:order		=> 1091,
		:rule		=> '9',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ier' => {
		:order		=> 1101,
		:rule		=> '10',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'er' => {
		:order		=> 1111,
		:rule		=> '11',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ied' => {
		:order		=> 1121,
		:rule		=> '12a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'ed' => {
		:order		=> 1122,
		:rule		=> '12b',	# There is extra code for 12b below.
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'iest' => {
		:order		=> 1141,
		:rule		=> '14a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'est' => {
		:order		=> 1142,
		:rule		=> '14b',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'blity' => {
		:order		=> 1143,
		:rule		=> '21',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'bility' => {
		:order		=> 1144,
		:rule		=> '22',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ble',
		:suffix2	=> '',
	},
	'fiable' => {
		:order		=> 1145,
		:rule		=> '23',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'fy',
		:suffix2	=> '',
	},
	'logist' => {
		:order		=> 1146,
		:rule		=> '24',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'logy',
		:suffix2	=> '',
	},
	'ing' => {
		:order		=> 1151,
		:rule		=> '15',	# There is extra code for 15 below.
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'ist' => {
		:order		=> 1161,
		:rule		=> '16',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'ism' => {
		:order		=> 1171,
		:rule		=> '17',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'ity' => {
		:order		=> 1181,
		:rule		=> '18',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'ize' => {
		:order		=> 1191,
		:rule		=> '19',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'cable' => {
		:order		=> 1201,
		:rule		=> '20a',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'gable' => {
		:order		=> 1202,
		:rule		=> '20b',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'able' => {
		:order		=> 1203,
		:rule		=> '20c',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'graphic' => {
		:order		=> 1251,
		:rule		=> '25',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'graphy',
		:suffix2	=> '',
	},
	'istic' => {
		:order		=> 1261,
		:rule		=> '26',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ist',
		:suffix2	=> '',
	},
	'itic' => {
		:order		=> 1271,
		:rule		=> '27',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ite',
		:suffix2	=> '',
	},
	'like' => {
		:order		=> 1281,
		:rule		=> '28',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'logic' => {
		:order		=> 1291,
		:rule		=> '29',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'logy',
		:suffix2	=> '',
	},
	'ment' => {
		:order		=> 1301,
		:rule		=> '30',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'mental' => {
		:order		=> 1311,
		:rule		=> '31',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ment',
		:suffix2	=> '',
	},
	'metry' => {
		:order		=> 1321,
		:rule		=> '32',
		:word1		=> -3,	# Shortest prefix + meter, and shortest perfix + metre.
		:suffix1	=> 'meter',
		:suffix2	=> 'metre',
	},
	'nce' => {
		:order		=> 1331,
		:rule		=> '33',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'nt',
		:suffix2	=> '',
	},
	'ncy' => {
		:order		=> 1341,
		:rule		=> '34',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'nt',
		:suffix2	=> '',
	},
	'ship' => {
		:order		=> 1351,
		:rule		=> '35',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ical' => {
		:order		=> 1361,
		:rule		=> '36',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ic',
		:suffix2	=> '',
	},
	'ional' => {
		:order		=> 1371,
		:rule		=> '37',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ion',
		:suffix2	=> '',
	},
	'bly' => {
		:order		=> 1381,
		:rule		=> '38',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ble',
		:suffix2	=> '',
	},
	'ily' => {
		:order		=> 1391,
		:rule		=> '39',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'ly' => {
		:order		=> 1401,
		:rule		=> '40',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'iful' => {
		:order		=> 1411,
		:rule		=> '41a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'ful' => {
		:order		=> 1412,
		:rule		=> '41b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ihood' => {
		:order		=> 1421,
		:rule		=> '42a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'hood' => {
		:order		=> 1422,
		:rule		=> '42b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ification' => {
		:order		=> 1451,
		:rule		=> '45',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ify',
		:suffix2	=> '',
	},
	'ization' => {
		:order		=> 1461,
		:rule		=> '46',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ize',
		:suffix2	=> '',
	},
	'ction' => {
		:order		=> 1471,
		:rule		=> '47',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ct',
		:suffix2	=> '',
	},
	'rtion' => {
		:order		=> 1481,
		:rule		=> '48',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'rt',
		:suffix2	=> '',
	},
	'ation' => {
		:order		=> 1491,
		:rule		=> '49',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ate',
		:suffix2	=> '',
	},
	'ator' => {
		:order		=> 1501,
		:rule		=> '50',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ate',
		:suffix2	=> '',
	},
	'ctor' => {
		:order		=> 1511,
		:rule		=> '51',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ct',
		:suffix2	=> '',
	},
	'ive' => {
		:order		=> 1521,
		:rule		=> '52',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ion',
		:suffix2	=> '',
	},
	'onian' => {
		:order		=> 1530,
		:rule		=> '54',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'on',
		:suffix2	=> '',
	},
	'an' => {
		:order		=> 1531,
		:rule		=> '53',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'a',
		:suffix2	=> '',
	},
}
# The suffixes (keys of InfSuffixRules) sorted by the :order value of their
# rule, lowest first.
InfSuffixRuleOrder =
InfSuffixRules.keys.sort_by {|rule| InfSuffixRules[rule][:order]}

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.lprintf_formattersObject

Returns the value of attribute lprintf_formatters.



125
126
127
# File 'lib/linguistics/en.rb', line 125

# Reader for the lprintf formatter collection held in @lprintf_formatters.
def lprintf_formatters
  return @lprintf_formatters
end

Class Method Details

.a(phrase, count = nil) ⇒ Object Also known as: an

Return the given phrase with the appropriate indefinite article (“a” or “an”) prepended.



1190
1191
1192
1193
1194
1195
1196
1197
# File 'lib/linguistics/en.rb', line 1190

# Return +phrase+ with whatever #indef_article produces for its
# whitespace-trimmed content, keeping the original leading and trailing
# whitespace. +count+ is handed through to #indef_article unchanged.
# A phrase with no non-whitespace content is returned as given.
def a( phrase, count=nil )
	parts = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )

	# No match means there is no word to put an article in front of
	return phrase if parts.nil?

	leading, word, trailing = parts.captures
	leading + indef_article( word, count ) + trailing
end

.camel_case_to_english(string) ⇒ Object

Turns a camel-case string (“camelCaseToEnglish”) to plain English (“camel case to english”). Each word is decapitalized.



1623
1624
1625
1626
1627
# File 'lib/linguistics/en.rb', line 1623

# Turn a camel-case string ("camelCaseToEnglish") into plain English
# ("camel case to english"). A space is inserted after every letter that is
# directly followed by a capital letter, and the result is downcased.
#
# The capital is only looked at (lookahead), not consumed, so every boundary
# in a run of capitals is split: "ABC" becomes "a b c".
def camel_case_to_english( string )
	string.to_s.gsub( /([A-Za-z])(?=[A-Z])/ ) { "#$1 " }.downcase
end

.conjunction(obj, args = {}) ⇒ Object

Return the specified obj (which must support the #collect method) as a conjunction. Each item is converted to a String if it is not already (using #to_s) unless a block is given, in which case it is called once for each object in the array, and the stringified return value from the block is used instead. Returning nil causes that particular element to be omitted from the resulting conjunction. The following options can be used to control the makeup of the returned conjunction String:

:separator

Specify one or more characters to separate items in the resulting list. Defaults to ', '.

:altsep

An alternate separator to use if any of the resulting conjunction’s clauses contain the :separator character/s. Defaults to '; '.

:penultimate

Flag that indicates whether or not to join the last clause onto the rest of the conjunction using a penultimate :separator. E.g.,

%w{duck cow dog}.en.conjunction
# => "a duck, a cow, and a dog"
%w{duck cow dog}.en.conjunction( :penultimate => false )
# => "a duck, a cow and a dog"

Default to true.

:conjunctive

Sets the word used as the conjunctive (separating word) of the resulting string. Default to 'and'.

:combine

If set to true (the default), items which are identical (after surrounding spaces are stripped) will be combined in the resulting conjunction. E.g.,

%w{goose cow goose dog}.en.conjunction
# => "two geese, a cow, and a dog"
%w{goose cow goose dog}.en.conjunction( :combine => false )
# => "a goose, a cow, a goose, and a dog"
:casefold

If set to true (the default), then items are compared case-insensitively when combining them. This has no effect if :combine is false.

:generalize

If set to true, then quantities of combined items are turned into general descriptions instead of exact amounts.

ary = %w{goose pig dog horse goose reindeer goose dog horse}
ary.en.conjunction
# => "three geese, two dogs, two horses, a pig, and a reindeer"
ary.en.conjunction( :generalize => true )
# => "several geese, several dogs, several horses, a pig, and a reindeer"

See the #quantify method for specifics on how quantities are generalized. Generalization defaults to false, and has no effect if :combine is false.

:quantsort

If set to true (the default), items which are combined in the resulting conjunction will be listed in order of amount, with greater quantities sorted first. If :quantsort is false, combined items will appear where the first instance of them occurred in the list. This sort is also the fallback for identical quantities (i.e., items of the same quantity will be listed in the order they appeared in the source list).



1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
# File 'lib/linguistics/en.rb', line 1512

# Return the items of +obj+ (which must support #collect) joined into a
# conjunction String such as "a duck, a cow, and a dog".
#
# Each item is converted with #to_s. If a block is given it is called once per
# item instead; items for which it returns +nil+ are dropped and the other
# results are stringified. +args+ is merged over ConjunctionDefaults; the
# keys read here are :separator, :altsep, :penultimate, :conjunctive,
# :combine, :casefold, :generalize and :quantsort.
#
# With fewer than two phrases the result is whatever #a returns for the first
# phrase.
def conjunction( obj, args={} )
	config = ConjunctionDefaults.merge( args )

	# Transform items in the obj to phrases. Block results are stringified as
	# well, so the key function below can rely on String methods.
	phrases =
		if block_given?
			obj.collect {|item| yield(item) }.compact.collect {|phrase| phrase.to_s }
		else
			obj.collect {|item| item.to_s }
		end

	# No need for a conjunction if there's only one thing
	return a(phrases[0]) if phrases.length < 2

	# Set up a Proc to derive a collector key from a phrase depending on the
	# configuration
	keyfunc =
		if config[:casefold]
			proc {|key| key.downcase.strip}
		else
			proc {|key| key.strip}
		end
	
	# Count and delete phrases that hash the same when the keyfunc munges
	# them into the same thing if we're combining (:combine => true).
	collector = {}
	if config[:combine]
	
		phrases.each_index do |i|
			# Stop when reaching the end of a truncated list
			break if phrases[i].nil?

			# Make the key using the configured key function
			phrase = keyfunc[ phrases[i] ]

			# If the collector already has this key, increment its count,
			# eliminate the duplicate from the phrase list, and redo the loop.
			if collector.key?( phrase )
				collector[ phrase ] += 1
				phrases.delete_at( i )
				redo
			end

			collector[ phrase ] = 1
		end
	else
		# If we're not combining, just make everything have a count of 1.
		phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1}
	end

	# If sort-by-quantity is turned on, sort the phrases first by how many
	# there are (most-first), and then by the order they were specified in.
	if config[:quantsort] && config[:combine]
		origorder = {}
		phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
		phrases.sort! {|a,b|
			(collector[ keyfunc[b] ] <=> collector[ keyfunc[a] ]).nonzero? ||
			(origorder[ keyfunc[a] ] <=> origorder[ keyfunc[b] ])
		}
	end

	# Set up a filtering function that adds either an indefinite article, an
	# indefinite quantifier, or a definite quantifier to each phrase
	# depending on the configuration and the count of phrases in the
	# collector.
	filter =
		if config[:generalize]
			proc {|phrase, count| quantify(phrase, count) }
		else
			proc {|phrase, count|
			if count > 1
				"%s %s" % [
					# :TODO: Make this threshold settable
					count < 10 ? count.en.numwords : count.to_s,
					plural(phrase, count)
				]
			else
				a( phrase )
			end
		}
		end

	# Now use the configured filter to turn each phrase into its final
	# form. Hmmm... square-bracket Lisp?
	phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] }

	# Prepend the conjunctive to the last element unless it's empty or
	# there's only one element
	phrases[-1].insert( 0, config[:conjunctive] + " " ) unless
		config[:conjunctive].strip.empty? or
		phrases.length < 2

	# Concatenate the last two elements if there's no penultimate separator,
	# and pick a separator based on how many phrases there are and whether
	# or not there's already an instance of it in the phrases. Combining
	# duplicates can leave a single phrase, in which case there is nothing
	# to concatenate.
	phrase_count = phrases.length
	if !config[:penultimate] && phrases.length > 1
		phrases[-2] << " " << phrases.pop
	end
	sep = config[:separator]
	if phrase_count <= 2
		sep = ' '
	elsif phrases.find {|str| str.include?(config[:separator]) }
		sep = config[:altsep]
	end

	return phrases.join( sep )
end

.debug_msg(*msgs) ⇒ Object

Debugging output



685
686
687
# File 'lib/linguistics/en.rb', line 685

# Write +msgs+, joined with single spaces, to $stderr -- but only when
# $DEBUG is set.
def debug_msg( *msgs ) # :nodoc:
	return unless $DEBUG
	$stderr.puts msgs.join(" ")
end

.def_lprintf_formatter(name, meth) ⇒ Object

Add the specified method (which can be either a Method object or a Symbol for looking up a method) as the lprintf formatter for the given name.



130
131
132
133
# File 'lib/linguistics/en.rb', line 130

# Store a formatter in lprintf_formatters under the key +name+. +meth+ may
# be a Method object, which is stored as-is; anything else is resolved to a
# Method of the receiver via #method first.
def self::def_lprintf_formatter( name, meth )
	formatter = meth.is_a?( Method ) ? meth : self.method( meth )
	self.lprintf_formatters[ name ] = formatter
end

.def_synset_function(meth) ⇒ Object

Make a function that calls the method meth on the synset of an input word.



126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/linguistics/en/wordnet.rb', line 126

# Define a singleton method named +meth+ on the receiver. The generated
# method takes (word, pos, sense), with +sense+ defaulting to 1, looks up the
# synset for the stringified word via #synset, and sends +meth+ to it.
# It returns +nil+ if no synset was found and raises ArgumentError when
# called without a word.
def def_synset_function( meth )
	metaclass = class << self; self; end

	metaclass.instance_eval do
		define_method( meth ) do |*args|
			word, pos, sense = args
			unless word
				raise ArgumentError, "wrong number of arguments (0 for 1)"
			end

			found = synset( word.to_s, pos, sense || 1 )
			found.send( meth ) unless found.nil?
		end
	end
end

.english_to_camel_case(string) ⇒ Object

Turns an English language string into a CamelCase word.



1631
1632
1633
# File 'lib/linguistics/en.rb', line 1631

# Turn an English string into a camel-case word: every run of whitespace
# that is followed by a lowercase ASCII letter is replaced by that letter in
# uppercase. Whitespace before any other character is left alone.
def english_to_camel_case( string )
	# Each match is "<whitespace><letter>"; keep only the letter, upcased
	string.to_s.gsub( /\s+([a-z])/ ) {|run| run[-1, 1].upcase }
end

Returns true if LinkParser was loaded okay

Returns:

  • (Boolean)


81
# File 'lib/linguistics/en/linkparser.rb', line 81

# Reader for the @has_link_parser flag.
def has_link_parser?
	@has_link_parser
end

.has_wordnet?Boolean

Returns true if WordNet was loaded okay

Returns:

  • (Boolean)


106
# File 'lib/linguistics/en/wordnet.rb', line 106

# Reader for the @has_wordnet flag.
def has_wordnet?
	@has_wordnet
end

.indef_article(word, count) ⇒ Object

Returns the given word with a prepended indefinite article, unless count is non-nil and not singular.



962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
# File 'lib/linguistics/en.rb', line 962

# Return +word+ with "a" or "an" prepended. If +count+ (or, when that is
# nil, Linguistics::num) is set and does not match PL_count_one, the result
# is "<count> <word>" instead.
#
# The article is chosen by the first pattern below that matches the start of
# the word; the order of the tests is significant.
def indef_article( word, count )
	count ||= Linguistics::num
	if count && /^(#{PL_count_one})$/i !~ count.to_s
		return "#{count} #{word}"
	end

	# Handle user-defined variants
	# return value if value = ud_match( word, A_a_user_defined )

	article =
		case word

		# Special cases, then abbreviations and single letters that are
		# read with a leading vowel sound
		when /^(#{A_explicit_an})/i, /^(#{A_abbrev})/x, /^[aefhilmnorsx][.-]/i
			'an'

		# Any other single letter, consonants, and the special vowel-forms
		when /^[a-z][.-]/i,
			 /^[^aeiouy]/i,
			 /^e[uw]/i,
			 /^onc?e\b/i,
			 /^uni([^nmd]|mo)/i,
			 /^u[bcfhjkqrst][aeiou]/i
			'a'

		# Remaining vowels, and y... (before certain consonants implies
		# (unnaturalized) "i.." sound)
		when /^[aeiou]/i, /^(#{A_y_cons})/i
			'an'

		# Otherwise, guess "a"
		else
			'a'
		end

	return "#{article} #{word}"
end

.infinitive(word) ⇒ Object

Return the infinitive form of the given word



1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
# File 'lib/linguistics/en/infinitive.rb', line 1052

# Return the infinitive form of the given +word+ as an Infinitive built from
# two candidate words, the matched suffix and the name of the applied rule.
#
# A word found in IrregularInfinitives is looked up directly (rule
# 'irregular'). Otherwise the first suffix in InfSuffixRuleOrder that the
# word ends with selects an entry of InfSuffixRules, and that entry's :word1
# code decides how the two candidates are assembled. When no suffix applies,
# both candidates and the rule are empty strings and the suffix is nil.
def infinitive( word )
	word = word.to_s

	if IrregularInfinitives.key?( word )
		return Infinitive::new( IrregularInfinitives[word], '', '', 'irregular' )
	end

	# For every split of the word into a head and a non-empty tail, record
	# (keyed by the tail) the head followed by successively shorter leading
	# pieces of the tail: longest candidate first, the bare head last.
	prefixes = {}
	1.upto( word.length - 1 ) do |cut|
		head, tail = word[0, cut], word[cut..-1]
		prefixes[ tail ] =
			(0...tail.length).collect {|keep| head + tail[0, keep] }.reverse
	end

	$stderr.puts "prefixes: %p" % prefixes if $DEBUG

	word1 = word2 = rule = ''

	# Pick the highest-priority rule whose suffix is one of the word's tails
	suffix = (InfSuffixRuleOrder & prefixes.keys).first

	if suffix
		entry = InfSuffixRules[ suffix ]
		rule = entry[:rule]
		mode = entry[:word1]
		$stderr.puts "Using rule %p (%p) for suffix %p" %
			[ rule, mode, suffix ] if $DEBUG

		candidates = prefixes[ suffix ]

		case mode
		when 0		# The two longest prefixes
			word1, word2 = candidates[0], candidates[1]
			$stderr.puts "For sp = 0: word1: %p, word2: %p" %
				[ word1, word2 ] if $DEBUG

		when -1		# Shortest prefix + first replacement suffix
			word1, word2 = candidates.last + entry[:suffix1], ''
			$stderr.puts "For sp = -1: word1: %p, word2: %p" %
				[ word1, word2 ] if $DEBUG

		when -2		# Shortest prefix + first replacement suffix, and shortest prefix
			word1, word2 = candidates.last + entry[:suffix1], candidates.last
			$stderr.puts "For sp = -2: word1: %p, word2: %p" %
				[ word1, word2 ] if $DEBUG

		when -3		# Shortest prefix + each of the two replacement suffixes
			word1, word2 = candidates.last + entry[:suffix1],
				candidates.last + entry[:suffix2]
			$stderr.puts "For sp = -3: word1: %p, word2: %p" %
				[ word1, word2 ] if $DEBUG

		when -4		# Original string
			word1, word2 = word, ''
			$stderr.puts "For sp = -4: word1: %p, word2: %p" %
				[ word1, word2 ] if $DEBUG

		else
			raise IndexError,
				"Couldn't find rule for shortest prefix %p" %
				mode
		end

		# Rules 12b and 15: Strip off 'ed' or 'ing'.
		# Do we have a monosyllable of this form:
		# o 0+ Consonants
		# o 1+ Vowel
		# o	2 Non-wx
		# Eg: tipped => tipp?
		# Then return tip and tipp.
		# Eg: swimming => swimm?
		# Then return swim and swimm.
		if %w{12b 15}.include?( rule ) &&
		   /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
			word1 = $1 + $2
			word2 = word1 + $2
		end
	end

	return Infinitive::new( word1, word2, suffix, rule )
end

.language(unused = nil) ⇒ Object

Return the name of the language this module is for.



1121
1122
1123
# File 'lib/linguistics/en.rb', line 1121

# Name of the language handled by this module. The optional argument is
# accepted but ignored.
def language( unused=nil )
	return "English"
end

.lp_dictObject

The instance of LinkParser used for all Linguistics LinkParser functions.



89
90
91
92
93
94
95
96
97
# File 'lib/linguistics/en/linkparser.rb', line 89

# Return the LinkParser::Dictionary stored in @lp_dict, creating it with
# :verbosity => 0 on first use. Raises NotImplementedError, quoting the
# message of @lp_error, if @lp_error is set.
def lp_dict
	raise NotImplementedError,
		"LinkParser functions are not loaded: %s" %
		@lp_error.message if @lp_error

	@lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
	return @lp_dict
end

.lp_errorObject

If #has_link_parser? returns false, this can be called to fetch the exception which was raised when trying to load LinkParser.



85
# File 'lib/linguistics/en/linkparser.rb', line 85

# Reader for @lp_error.
def lp_error
	@lp_error
end

.lprintf(fmt, *args) ⇒ Object

Format the given fmt string by replacing %-escaped sequences with the result of performing a specified operation on the corresponding argument, ala Kernel.sprintf.

%PL

Plural.

%A, %AN

Prepend indefinite article.

%NO

Zero-quantified phrase.

%NUMWORDS

Convert a number into the corresponding words.

%CONJUNCT

Conjunction.



1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
# File 'lib/linguistics/en.rb', line 1706

# Format +fmt+ by replacing each %NAME escape (NAME being capital letters and
# underscores) with the result of calling the formatter registered under
# :NAME in lprintf_formatters. Each escape consumes the next value from
# +args+, in order. Raises a RuntimeError for an unregistered escape.
def lprintf( fmt, *args )
	fmt.to_s.gsub( /%([A-Z_]+)/ ) do
		op = $1.to_s.upcase.to_sym

		unless self.lprintf_formatters.key?( op )
			raise "no such formatter %p" % op
		end

		arg = args.shift
		self.lprintf_formatters[ op ].call( arg )
	end
end

.matchgroup(*parts) ⇒ Object

Wrap one or more parts in a non-capturing alteration Regexp



117
118
119
120
# File 'lib/linguistics/en.rb', line 117

# Build the source of a non-capturing alternation group, "(?:a|b|c)", from
# +parts+. Nested arrays are flattened first. Returns a String, not a Regexp.
def self::matchgroup( *parts )
	alternatives = parts.flatten.join( "|" )
	return "(?:" + alternatives + ")"
end

.no(phrase, count = nil) ⇒ Object

Translate zero-quantified phrase to “no phrase.plural”



1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
# File 'lib/linguistics/en.rb', line 1204

def no( phrase, count=nil )
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
	pre, word, post = md.to_a[1,3]
	count ||= Linguistics::num || 0

	unless /^#{PL_count_zero}$/ =~ count.to_s
		return "#{pre}#{count} " + plural( word, count ) + post
	else
		return "#{pre}no " + plural( word, 0 ) + post
	end
end

.normalize_count(count, default = 2) ⇒ Object

Normalize a count to either 1 or 2 (singular or plural)



691
692
693
694
695
696
697
698
699
700
# File 'lib/linguistics/en.rb', line 691

# Normalize +count+ to 1 (singular) or +default+ (plural, 2 unless given).
# A nil count yields +default+. The result is 1 when the count matches
# PL_count_one, or when classical mode is on and it matches PL_count_zero.
def normalize_count( count, default=2 )
	return default if count.nil? # Default to plural

	text = count.to_s
	return 1 if /^(#{PL_count_one})$/i =~ text
	return 1 if Linguistics::classical? && /^(#{PL_count_zero})$/ =~ text

	default
end

.number_to_words(num, config) ⇒ Object

Return the specified number num as an array of number phrases.



1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
# File 'lib/linguistics/en.rb', line 1067

# Return the specified number +num+ as an array of number phrases.
#
# +config+ is a Hash; the keys read here are :zero (the word for 0), :group
# (digits per word group, 0 for normal grouping) and :and (passed on to
# #to_hundreds). A value whose #to_i is zero yields [config[:zero]].
def number_to_words( num, config )
	return [config[:zero]] if num.to_i.zero?
	chunks = []

	# Break into word-groups if groups is set
	if config[:group].nonzero?

		# Build a Regexp with <config[:group]> number of digits. Any past
		# the first are optional.
		re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )

		# Scan the string, and call the word-chunk function that deals with
		# chunks of the found number of digits.
		num.to_s.scan( re ) {|digits|
			debug_msg "   digits = #{digits.inspect}"

			# Optional groups that did not take part in the match are nil;
			# only the digits actually found are counted and converted.
			# (Array#nitems used to do the counting, but was removed in
			# Ruby 1.9.)
			numerals = digits.flatten.compact.collect {|i| i.to_i}
			fn = NumberToWordsFunctions[ numerals.length ]
			debug_msg "   numerals = #{numerals.inspect}"
			chunks.push fn.call( config[:zero], *numerals ).strip
		}
	else
		phrase = num.to_s
		phrase.sub!( /\A\s*0+/, '' )
		mill = 0

		# Match backward from the end of the digits in the string, turning
		# chunks of three, of two, and of one into words.
		mill += 1 while
			phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
				words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill, 
									 config[:and] )
				chunks.unshift words.strip.squeeze(' ') unless words.nil?
				''
			}				

		phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
			chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
			''
		}
		phrase.sub!( /(\d)(?=\D*\Z)/ ) {
			chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
			''
		}
	end

	return chunks
end

.numwords(number, hashargs = {}) ⇒ Object

Return the specified number as english words. One or more configuration values may be passed to control the returned String:

:group

Controls how many numbers at a time are grouped together. Valid values are 0 (normal grouping), 1 (single-digit grouping, e.g., “one, two, three, four”), 2 (double-digit grouping, e.g., “twelve, thirty-four”), or 3 (triple-digit grouping, e.g., “one twenty-three, four”).

:comma

Set the character/s used to separate word groups. Defaults to ", ".

:and

Set the word and/or characters used where ' and ' (the default) is normally used. Setting :and to ' ', for example, will cause 2556 to be returned as “two-thousand, five hundred fifty-six” instead of “two-thousand, five hundred and fifty-six”.

:zero

Set the word used to represent the numeral 0 in the result. 'zero' is the default.

:decimal

Set the translation of any decimal points in the number; the default is 'point'.

:asArray

If set to a true value, the number will be returned as an array of word groups instead of a String.



1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
# File 'lib/linguistics/en.rb', line 1264

# Return the specified +number+ as English words. +hashargs+ is merged over
# NumwordDefaults; the keys read here are :group (0..3), :comma, :and,
# :zero, :decimal and :asArray. With :asArray set, an Array of word groups
# is returned instead of a String.
#
# A leading "+" or "-" becomes "plus" / "minus" and a trailing
# st/nd/rd/th makes the last whole-number word an ordinal. The +number+
# argument itself is never modified.
#
# Raises a RuntimeError if :group is not between 0 and 3.
def numwords( number, hashargs={} )
	# Work on a private copy: the suffix stripping below is destructive, and
	# String#to_s returns the receiver itself.
	num = number.to_s.dup
	config = NumwordDefaults.merge( hashargs )
	raise "Bad chunking option: #{config[:group]}" unless
		config[:group].between?( 0, 3 )

	# Array of number parts: first is everything to the left of the first
	# decimal, followed by any groups of decimal-delimited numbers after that
	parts = []

	# Wordify any sign prefix
	sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

	# Strip any ordinal suffixes
	ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

	# Split the number into chunks delimited by '.'
	chunks = if !config[:decimal].empty? then
				 if config[:group].nonzero?
					 num.split(/\./)
				 else
					 num.split(/\./, 2)
				 end
			 else
				 [ num ]
			 end

	# Wordify each chunk, pushing arrays into the parts array
	chunks.each_with_index {|chunk,section|
		digits = chunk.gsub( /\D+/, '' )

		# If there's nothing in the whole-number part, just push an empty
		# array. An empty later chunk falls through and becomes zero.
		if digits.empty? && section.zero?
			parts.push []
			next
		end

		# Post-decimal sections of a non-group number are read out digit by
		# digit; everything else uses the configured grouping.
		if config[:group].zero? && section.nonzero?
			parts.push number_to_words( digits, config.merge(:group => 1) )
		else
			parts.push number_to_words( digits, config )
		end
	}

	# Splitting an empty string yields no chunks at all; make sure there is
	# always a (possibly empty) whole-number part to work with.
	parts.push( [] ) if parts.empty?

	debug_msg "Parts => #{parts.inspect}"
	
	# Turn the last word of the whole-number part back into an ordinal if
	# the original number came in that way.
	if ord && !parts[0].empty?
		parts[0][-1] = ordinal( parts[0].last )
	end

	# If the caller's expecting an Array return, just flatten and return the
	# parts array.
	if config[:asArray]
		unless sign.empty?
			parts[0].unshift( sign )
		end
		return parts.flatten
	end

	# Catenate each sub-parts array into a whole number part and one or more
	# post-decimal parts. If grouping is turned on, all sub-parts get joined
	# with commas, otherwise just the whole-number part is.
	if config[:group].zero?
		# (Array#nitems was removed in Ruby 1.9; count non-nil items by hand.)
		if parts[0].compact.length > 1

			# Join all but the last part together with commas
			wholenum = parts[0][0...-1].join( config[:comma] )

			# If the last part is just a single word, append it to the
			# wholenum part with an 'and'. This is to get things like 'three
			# thousand and three' instead of 'three thousand, three'.
			if /^\s*(\S+)\s*$/ =~ parts[0].last
				wholenum += config[:and] + parts[0].last
			else
				wholenum += config[:comma] + parts[0].last
			end
		else
			wholenum = parts[0][0]
		end
		decimals = parts[1..-1].collect {|part| part.join(" ")}

		debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"

		# Join with the configured decimal; if it's empty, just join with
		# spaces.
		joiner = config[:decimal].empty? ? " " : " #{config[:decimal]} "
		words = ([ wholenum ] + decimals).join( joiner ).strip

		# Keep the sign word separate from the number ("minus five")
		return [ sign, words ].reject {|str| str.empty? }.join( " " )
	else
		return parts.compact.
			separate( config[:decimal] ).
			delete_if {|el| el.empty?}.
			join( config[:comma] ).
			strip
	end
end

.ordinal(number) ⇒ Object

Transform the given number into an ordinal word. The number object can be either an Integer or a String.



1377
1378
1379
1380
1381
1382
1383
1384
1385
# File 'lib/linguistics/en.rb', line 1377

# Transform the given +number+ into an ordinal. An Integer gets the suffix
# looked up in Nth appended to its digits ("1st", "12th"); anything else is
# stringified and has a trailing word matching OrdinalSuffixes replaced by
# its entry in Ordinals.
def ordinal( number )
	case number
	when Integer
		# Look the suffix up by magnitude: Ruby's modulo of a negative
		# Integer is non-negative (-1 % 10 == 9), which would pick the
		# suffix of a different digit.
		magnitude = number.abs
		return number.to_s + (Nth[ magnitude % 100 ] || Nth[ magnitude % 10 ])

	else
		return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
	end
end

.ordinate(number) ⇒ Object

Transform the given number into an ordinate word.



1390
1391
1392
# File 'lib/linguistics/en.rb', line 1390

# Transform the given +number+ into an ordinate word: the number is spelled
# out with #numwords and the result is passed to #ordinal.
def ordinate( number )
	# Call this module's #ordinal directly (as #numwords does) rather than
	# relying on the String returned by #numwords to respond to #ordinal.
	ordinal( numwords(number) )
end

.plural(phrase, count = nil) ⇒ Object

Return the plural of the given phrase if count indicates it should be plural.



1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
# File 'lib/linguistics/en.rb', line 1128

# Return the plural of the given +phrase+ if +count+ indicates it should be
# plural. A Numeric phrase is first spelled out with #numwords. The word is
# tried as a special adjective, then as a special verb, and finally as a
# noun; surrounding whitespace is preserved. A phrase with no
# non-whitespace content is returned as-is.
def plural( phrase, count=nil )
	phrase = numwords( phrase ) if phrase.is_a?( Numeric )

	parts = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
	return phrase if parts.nil?
	leading, word, trailing = parts.captures

	inflected =
		pluralize_special_adjective( word, count ) ||
		pluralize_special_verb( word, count ) ||
		pluralize_noun( word, count )

	leading + postprocess( word, inflected ) + trailing
end

.plural_adjective(phrase, count = nil) ⇒ Object Also known as: plural_adj

Return the plural of the given adjectival phrase if count indicates it should be plural.



1175
1176
1177
1178
1179
1180
1181
1182
1183
# File 'lib/linguistics/en.rb', line 1175

# Return the plural of the given adjectival +phrase+ if +count+ indicates it
# should be plural. Words that #pluralize_special_adjective does not handle
# are kept unchanged; surrounding whitespace is preserved. A phrase with no
# non-whitespace content is returned as-is.
def plural_adjective( phrase, count=nil )
	parts = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
	return phrase if parts.nil?
	leading, word, trailing = parts.captures

	inflected = pluralize_special_adjective( word, count ) || word
	leading + postprocess( word, inflected ) + trailing
end

.plural_noun(phrase, count = nil) ⇒ Object

Return the plural of the given noun phrase if count indicates it should be plural.



1147
1148
1149
1150
1151
1152
1153
1154
# File 'lib/linguistics/en.rb', line 1147

# Return the plural of the given noun +phrase+ if +count+ indicates it should
# be plural. The inflection is done by #pluralize_noun; surrounding
# whitespace is preserved. A phrase with no non-whitespace content is
# returned as-is.
def plural_noun( phrase, count=nil )
	parts = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
	return phrase if parts.nil?
	leading, word, trailing = parts.captures

	leading + postprocess( word, pluralize_noun(word, count) ) + trailing
end

.plural_verb(phrase, count = nil) ⇒ Object

Return the plural of the given verb phrase if count indicates it should be plural.



1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
# File 'lib/linguistics/en.rb', line 1160

# Return the plural of the given verb +phrase+ if +count+ indicates it should
# be plural. The word is tried as a special verb first and as a general verb
# otherwise; surrounding whitespace is preserved. A phrase with no
# non-whitespace content is returned as-is.
def plural_verb( phrase, count=nil )
	parts = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
	return phrase if parts.nil?
	leading, word, trailing = parts.captures

	inflected =
		pluralize_special_verb( word, count ) ||
		pluralize_general_verb( word, count )

	leading + postprocess( word, inflected ) + trailing
end

.pluralize_general_verb(word, count) ⇒ Object

Pluralize regular verbs



900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
# File 'lib/linguistics/en.rb', line 900

# Pluralize regular verbs. The count (falling back to Linguistics::num) is
# normalized with #normalize_count; for a singular count the word is returned
# unchanged. Otherwise a leading verb matching PL_v_ambiguous_pres is
# replaced by its entry in PL_v_ambiguous_pres_h, keeping the rest of the
# phrase; any other word is returned unchanged.
def pluralize_general_verb( word, count )
	count = normalize_count( count || Linguistics::num )
	
	return word if /^(#{PL_count_one})$/i =~ count.to_s

	case word

	# Ambiguous present tenses (simple and compound): swap in the plural form
	when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
		PL_v_ambiguous_pres_h[ $1.downcase ] + $2

	# Ambiguous preterite and perfect tenses do not inflect
	when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
		word

	# Otherwise, 1st or 2nd person is uninflected
	else
		word
	end
end

.pluralize_noun(word, count = nil) ⇒ Object

Pluralize nouns



727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
# File 'lib/linguistics/en.rb', line 727

# Pluralize nouns.
#
# Returns the plural form of +word+ for the given +count+. When +count+ is
# nil, Linguistics::num is used instead; the count is then passed through
# normalize_count and +word+ is returned unchanged if the result equals 1.
#
# The rules below are tried in order and every matching branch returns
# immediately, so the ordering of the +when+ clauses is significant. The
# PL_* pattern constants are defined elsewhere in the module.
def pluralize_noun( word, count=nil )
	value = nil
	count ||= Linguistics::num
	count = normalize_count( count )

	# A singular count never inflects
	return word if count == 1

	# Handle user-defined nouns (currently disabled)
	#if value = ud_match( word, PL_sb_user_defined )
	#	return value
	#end

	# Handle empty word and uninflected plurals
	case word
	when ''
		return word
	when /^(#{PL_sb_uninflected})$/i
		return word
	else
		# The "herd" nouns are only left uninflected in classical mode
		if Linguistics::classical? &&
		   /^(#{PL_sb_uninflected_herd})$/i =~ word
			return word
		end
	end

	# Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
	# The captured groups are copied into +value+ before recursing, because the
	# recursive call runs further regexp matches that overwrite $1..$3.
	case word
	when /^(?:#{PL_sb_postfix_adj})$/i
		value = $2
		return pluralize_noun( $1, 2 ) + value

	when /^(?:#{PL_sb_prep_dual_compound})$/i
		value = [ $2, $3 ] 
		return pluralize_noun( $1, 2 ) + value[0] + pluralize_noun( value[1] )

	when /^(?:#{PL_sb_prep_compound})$/i
		value = $2 
		return pluralize_noun( $1, 2 ) + value

	# Handle pronouns (prepositional phrase, nominative, accusative)
	when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
		return $1 + PL_pron_acc_h[ $2.downcase ]

	when /^(#{PL_pron_nom})$/i
		return PL_pron_nom_h[ word.downcase ]

	when /^(#{PL_pron_acc})$/i
		return PL_pron_acc_h[ $1.downcase ]

	# Handle isolated irregular plurals
	when /(.*)\b(#{PL_sb_irregular})$/i
		return $1 + PL_sb_irregular_h[ $2.downcase ]

	when /(#{PL_sb_U_man_mans})$/i
		return "#{$1}s"

	# Handle families of irregular plurals
	when /(.*)man$/i ;					return "#{$1}men"
	when /(.*[ml])ouse$/i ;				return "#{$1}ice"
	when /(.*)goose$/i ;				return "#{$1}geese"
	when /(.*)tooth$/i ;				return "#{$1}teeth"
	when /(.*)foot$/i ;					return "#{$1}feet"

	# Handle unassimilated imports
	when /(.*)ceps$/i ;					return word
	when /(.*)zoon$/i ;					return "#{$1}zoa"
	when /(.*[csx])is$/i ;				return "#{$1}es"
	when /(#{PL_sb_U_ex_ices})ex$/i;	return "#{$1}ices"
	when /(#{PL_sb_U_ix_ices})ix$/i;	return "#{$1}ices"
	when /(#{PL_sb_U_um_a})um$/i ;		return "#{$1}a"
	when /(#{PL_sb_U_us_i})us$/i ;		return "#{$1}i"
	when /(#{PL_sb_U_on_a})on$/i ;		return "#{$1}a"
	when /(#{PL_sb_U_a_ae})$/i ;		return "#{$1}e"
	end

	# Handle incompletely assimilated imports (classical mode only)
	if Linguistics::classical?
		case word
		when /(.*)trix$/i ;				return "#{$1}trices"
		when /(.*)eau$/i ;				return "#{$1}eaux"
		when /(.*)ieu$/i ;				return "#{$1}ieux"
		when /(.{2,}[yia])nx$/i ;		return "#{$1}nges"
		when /(#{PL_sb_C_en_ina})en$/i; return "#{$1}ina"
		when /(#{PL_sb_C_ex_ices})ex$/i;	return "#{$1}ices"
		when /(#{PL_sb_C_ix_ices})ix$/i;	return "#{$1}ices"
		when /(#{PL_sb_C_um_a})um$/i ;	return "#{$1}a"
		when /(#{PL_sb_C_us_i})us$/i ;	return "#{$1}i"
		when /(#{PL_sb_C_us_us})$/i ;	return "#{$1}"
		when /(#{PL_sb_C_a_ae})$/i ;	return "#{$1}e"
		when /(#{PL_sb_C_a_ata})a$/i ;	return "#{$1}ata"
		when /(#{PL_sb_C_o_i})o$/i ;	return "#{$1}i"
		when /(#{PL_sb_C_on_a})on$/i ;	return "#{$1}a"
		when /#{PL_sb_C_im}$/i ;		return "#{word}im"
		when /#{PL_sb_C_i}$/i ;			return "#{word}i"
		end
	end


	# Handle singular nouns ending in ...s or other sibilants
	case word
	when /^(#{PL_sb_singular_s})$/i;	return "#{$1}es"
	# Case-sensitive on purpose: only capitalized words ending in ...s
	when /^([A-Z].*s)$/;				return "#{$1}es"
	when /(.*)([cs]h|[zx])$/i ;			return "#{$1}#{$2}es"
	# when /(.*)(us)$/i ;				return "#{$1}#{$2}es"

	# Handle ...f -> ...ves
	when /(.*[eao])lf$/i ;				return "#{$1}lves"; 
	when /(.*[^d])eaf$/i ;				return "#{$1}eaves"
	when /(.*[nlw])ife$/i ;				return "#{$1}ives"
	when /(.*)arf$/i ;					return "#{$1}arves"

	# Handle ...y (vowel + y and capitalized words just take ...s)
	when /(.*[aeiou])y$/i ;				return "#{$1}ys"
	when /([A-Z].*y)$/ ;				return "#{$1}s"
	when /(.*)y$/i ;					return "#{$1}ies"

	# Handle ...o
	when /#{PL_sb_U_o_os}$/i ;			return "#{word}s"
	when /[aeiou]o$/i ;					return "#{word}s"
	when /o$/i ;						return "#{word}es"

	# Otherwise just add ...s
	else
		return "#{word}s"
	end
end

.pluralize_special_adjective(word, count) ⇒ Object

Handle special adjectives



924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
# File 'lib/linguistics/en.rb', line 924

# Handle special adjectives.
#
# Returns the plural form of +word+ when it is a known special adjective, a
# possessive adjective, or an apostrophe possessive; returns nil when the
# word is none of these. A count that reads as "one" (after falling back to
# Linguistics::num and normalize_count) returns +word+ unchanged.
def pluralize_special_adjective( word, count )
	number = normalize_count( count || Linguistics::num )
	return word if number.to_s =~ /^(#{PL_count_one})$/i

	# User-defined adjective overrides are currently disabled:
	#if value = ud_match( word, PL_adj_user_defined )
	#	return value
	#end

	case word
	when /^(#{PL_adj_special})$/i
		# Known cases
		PL_adj_special_h[ Regexp.last_match(1).downcase ]
	when /^(#{PL_adj_poss})$/i
		# Possessive adjectives
		PL_adj_poss_h[ Regexp.last_match(1).downcase ]
	when /^(.*)'s?$/
		# Apostrophe possessives: pluralize the owner, then re-attach the
		# possessive marker (a bare apostrophe after a trailing "s").
		owner = plural_noun( Regexp.last_match(1) )
		/s$/ =~ owner ? "#{owner}'" : "#{owner}'s"
	end
end

.pluralize_special_verb(word, count) ⇒ Object

Pluralize special verbs



857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
# File 'lib/linguistics/en.rb', line 857

# Pluralize special verbs.
#
# Returns the plural form of +word+, or nil when the verb is not handled
# here so that the caller can fall back to other rules. A count that reads
# as "one" (after falling back to Linguistics::num and normalize_count)
# also yields nil.
def pluralize_special_verb( word, count )
	number = normalize_count( count || Linguistics::num )
	return nil if number.to_s =~ /^(#{PL_count_one})$/i

	# User-defined verb overrides are currently disabled:
	#if value = ud_match( word, PL_v_user_defined )
	#	return value
	#end

	case word
	when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
		# Irregular present tense (simple and compound)
		PL_v_irregular_pres_h[ Regexp.last_match(1).downcase ] + Regexp.last_match(2)
	when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
		# Irregular future, preterite and perfect tenses
		word
	when /^(#{PL_v_special_s})$/, /\s/
		# Special cases, and anything that is more than a single word
		nil
	when /^(.*)([cs]h|[x]|zz|ss)es$/i
		# Standard 3rd person: chop the ...(e)s off single words
		Regexp.last_match(1) + Regexp.last_match(2)
	when /^(..+)ies$/i
		"#{Regexp.last_match(1)}y"
	when /^(.+)oes$/i
		"#{Regexp.last_match(1)}o"
	when /^(.*[^s])s$/i
		Regexp.last_match(1)
	end
	# No branch matched: a regular verb (handled elsewhere), so nil.
end

.postprocess(original, inflected) ⇒ Object

Do normal/classical switching and match capitalization in inflected by examining the original input.



705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
# File 'lib/linguistics/en.rb', line 705

# Do normal/classical switching and match capitalization in +inflected+ by
# examining the +original+ input.
#
# +inflected+ may contain two alternatives separated by "|" (for example
# "brothers|brethren"); the second is chosen when Linguistics::classical? is
# true, the first otherwise. The chosen form is then upcased entirely if
# +original+ is all capitals, or has its first letter upcased if +original+
# starts with a capital. The pronoun "I" is exempt from case matching.
#
# Returns a new String; +inflected+ itself is never modified. Callers hand
# in values taken straight from the shared inflection lookup tables, so
# editing the argument in place would corrupt those tables for later calls
# (and would raise on frozen strings).
def postprocess( original, inflected )
	# String#sub always returns a fresh string, even when nothing matches,
	# so everything below operates on a private copy.
	result = inflected.sub( /([^|]+)\|(.+)/ ) {
		Linguistics::classical? ? $2 : $1
	}

	case original
	when "I"
		result
	when /^[A-Z]+$/
		result.upcase
	when /^[A-Z]/
		# Can't use #capitalize, as it will downcase the rest of the string,
		# too.
		result[0,1] = result[0,1].upcase
		result
	else
		result
	end
end

.present_participle(word) ⇒ Object Also known as: part_pres

Return the present participle (the “-ing” form) of the given verb.



1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
# File 'lib/linguistics/en.rb', line 1219

# Return the present participle (the "-ing" form) of the verb +word+.
#
# The verb is first run through plural_verb with a count of 2, then at most
# one stem adjustment is applied (the first rule that matches wins) before
# "ing" is appended.
def present_participle( word )
	# Work on a copy: plural_verb can hand back the very string it was
	# given, and the substitutions below edit the stem in place.
	stem = plural_verb( word.to_s, 2 ).dup

	# Each sub! returns nil when it does not match, so the chain stops at
	# the first rule that applies.
	stem.sub!( /ie$/, 'y' ) ||
		stem.sub!( /ue$/, 'u' ) ||
		# Drop a final "e" after a, u or y. The backreference must be
		# written '\\1'; the Perl-style '$1' is inserted literally by Ruby.
		stem.sub!( /([auy])e$/, '\\1' ) ||
		stem.sub!( /i$/, '' ) ||
		stem.sub!( /([^e])e$/, '\\1' ) ||
		# Verbs ending in "er" are left alone (no consonant doubling).
		/er$/.match( stem ) ||
		# Double the final consonant after a consonant + vowel pair.
		stem.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, '\\1\\2' )

	"#{stem}ing"
end

.proper_noun(string) ⇒ Object

Returns the proper noun form of a string by capitalizing most of the words.

Examples:

English.proper_noun("bosnia and herzegovina") ->
  "Bosnia and Herzegovina"
English.proper_noun("macedonia, the former yugoslav republic of") ->
  "Macedonia, the Former Yugoslav Republic of"
English.proper_noun("virgin islands, u.s.") ->
  "Virgin Islands, U.S."


1684
1685
1686
1687
1688
1689
1690
# File 'lib/linguistics/en.rb', line 1684

# Returns the proper noun form of +string+ by capitalizing most of its
# words. Tokens are separated by runs of spaces and periods (which are kept
# as they are); tokens that do not start with a lowercase ASCII letter, and
# the words "and", "the" and "of", are left untouched.
def proper_noun( string )
	lowercase_words = %w{and the of}
	tokens = string.split( /([ .]+)/ )

	capitalized = tokens.map do |token|
		if /^[a-z]/.match( token ) && !lowercase_words.include?( token )
			token.capitalize
		else
			token
		end
	end

	capitalized.join
end

.quantify(phrase, number = 0, args = {}) ⇒ Object

:joinword

Sets the word (together with any surrounding spaces) that separates the quantity from the noun in the resulting string. Defaults to ' of '.



1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
# File 'lib/linguistics/en.rb', line 1403

# Return a phrase describing roughly how many of +phrase+ there are, based
# on +number+ (converted with #to_i).
#
# Zero and one are delegated to #no and #a; counts falling in SeveralRange,
# NumberRange, NumerousRange and ManyRange get the matching quantifier word
# in front of the pluralized phrase. Anything else is described by order of
# magnitude. +args+ is merged over QuantifyDefaults; its :joinword entry is
# used to join the parts of the order-of-magnitude form.
def quantify( phrase, number=0, args={} )
	quantity = number.to_i
	settings = QuantifyDefaults.merge( args )

	case quantity
	when 0             then return no( phrase )
	when 1             then return a( phrase )
	when SeveralRange  then return "several " + plural( phrase, quantity )
	when NumberRange   then return "a number of " + plural( phrase, quantity )
	when NumerousRange then return "numerous " + plural( phrase, quantity )
	when ManyRange     then return "many " + plural( phrase, quantity )
	end

	# Anything bigger than the ManyRange gets described like
	# "hundreds of thousands of..." or "millions of..."
	# depending, of course, on how many there are.
	thousands, remainder = Math::log10( quantity ).to_i.divmod( 3 )

	# Hundreds (of)... -- nil when the count is an exact power of a thousand
	magnitude = { 2 => "hundreds", 1 => "tens" }[ remainder ]

	# thousands (of) -- nil when there is no scale word at all
	scale = plural( to_thousands(thousands).strip )
	scale = nil if scale.empty?

	# stars.
	[ magnitude, scale, plural(phrase, number) ].compact.join( settings[:joinword] )
end

.sentence(obj) ⇒ Object

Return a LinkParser::Sentence for the stringified obj.



110
111
112
# File 'lib/linguistics/en/linkparser.rb', line 110

# Return the result of parsing the stringified +obj+ with the dictionary
# provided by Linguistics::EN::lp_dict.
def sentence( obj )
	dictionary = Linguistics::EN.lp_dict
	dictionary.parse( obj.to_s )
end

.synset(word, pos = nil, sense = 1) ⇒ Object

Look up the synset associated with the given word or collocation in the WordNet lexicon and return a WordNet::Synset object.



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/linguistics/en/wordnet.rb', line 153

# Look up the synset associated with the given +word+ or collocation in the
# WordNet lexicon and return whatever the lexicon's lookup yields for it.
#
# +pos+ restricts the lookup to one part of speech; when it is nil, each of
# :noun, :verb, :adjective, :adverb and :other is tried in turn and the
# first truthy result wins. As a shortcut, an Integer given in the +pos+
# position is taken to be the +sense+ instead.
#
# Returns the last lookup result, which is nil/false when nothing matched.
def synset( word, pos=nil, sense=1 )
	lex = Linguistics::EN::wn_lexicon

	# Integer rather than Fixnum: Fixnum was folded into Integer in Ruby 2.4
	# and the constant no longer exists in current Rubies.
	if pos.is_a?( Integer )
		sense = pos
		pos = nil
	end
	postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
	syn = nil

	# The block parameter is deliberately not called +pos+, so that it
	# cannot shadow or clobber the method argument.
	postries.each do |try_pos|
		syn = lex.lookupSynsets( word.to_s, try_pos, sense )
		break if syn
	end

	return syn
end

.synsets(word, pos = nil) ⇒ Object

Look up all the synsets associated with the given word or collocation in the WordNet lexicon and return an Array of WordNet::Synset objects. If pos is nil, return synsets for all parts of speech.



173
174
175
176
177
178
179
180
181
182
183
# File 'lib/linguistics/en/wordnet.rb', line 173

# Look up all the synsets associated with the given +word+ or collocation
# in the WordNet lexicon and return them as one flat Array with nils
# removed. If +pos+ is nil, every part of speech (:noun, :verb, :adjective,
# :adverb, :other) is searched.
def synsets( word, pos=nil )
	lexicon = Linguistics::EN::wn_lexicon
	parts_of_speech = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]

	found = parts_of_speech.map do |part|
		lexicon.lookupSynsets( word.to_s, part )
	end

	found.flatten.compact
end

.titlecase(string) ⇒ Object

Returns the given string as a title-cased phrase.



1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
# File 'lib/linguistics/en.rb', line 1643

# Returns the given +string+ as a title-cased phrase.
#
# The string is split on word boundaries. The first and last tokens are
# capitalized with String#capitalize!; every other word token gets its
# first character upcased unless it is listed in TitleCaseExceptions or is
# the tail of a contraction (the "t" in "don't"). An empty string yields an
# empty string.
def titlecase( string ) # :nodoc:

	# Split on word-boundaries
	words = string.split( /\b/ )

	# Nothing to capitalize (empty input); without this guard the
	# capitalize! calls below would be sent to nil.
	return words.join if words.empty?

	# Always capitalize the first and last words
	words.first.capitalize!
	words.last.capitalize!

	# Now scan the rest of the tokens, skipping non-words and capitalization
	# exceptions.
	words.each_with_index do |word, i|

		# Non-words
		next unless /^\w+$/.match( word )

		# Skip exception-words
		next if TitleCaseExceptions.include?( word )

		# Skip second parts of contractions. Only tokens with two real
		# predecessors qualify: for i < 2 the negative indexes would wrap
		# around and inspect the END of the phrase instead.
		next if i >= 2 && words[i - 1] == "'" && /\w/.match( words[i - 2] )

		# Have to do it this way instead of capitalize! because that method
		# also downcases all other letters.
		word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 }
	end

	return words.join
end

.to_hundreds(hundreds, tens = 0, units = 0, thousands = 0, joinword = " and ") ⇒ Object

Transform the specified number of hundreds-, tens-, and units-place numerals into a word phrase. If the number of thousands (thousands) is greater than 0, it will be used to determine where the decimal point is in relation to the hundreds-place number.



1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
# File 'lib/linguistics/en.rb', line 1036

# Transform the specified number of hundreds-, tens-, and units-place
# numerals into a word phrase, followed by the to_thousands wording for
# +thousands+. +joinword+ goes between the hundreds and the rest when the
# rest is non-zero; an empty +joinword+ is replaced by a single space.
#
# Returns nil when all three places are zero.
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
	joinword = ' ' if joinword.empty?

	if hundreds.nonzero?
		phrase = to_units( hundreds ) + " hundred"
		phrase += joinword if tens.nonzero? || units.nonzero?
		phrase + to_tens( tens, units ) + to_thousands( thousands )
	elsif tens.nonzero? || units.nonzero?
		to_tens( tens, units ) + to_thousands( thousands )
	end
end

.to_tens(tens, units, thousands = 0) ⇒ Object

Transform the specified number of tens- and units-place numerals into a word-phrase at the given number of thousands places.



1022
1023
1024
1025
1026
1027
1028
1029
# File 'lib/linguistics/en.rb', line 1022

# Transform the specified number of tens- and units-place numerals into a
# word-phrase at the given number of thousands places.
#
# A tens digit of 1 is looked up in Teens by its units digit; any other
# value combines the Tens entry with the units wording, hyphenated when
# both digits are non-zero.
def to_tens( tens, units, thousands=0 )
	return Teens[ units ] + to_thousands( thousands ) if tens == 1

	hyphen = ( tens.nonzero? && units.nonzero? ) ? '-' : ''
	Tens[ tens ] + hyphen + to_units( units, thousands )
end

.to_thousands(thousands = 0) ⇒ Object

Transform the specified number into one or more words like ‘thousand’, ‘million’, etc. Uses the thousands (American) system.



1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
# File 'lib/linguistics/en.rb', line 1052

# Transform the specified number into one or more words like 'thousand',
# 'million', etc., taken from the Thousands table.
#
# The first word is the table entry for +thousands+ modulo (table size - 1);
# the last table entry is then appended once for every further full cycle
# through the table. The words are joined with single spaces.
def to_thousands( thousands=0 )
	cycle = Thousands.length - 1

	words = (0..thousands).step( cycle ).map do |offset|
		offset.zero? ? Thousands[ thousands % cycle ] : Thousands.last
	end

	words.join( " " )
end

.to_units(units, thousands = 0) ⇒ Object

Transform the specified number of units-place numerals into a word-phrase at the given number of thousands places.



1015
1016
1017
# File 'lib/linguistics/en.rb', line 1015

# Transform the specified number of units-place numerals into a word-phrase
# at the given number of thousands places: the Units entry for +units+
# followed by the to_thousands wording for +thousands+.
def to_units( units, thousands=0 )
	numeral = Units[ units ]
	numeral + to_thousands( thousands )
end

.wn_errorObject

If #haveWordnet? returns false, this can be called to fetch the exception which was raised when WordNet was loaded.



110
# File 'lib/linguistics/en/wordnet.rb', line 110

# Reader for the @wn_error instance variable (the exception recorded when
# WordNet could not be loaded, per the surrounding documentation).
def wn_error
	@wn_error
end

.wn_lexiconObject

The instance of the WordNet::Lexicon used for all Linguistics WordNet functions.



114
115
116
117
118
119
120
121
122
# File 'lib/linguistics/en/wordnet.rb', line 114

# The instance of the WordNet::Lexicon used for all Linguistics WordNet
# functions. It is created on first use and cached in @wn_lexicon.
#
# Raises NotImplementedError, quoting the message of the recorded error,
# when @wn_error is set.
def wn_lexicon
	raise NotImplementedError,
		"WordNet functions are not loaded: %s" % @wn_error.message if @wn_error

	@wn_lexicon = WordNet::Lexicon::new unless @wn_lexicon
	@wn_lexicon
end