Module: Linguistics::EN::Pluralization

Defined in:
lib/linguistics/en/pluralization.rb

Overview

Plural inflection methods for the English-language Linguistics module.

It provides conversion to plural forms for all nouns, most verbs, and some adjectives. It also provides “classical” variants (for example: “brother” -> “brethren”, “dogma” -> “dogmata”, etc.) where appropriate.

Constant Summary collapse

PL_sb_irregular_s =

Plurals

{
	"ephemeris"	=> "ephemerides",
	"iris"		=> "irises|irides",
	"clitoris"	=> "clitorises|clitorides",
	"corpus"	=> "corpuses|corpora",
	"opus"		=> "opuses|opera",
	"genus"		=> "genera",
	"mythos"	=> "mythoi",
	"penis"		=> "penises|penes",
	"testis"	=> "testes",
}
PL_sb_irregular_h =
{
	"child"		=> "children",
	"brother"	=> "brothers|brethren",
	"loaf"		=> "loaves",
	"hoof"		=> "hoofs|hooves",
	"beef"		=> "beefs|beeves",
	"money"		=> "monies",
	"mongoose"	=> "mongooses",
	"ox"		=> "oxen",
	"cow"		=> "cows|kine",
	"soliloquy"	=> "soliloquies",
	"graffito"	=> "graffiti",
	"prima donna"	=> "prima donnas|prime donne",
	"octopus"	=> "octopuses|octopodes",
	"genie"		=> "genies|genii",
	"ganglion"	=> "ganglions|ganglia",
	"trilby"	=> "trilbys",
	"turf"		=> "turfs|turves",
}.update( PL_sb_irregular_s )
PL_sb_irregular =
matchgroup PL_sb_irregular_h.keys
PL_sb_C_a_ata =

Classical “..a” -> “..ata”

matchgroup %w[
	anathema bema carcinoma charisma diploma
	dogma drama edema enema enigma lemma
	lymphoma magma melisma miasma oedema
	sarcoma schema soma stigma stoma trauma
	gumma pragma
].collect {|word| word[0...-1]}
PL_sb_U_a_ae =

Unconditional “..a” -> “..ae”

matchgroup %w[
	alumna alga vertebra persona
]
PL_sb_C_a_ae =

Classical “..a” -> “..ae”

matchgroup [/.*umbra/ ] + %w[
	amoeba antenna formula hyperbola
	medusa nebula parabola abscissa
	hydra nova lacuna aurora
	flora fauna
]
PL_sb_C_en_ina =

Classical “..en” -> “..ina”

matchgroup %w[
	stamen	foramen	lumen
].collect {|word| word[0...-2] }
PL_sb_U_um_a =

Unconditional “..um” -> “..a”

matchgroup %w[
	bacterium agendum desideratum erratum
	stratum datum ovum extremum candelabrum
].collect {|word| word[0...-2] }
PL_sb_C_um_a =

Classical “..um” -> “..a”

# Nouns ending in "um" whose classical plural ends in "a". Each word is
# reduced to its stem (the final two characters are dropped) before being
# handed to matchgroup.
#
# "encomium" and "millennium" are the correct spellings; the historical
# misspellings "enconium" and "millenium" are kept so that input which
# matched before still matches.
matchgroup %w[
	maximum	minimum	momentum	optimum
	quantum	cranium	curriculum	dictum
	phylum	aquarium	compendium	emporium
	enconium	encomium	gymnasium	honorarium
	interregnum	lustrum	memorandum	millenium
	millennium	rostrum	spectrum	speculum
	stadium	trapezium	ultimatum	medium
	vacuum	velum	consortium
].collect {|word| word[0...-2]}
PL_sb_U_us_i =

Unconditional “..us” -> “..i”

matchgroup %w[
	alumnus	alveolus	bacillus	bronchus
	locus	nucleus	stimulus	meniscus
].collect {|word| word[0...-2]}
PL_sb_C_us_i =

Classical “..us” -> “..i”

matchgroup %w[
	focus	radius	genius
	incubus	succubus	nimbus
	fungus	nucleolus	stylus
	torus	umbilicus	uterus
	hippopotamus
].collect {|word| word[0...-2]}
PL_sb_C_us_us =

Classical “..us” -> “..us” (assimilated 4th declension latin nouns)

matchgroup %w[
	status apparatus prospectus sinus
	hiatus impetus plexus
]
PL_sb_U_on_a =

Unconditional “..on” -> “..a”

matchgroup %w[
	criterion	perihelion	aphelion
	phenomenon	prolegomenon	noumenon
	organon	asyndeton	hyperbaton
].collect {|word| word[0...-2]}
PL_sb_C_on_a =

Classical “..on” -> “..a”

matchgroup %w[
	oxymoron
].collect {|word| word[0...-2]}
PL_sb_C_o_i_a =

Classical “..o” -> “..i” (but normally -> “..os”)

%w[
	solo		soprano	basso	alto
	contralto	tempo	piano
]
PL_sb_C_o_i =
matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
PL_sb_U_o_os =

Always “..o” -> “..os”

matchgroup( %w[
	albino	archipelago	armadillo
	commando	crescendo	fiasco
	ditto	dynamo	embryo
	ghetto	guano	inferno
	jumbo	lumbago	magneto
	manifesto	medico	octavo
	photo	pro		quarto	
	canto	lingo	generalissimo
	stylo	rhino
] | PL_sb_C_o_i_a )
PL_sb_U_ex_ices =

Unconditional “..[ei]x” -> “..ices”

matchgroup %w[
	codex	murex	silex
].collect {|word| word[0...-2]}
PL_sb_U_ix_ices =
matchgroup %w[
	radix	helix
].collect {|word| word[0...-2]}
PL_sb_C_ex_ices =

Classical “..[ei]x” -> “..ices”

matchgroup %w[
	vortex	vertex	cortex	latex
	pontifex	apex		index	simplex
].collect {|word| word[0...-2]}
PL_sb_C_ix_ices =
matchgroup %w[
	appendix
].collect {|word| word[0...-2]}
PL_sb_C_i =

Arabic: “..” -> “..i”

matchgroup %w[
	afrit	afreet	efreet
]
PL_sb_C_im =

Hebrew: “..” -> “..im”

matchgroup %w[
	goy		seraph	cherub
]
PL_sb_U_man_mans =

Unconditional “..man” -> “..mans”

matchgroup %w[
	human
	Alabaman Bahaman Burman German
	Hiroshiman Liman Nakayaman Oklahoman
	Panaman Selman Sonaman Tacoman Yakiman
	Yokohaman Yuman
]
PL_sb_uninflected_s =
[
	# Pairs or groups subsumed to a singular...
	"breeches", "britches", "clippers", "gallows", "hijinks",
	"headquarters", "pliers", "scissors", "testes", "herpes",
	"pincers", "shears", "proceedings", "trousers",

	# Unassimilated Latin 4th declension
	"cantus", "coitus", "nexus",

	# Recent imports...
	"contretemps", "corps", "debris",
	/.*ois/,

	# Diseases
	/.*measles/, "mumps",

	# Miscellaneous others...
	"diabetes", "jackanapes", "series", "species", "rabies",
	"chassis", "innings", "news", "mews",
]
PL_sb_uninflected_herd =

Don’t inflect in classical mode, otherwise normal inflection

matchgroup %w[
	wildebeest swine eland bison buffalo
	elk moose rhinoceros
]
PL_sb_uninflected =
matchgroup(

	# Some fish and herd animals
	/.*fish/, "tuna", "salmon", "mackerel", "trout",
	"bream", /sea[- ]bass/, "carp", "cod", "flounder", "whiting",

	/.*deer/, /.*sheep/,

	# All nationals ending in -ese
	"Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
	"Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
	"Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
	"Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
	"Shavese", "Vermontese", "Wenchowese", "Yengeese",
	/.*[nrlm]ese/,

	# Some words ending in ...s (often pairs taken as a whole)
	PL_sb_uninflected_s,

	# Diseases
	/.*pox/,

	# Other oddities
	"graffiti", "djinn"
)
PL_sb_singular_s =

Singular words ending in …s (all inflect with …es)

matchgroup [ /.*ss/, /.*us/ ] +
%w[
	acropolis aegis alias arthritis asbestos atlas
	bathos bias bronchitis bursitis caddis cannabis
	canvas chaos cosmos dais digitalis encephalitis
	epidermis ethos eyas gas glottis hepatitis
	hubris ibis lens mantis marquis metropolis
	neuritis pathos pelvis polis rhinoceros
	sassafras tonsillitis trellis 
]
PL_v_special_s =
matchgroup [
	PL_sb_singular_s,
	PL_sb_uninflected_s,
	PL_sb_irregular_s.keys,
	/(.*[csx])is/,
	/(.*)ceps/,
	/[A-Z].*s/,
]
PL_sb_postfix_adj =
'(' + {

	'general' => '(?!major|lieutenant|brigadier|adjutant)\S+',
	'martial' => "court",

}.collect {|key,val|
	"(?:#{val})(?=(?:-|\\s+)#{key})"
}.join("|") + ")(.*)"
PL_sb_military =
%r'major|lieutenant|brigadier|adjutant|quartermaster'
PL_sb_general =
%r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
PL_prep =
matchgroup %w[
	about above across after among around at athwart before behind
	below beneath beside besides between betwixt beyond but by
	during except for from in into near of off on onto out over
	since till to under until unto upon with
]
PL_sb_prep_dual_compound =
%r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
PL_sb_prep_compound =
%r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
PL_pron_nom_h =
{
	#	Nominative		Reflexive
	"i"		=> "we",	"myself"   =>	"ourselves",
	"you"	=> "you",	"yourself" =>	"yourselves",
	"she"	=> "they",	"herself"  =>	"themselves",
	"he"	=> "they",	"himself"  =>	"themselves",
	"it"	=> "they",	"itself"   =>	"themselves",
	"they"	=> "they",	"themself" =>	"themselves",

	#	Possessive
	"mine"	 => "ours",
	"yours"	 => "yours",
	"hers"	 => "theirs",
	"his"	 => "theirs",
	"its"	 => "theirs",
	"theirs" => "theirs",
}
PL_pron_nom =
Regexp.new( PL_pron_nom_h.keys.join('|'), Regexp::IGNORECASE )
PL_pron_acc_h =
{
	#	Accusative		Reflexive
	"me"	=> "us",	"myself"   =>	"ourselves",
	"you"	=> "you",	"yourself" =>	"yourselves",
	"her"	=> "them",	"herself"  =>	"themselves",
	"him"	=> "them",	"himself"  =>	"themselves",
	"it"	=> "them",	"itself"   =>	"themselves",
	"them"	=> "them",	"themself" =>	"themselves",
}
PL_pron_acc =
matchgroup PL_pron_acc_h.keys
PL_v_irregular_pres_h =
{
	#	1st pers. sing.		2nd pers. sing.		3rd pers. singular
	#				3rd pers. (indet.)	
	"am"	=> "are",	"are"	=> "are",	"is"	 => "are",
	"was"	=> "were",	"were"	=> "were",	"was"	 => "were",
	"have"  => "have",	"have"  => "have",	"has"	 => "have",
}
PL_v_irregular_pres =
matchgroup PL_v_irregular_pres_h.keys
PL_v_ambiguous_pres_h =
{
	#	1st pers. sing.		2nd pers. sing.		3rd pers. singular
	#				3rd pers. (indet.)	
	"act"	=> "act",	"act"	=> "act",	"acts"	  => "act",
	"blame"	=> "blame",	"blame"	=> "blame",	"blames"  => "blame",
	"can"	=> "can",	"can"	=> "can",	"can"	  => "can",
	"must"	=> "must",	"must"	=> "must",	"must"	  => "must",
	"fly"	=> "fly",	"fly"	=> "fly",	"flies"	  => "fly",
	"copy"	=> "copy",	"copy"	=> "copy",	"copies"  => "copy",
	"drink"	=> "drink",	"drink"	=> "drink",	"drinks"  => "drink",
	"fight"	=> "fight",	"fight"	=> "fight",	"fights"  => "fight",
	"fire"	=> "fire",	"fire"	=> "fire",	"fires"   => "fire",
	"like"	=> "like",	"like"	=> "like",	"likes"   => "like",
	"look"	=> "look",	"look"	=> "look",	"looks"   => "look",
	"make"	=> "make",	"make"	=> "make",	"makes"   => "make",
	"reach"	=> "reach",	"reach"	=> "reach",	"reaches" => "reach",
	"run"	=> "run",	"run"	=> "run",	"runs"    => "run",
	"sink"	=> "sink",	"sink"	=> "sink",	"sinks"   => "sink",
	"sleep"	=> "sleep",	"sleep"	=> "sleep",	"sleeps"  => "sleep",
	"view"	=> "view",	"view"	=> "view",	"views"   => "view",
}
PL_v_ambiguous_pres =
matchgroup PL_v_ambiguous_pres_h.keys
PL_v_irregular_non_pres =
matchgroup %w[
	did had ate made put 
	spent fought sank gave sought
	shall could ought should
]
PL_v_ambiguous_non_pres =
matchgroup %w[
	thought saw bent will might cut
]
PL_count_zero =
matchgroup %w[
	0 no zero nil
]
PL_count_one =
matchgroup %w[
	1 a an one each every this that
]
PL_adj_special_h =
{
	"a"    => "some",	"an"   =>  "some",
	"this" => "these",	"that" => "those",
}
PL_adj_special =
matchgroup PL_adj_special_h.keys
PL_adj_poss_h =
{
	"my"    => "our",
	"your"	=> "your",
	"its"	=> "their",
	"her"	=> "their",
	"his"	=> "their",
	"their"	=> "their",
}
PL_adj_poss =
matchgroup PL_adj_poss_h.keys

Instance Method Summary collapse

Instance Method Details

#plural(count = 2) ⇒ Object

Return the plural of the given phrase if count indicates it should be plural.



401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/linguistics/en/pluralization.rb', line 401

# Return the plural of the receiver. Receivers that respond to #to_int are
# first converted with #numwords; anything else is stringified with #to_s.
# Whitespace surrounding the phrase is kept as-is around the result.
#
# +count+ is forwarded unchanged to the pluralize_* helpers. If the phrase
# has no inflectable content (the pattern below does not match), the phrase
# is returned untouched.
def plural( count=2 )
	phrase = self.respond_to?( :to_int ) ? self.numwords : self.to_s

	self.log.debug "Pluralizing %p" % [ phrase ]

	# Split into leading whitespace, the part to inflect, and trailing
	# whitespace; bail out early when there is nothing to inflect.
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
	return phrase unless md

	leading, body, trailing = md.captures

	# First helper to return a non-nil value wins: adjective, verb, then noun.
	inflected = pluralize_special_adjective( body, count ) ||
		pluralize_special_verb( body, count ) ||
		pluralize_noun( body, count )

	return leading + postprocess( body, inflected ) + trailing
end

#plural_adjective(count = 2) ⇒ Object Also known as: plural_adj

Return the plural of the given adjectival phrase if count indicates it should be plural.



463
464
465
466
467
468
469
470
471
472
473
# File 'lib/linguistics/en/pluralization.rb', line 463

# Return the plural of the receiver (stringified with #to_s), treating it as
# an adjectival phrase. Whitespace surrounding the phrase is preserved.
#
# +count+ is forwarded to pluralize_special_adjective; when that helper
# returns nil/false the word itself is used as the inflected form.
#
# Returns the phrase unchanged when it contains nothing to inflect (for
# example an empty string), instead of raising on a failed match.
def plural_adjective( count=2 )
	phrase = self.to_s

	# The pattern requires at least one non-newline character, so it yields
	# nil for "" or "\n"; guard before touching the captures.
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
	return phrase unless md

	pre, word, post = md.captures

	plural = postprocess( word, pluralize_special_adjective(word, count) || word )

	return pre + plural + post
end

#plural_noun(count = 2) ⇒ Object

Return the plural of the given noun phrase if count indicates it should be plural.



431
432
433
434
435
436
437
438
439
440
441
# File 'lib/linguistics/en/pluralization.rb', line 431

# Return the plural of the receiver (stringified with #to_s), treating it as
# a noun phrase. Whitespace surrounding the phrase is preserved.
#
# +count+ is forwarded to pluralize_noun, whose result is passed through
# postprocess together with the original word.
#
# Returns the phrase unchanged when it contains nothing to inflect (for
# example an empty string), instead of raising on a failed match.
def plural_noun( count=2 )
	phrase = self.to_s

	# The pattern requires at least one non-newline character, so it yields
	# nil for "" or "\n"; guard before touching the captures.
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
	return phrase unless md

	pre, word, post = md.captures

	plural = postprocess( word, pluralize_noun(word, count) )

	return pre + plural + post
end

#plural_verb(count = 2) ⇒ Object

Return the plural of the given verb phrase if count indicates it should be plural.



446
447
448
449
450
451
452
453
454
455
456
457
458
# File 'lib/linguistics/en/pluralization.rb', line 446

# Return the plural of the receiver (stringified with #to_s), treating it as
# a verb phrase. Whitespace surrounding the phrase is preserved.
#
# +count+ is forwarded to pluralize_special_verb first; if that returns
# nil/false, pluralize_general_verb supplies the inflected form.
#
# Returns the phrase unchanged when it contains nothing to inflect (for
# example an empty string), instead of raising on a failed match.
def plural_verb( count=2 )
	phrase = self.to_s

	# The pattern requires at least one non-newline character, so it yields
	# nil for "" or "\n"; guard before touching the captures.
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
	return phrase unless md

	pre, word, post = md.captures

	plural = postprocess( word,
		pluralize_special_verb(word, count) ||
		pluralize_general_verb(word, count) )

	return pre + plural + post
end