pluralize.go (12715B)
// Rule -- pluralize rule expression and replacement value.
//
// The replacement may reference capture groups of the expression as `$0`
// (the whole match) through `$99`.
type Rule struct {
	expression  *regexp.Regexp
	replacement string
}

// Client -- pluralize client.
//
// A Client owns its own rule tables, so rules added through the Add* methods
// only affect the Client they are added to.
type Client struct {
	pluralRules      []Rule
	singularRules    []Rule
	uncountables     map[string]bool
	irregularSingles map[string]string
	irregularPlurals map[string]string
	interpolateExpr  *regexp.Regexp
}

// NewClient - pluralization client factory method.
//
// The returned Client is preloaded with the built-in irregular, plural,
// singular and uncountable rules.
func NewClient() *Client {
	client := Client{}
	client.init()

	return &client
}

// init resets every rule table and loads the built-in rules.
func (c *Client) init() {
	c.pluralRules = nil
	c.singularRules = nil
	c.uncountables = make(map[string]bool)
	c.irregularSingles = make(map[string]string)
	c.irregularPlurals = make(map[string]string)

	// Uncountable rules are loaded last: rules are evaluated newest-first, so
	// the uncountable expressions take precedence over the generic ones.
	c.loadIrregularRules()
	c.loadPluralizationRules()
	c.loadSingularizationRules()
	c.loadUncountableRules()
	c.interpolateExpr = regexp.MustCompile(`\$(\d{1,2})`)
}

// Pluralize -- Pluralize or singularize a word based on the passed in count.
// word: the word to pluralize
// count: how many of the word exist
// inclusive: whether to prefix with the number (e.g. 3 ducks)
//
// A count of exactly 1 yields the singular form; every other count
// (including 0 and negative values) yields the plural form.
func (c *Client) Pluralize(word string, count int, inclusive bool) string {
	var result string
	if count == 1 {
		result = c.Singular(word)
	} else {
		result = c.Plural(word)
	}

	if inclusive {
		return fmt.Sprintf("%d %s", count, result)
	}

	return result
}

// Plural -- Pluralize a word.
func (c *Client) Plural(word string) string {
	return c.replaceWord(c.irregularSingles, c.irregularPlurals, c.pluralRules)(word)
}

// IsPlural -- Check if a word is plural.
func (c *Client) IsPlural(word string) bool {
	return c.checkWord(c.irregularSingles, c.irregularPlurals, c.pluralRules)(word)
}

// Singular -- Singularize a word.
func (c *Client) Singular(word string) string {
	return c.replaceWord(c.irregularPlurals, c.irregularSingles, c.singularRules)(word)
}

// IsSingular -- Check if a word is singular.
func (c *Client) IsSingular(word string) bool {
	return c.checkWord(c.irregularPlurals, c.irregularSingles, c.singularRules)(word)
}

// AddPluralRule -- Add a pluralization rule to the collection.
//
// A rule starting with `(` is compiled as a regular expression as-is; any
// other rule is matched against the whole word, case-insensitively. The rule
// is compiled with regexp.MustCompile, so an invalid expression panics.
// Rules added later take precedence over rules added earlier.
func (c *Client) AddPluralRule(rule string, replacement string) {
	c.pluralRules = append(c.pluralRules, Rule{sanitizeRule(rule), replacement})
}

// AddSingularRule -- Add a singularization rule to the collection.
//
// The rule is interpreted exactly as described for AddPluralRule.
func (c *Client) AddSingularRule(rule string, replacement string) {
	c.singularRules = append(c.singularRules, Rule{sanitizeRule(rule), replacement})
}

// AddUncountableRule -- Add an uncountable word rule.
//
// A plain word is stored (lower-cased) in the uncountable set. An expression
// (a string starting with `(`) is registered as both a plural and a singular
// rule that replaces the match with itself.
func (c *Client) AddUncountableRule(word string) {
	if !isExpr(word) {
		c.uncountables[strings.ToLower(word)] = true
		return
	}

	c.AddPluralRule(word, `$0`)
	c.AddSingularRule(word, `$0`)
}

// AddIrregularRule -- Add an irregular word definition.
//
// Both forms are stored lower-cased; the casing of the input word is restored
// when the rule is applied.
func (c *Client) AddIrregularRule(single string, plural string) {
	p := strings.ToLower(plural)
	s := strings.ToLower(single)

	c.irregularSingles[s] = p
	c.irregularPlurals[p] = s
}

// replaceWord builds a converter that maps a word to its other form.
//
// replaceMap holds the direct word replacements, keepMap holds the words that
// are already in the target form, and rules are the fallback expressions.
func (c *Client) replaceWord(replaceMap map[string]string, keepMap map[string]string, rules []Rule) func(w string) string { //nolint:lll
	return func(word string) string {
		token := strings.ToLower(word)

		// The word is already in the target form.
		if _, ok := keepMap[token]; ok {
			return restoreCase(word, token)
		}

		// Direct replacement for an irregular word.
		if replacement, ok := replaceMap[token]; ok {
			return restoreCase(word, replacement)
		}

		// Fall back to the rule expressions.
		return c.sanitizeWord(token, word, rules)
	}
}

// checkWord builds a predicate reporting whether a word is already in the
// target form. The maps and rules have the same meaning as in replaceWord.
func (c *Client) checkWord(replaceMap map[string]string, keepMap map[string]string, rules []Rule) func(w string) bool {
	return func(word string) bool {
		token := strings.ToLower(word)

		if _, ok := keepMap[token]; ok {
			return true
		}

		if _, ok := replaceMap[token]; ok {
			return false
		}

		// The word is in the target form when converting it changes nothing.
		return c.sanitizeWord(token, token, rules) == token
	}
}

// interpolate substitutes every `$N` reference in str with args[N].
//
// A reference to a group that does not exist in args expands to the empty
// string instead of panicking.
func (c *Client) interpolate(str string, args []string) string {
	return c.interpolateExpr.ReplaceAllStringFunc(str, func(ref string) string {
		// ref is `$` followed by one or two digits.
		index, err := strconv.Atoi(ref[1:])
		if err != nil || index >= len(args) {
			return ``
		}

		return args[index]
	})
}

// replace applies rule to word and returns the rewritten word.
//
// The replacement text is derived from the first match of the rule and is
// substituted for every match, which is what callers have always observed.
func (c *Client) replace(word string, rule Rule) string {
	loc := rule.expression.FindStringSubmatchIndex(word)
	if loc == nil {
		return word
	}

	// Collect the capture groups of the first match; groups that did not
	// participate in the match are left as empty strings.
	args := make([]string, len(loc)/2)
	for i := range args {
		if start, end := loc[2*i], loc[2*i+1]; start >= 0 {
			args[i] = word[start:end]
		}
	}

	result := c.interpolate(rule.replacement, args)

	// An empty match carries no casing information, so borrow the casing of
	// the character right before it (e.g. "CAT" + "s" becomes "CATS").
	reference := args[0]
	if reference == `` {
		reference = lastRune(word[:loc[0]])
	}

	return rule.expression.ReplaceAllLiteralString(word, restoreCase(reference, result))
}

// sanitizeWord converts word using the first matching rule.
//
// token is the lower-cased word. Empty and uncountable words are returned
// unchanged, as are words that no rule matches.
func (c *Client) sanitizeWord(token string, word string, rules []Rule) string {
	if token == `` || c.uncountables[token] {
		return word
	}

	// Iterate over the rules in reverse order so that the most recently
	// added (most specific) rule wins over the general ones.
	for i := len(rules) - 1; i >= 0; i-- {
		if rules[i].expression.MatchString(word) {
			return c.replace(word, rules[i])
		}
	}

	return word
}

// sanitizeRule compiles rule into a regular expression.
//
// Expressions are compiled as-is; plain words are anchored to the whole input
// and matched case-insensitively. Panics when the expression is invalid.
func sanitizeRule(rule string) *regexp.Regexp {
	if isExpr(rule) {
		return regexp.MustCompile(rule)
	}

	return regexp.MustCompile(`(?i)^` + rule + `$`)
}

// restoreCase returns token with the casing style of word applied to it.
//
// Lower-case, upper-case and title-case words are recognized; any other
// casing yields a lower-cased token. First characters are handled as whole
// runes, so non-ASCII words and empty tokens are safe.
func restoreCase(word string, token string) string {
	switch {
	case word == token:
		// Tokens are an exact match.
		return token
	case word == strings.ToLower(word):
		// Lower cased words. E.g. "hello".
		return strings.ToLower(token)
	case word == strings.ToUpper(word):
		// Upper cased words. E.g. "WHISKY".
		return strings.ToUpper(token)
	}

	// Title cased words. E.g. "Title".
	if head, _ := splitFirstRune(word); head == strings.ToUpper(head) {
		first, rest := splitFirstRune(token)
		return strings.ToUpper(first) + strings.ToLower(rest)
	}

	// Anything else is lower cased. E.g. "tEST".
	return strings.ToLower(token)
}

// splitFirstRune splits s into its first rune and the remainder.
// Both parts are empty when s is empty.
func splitFirstRune(s string) (first string, rest string) {
	for i := range s {
		if i > 0 {
			return s[:i], s[i:]
		}
	}

	return s, ``
}

// lastRune returns the last rune of s as a string, or an empty string when s
// is empty.
func lastRune(s string) string {
	start := 0
	for i := range s {
		start = i
	}

	return s[start:]
}

// isExpr -- helper to detect if string represents an expression by checking first character to be `(`.
// An empty string is not an expression.
func isExpr(s string) bool {
	return strings.HasPrefix(s, `(`)
}

// loadIrregularRules registers the built-in irregular singular/plural pairs.
func (c *Client) loadIrregularRules() { //nolint:funlen
	var irregularRules = []struct {
		single string
		plural string
	}{
		// Pronouns.
		{`I`, `we`},
		{`me`, `us`},
		{`he`, `they`},
		{`she`, `they`},
		{`them`, `them`},
		{`myself`, `ourselves`},
		{`yourself`, `yourselves`},
		{`itself`, `themselves`},
		{`herself`, `themselves`},
		{`himself`, `themselves`},
		{`themself`, `themselves`},
		{`is`, `are`},
		{`was`, `were`},
		{`has`, `have`},
		{`this`, `these`},
		{`that`, `those`},
		{`my`, `our`},
		{`its`, `their`},
		{`his`, `their`},
		{`her`, `their`},
		// Words ending with a consonant and `o`.
		{`echo`, `echoes`},
		{`dingo`, `dingoes`},
		{`volcano`, `volcanoes`},
		{`tornado`, `tornadoes`},
		{`torpedo`, `torpedoes`},
		// Ends with `us`.
		{`genus`, `genera`},
		{`viscus`, `viscera`},
		// Ends with `ma`.
		{`stigma`, `stigmata`},
		{`stoma`, `stomata`},
		{`dogma`, `dogmata`},
		{`lemma`, `lemmata`},
		{`schema`, `schemata`},
		{`anathema`, `anathemata`},
		// Other irregular rules.
		{`ox`, `oxen`},
		{`axe`, `axes`},
		{`die`, `dice`},
		{`yes`, `yeses`},
		{`foot`, `feet`},
		{`eave`, `eaves`},
		{`goose`, `geese`},
		{`tooth`, `teeth`},
		{`quiz`, `quizzes`},
		{`human`, `humans`},
		{`proof`, `proofs`},
		{`carve`, `carves`},
		{`valve`, `valves`},
		{`looey`, `looies`},
		{`thief`, `thieves`},
		{`groove`, `grooves`},
		{`pickaxe`, `pickaxes`},
		{`passerby`, `passersby`},
		{`canvas`, `canvases`},
		{`sms`, `sms`},
	}

	for _, r := range irregularRules {
		c.AddIrregularRule(r.single, r.plural)
	}
}

// loadPluralizationRules registers the built-in pluralization expressions.
// They are listed from most general to most specific because later rules are
// tried first.
func (c *Client) loadPluralizationRules() {
	var pluralizationRules = []struct {
		rule        string
		replacement string
	}{
		{`(?i)s?$`, `s`},
		{`(?i)[^[:ascii:]]$`, `$0`},
		{`(?i)([^aeiou]ese)$`, `$1`},
		{`(?i)(ax|test)is$`, `$1es`},
		{`(?i)(alias|[^aou]us|t[lm]as|gas|ris)$`, `$1es`},
		{`(?i)(e[mn]u)s?$`, `$1s`},
		{`(?i)([^l]ias|[aeiou]las|[ejzr]as|[iu]am)$`, `$1`},
		{`(?i)(alumn|syllab|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$`, `$1i`}, //nolint:lll,misspell
		{`(?i)(alumn|alg|vertebr)(?:a|ae)$`, `$1ae`},
		{`(?i)(seraph|cherub)(?:im)?$`, `$1im`},
		{`(?i)(her|at|gr)o$`, `$1oes`},
		{`(?i)(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|automat|quor)(?:a|um)$`, `$1a`}, //nolint:lll,misspell
		{`(?i)(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)(?:a|on)$`, `$1a`},
		{`(?i)sis$`, `ses`},
		{`(?i)(?:(kni|wi|li)fe|(ar|l|ea|eo|oa|hoo)f)$`, `$1$2ves`},
		{`(?i)([^aeiouy]|qu)y$`, `$1ies`},
		{`(?i)([^ch][ieo][ln])ey$`, `$1ies`},
		{`(?i)(x|ch|ss|sh|zz)$`, `$1es`},
		{`(?i)(matr|cod|mur|sil|vert|ind|append)(?:ix|ex)$`, `$1ices`},
		{`(?i)\b((?:tit)?m|l)(?:ice|ouse)$`, `$1ice`},
		{`(?i)(pe)(?:rson|ople)$`, `$1ople`},
		{`(?i)(child)(?:ren)?$`, `$1ren`},
		{`(?i)eaux$`, `$0`},
		{`(?i)m[ae]n$`, `men`},
		{`thou`, `you`},
	}

	for _, r := range pluralizationRules {
		c.AddPluralRule(r.rule, r.replacement)
	}
}

// loadSingularizationRules registers the built-in singularization
// expressions. They are listed from most general to most specific because
// later rules are tried first.
func (c *Client) loadSingularizationRules() {
	var singularizationRules = []struct {
		rule        string
		replacement string
	}{
		{`(?i)s$`, ``},
		{`(?i)(ss)$`, `$1`},
		{`(?i)(wi|kni|(?:after|half|high|low|mid|non|night|[^\w]|^)li)ves$`, `$1fe`},
		{`(?i)(ar|(?:wo|[ae])l|[eo][ao])ves$`, `$1f`},
		{`(?i)ies$`, `y`},
		{`(?i)(dg|ss|ois|lk|ok|wn|mb|th|ch|ec|oal|is|ck|ix|sser|ts|wb)ies$`, `$1ie`},
		{`(?i)\b(l|(?:neck|cross|hog|aun)?t|coll|faer|food|gen|goon|group|hipp|junk|vegg|(?:pork)?p|charl|calor|cut)ies$`, `$1ie`}, //nolint:lll
		{`(?i)\b(mon|smil)ies$`, `$1ey`},
		{`(?i)\b((?:tit)?m|l)ice$`, `$1ouse`},
		{`(?i)(seraph|cherub)im$`, `$1`},
		{`(?i)(x|ch|ss|sh|zz|tto|go|cho|alias|[^aou]us|t[lm]as|gas|(?:her|at|gr)o|[aeiou]ris)(?:es)?$`, `$1`},
		{`(?i)(analy|diagno|parenthe|progno|synop|the|empha|cri|ne)(?:sis|ses)$`, `$1sis`},
		{`(?i)(movie|twelve|abuse|e[mn]u)s$`, `$1`},
		{`(?i)(test)(?:is|es)$`, `$1is`},
		{`(?i)(alumn|syllab|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$`, `$1us`}, //nolint:lll,misspell
		{`(?i)(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|quor)a$`, `$1um`}, //nolint:lll,misspell
		{`(?i)(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)a$`, `$1on`},
		{`(?i)(alumn|alg|vertebr)ae$`, `$1a`},
		{`(?i)(cod|mur|sil|vert|ind)ices$`, `$1ex`},
		{`(?i)(matr|append)ices$`, `$1ix`},
		{`(?i)(pe)(rson|ople)$`, `$1rson`},
		{`(?i)(child)ren$`, `$1`},
		{`(?i)(eau)x?$`, `$1`},
		{`(?i)men$`, `man`},
	}

	for _, r := range singularizationRules {
		c.AddSingularRule(r.rule, r.replacement)
	}
}

// loadUncountableRules registers the built-in uncountable words and
// expressions.
func (c *Client) loadUncountableRules() { //nolint:funlen
	var uncountableRules = []string{
		// Singular words with no plurals.
		`adulthood`,
		`advice`,
		`agenda`,
		`aid`,
		`aircraft`,
		`alcohol`,
		`ammo`,
		`analytics`,
		`anime`,
		`athletics`,
		`audio`,
		`bison`,
		`blood`,
		`bream`,
		`buffalo`,
		`butter`,
		`carp`,
		`cash`,
		`chassis`,
		`chess`,
		`clothing`,
		`cod`,
		`commerce`,
		`cooperation`,
		`corps`,
		`debris`,
		`diabetes`,
		`digestion`,
		`elk`,
		`energy`,
		`equipment`,
		`excretion`,
		`expertise`,
		`firmware`,
		`flounder`,
		`fun`,
		`gallows`,
		`garbage`,
		`graffiti`,
		`hardware`,
		`headquarters`,
		`health`,
		`herpes`,
		`highjinks`,
		`homework`,
		`housework`,
		`information`,
		`jeans`,
		`justice`,
		`kudos`,
		`labour`,
		`literature`,
		`machinery`,
		`mackerel`,
		`mail`,
		`media`,
		`mews`,
		`moose`,
		`music`,
		`mud`,
		`manga`,
		`news`,
		`only`,
		`personnel`,
		`pike`,
		`plankton`,
		`pliers`,
		`police`,
		`pollution`,
		`premises`,
		`rain`,
		`research`,
		`rice`,
		`salmon`,
		`scissors`,
		`series`,
		`sewage`,
		`shambles`,
		`shrimp`,
		`software`,
		`staff`,
		`swine`,
		`tennis`,
		`traffic`,
		`transportation`,
		`trout`,
		`tuna`,
		`wealth`,
		`welfare`,
		`whiting`,
		`wildebeest`,
		`wildlife`,
		`you`,
		// Regexes.
		`(?i)pok[eé]mon$`,  // "pokemon", "pokémon"
		`(?i)[^aeiou]ese$`, // "chinese", "japanese"
		`(?i)deer$`,        // "deer", "reindeer"
		`(?i)(fish)$`,      // "fish", "blowfish", "angelfish"
		`(?i)measles$`,     // "measles"
		`(?i)o[iu]s$`,      // "carnivorous"
		`(?i)pox$`,         // "chickenpox", "smallpox"
		`(?i)sheep$`,       // "sheep"
	}

	for _, w := range uncountableRules {
		c.AddUncountableRule(w)
	}
}