有没有人有一个值得信赖的Proper Case或PCase算法(类似于UCase或Upper)?我在寻找的东西取一个值,如"GEORGE BURDELL"
或"george burdell"
并将其转化为"George Burdell"
.
我已经有一个能处理简单情况的简单实现。理想情况下,它还应该能处理诸如"O'REILLY"这样的输入,
并将其转换为"O'Reilly"
,但我知道这更难.
如果这简化了事情,我主要关注英语.
更新:我使用C#作为语言,但我可以从几乎任何东西转换(假设存在功能).
我同意 McDonald's 这种情况很棘手.我本想把它和我的 O'Reilly 例子一起提出来,但在原帖中没有写.
除非我误解了你的问题,否则我认为你不需要自己动手,TextInfo类可以为你做.
using System.Globalization; CultureInfo.InvariantCulture.TextInfo.ToTitleCase("GeOrGE bUrdEll")
将返回"George Burdell".如果涉及一些特殊规则,您可以使用自己的区域性(CultureInfo).
更新: 迈克尔(在对此答案的评论中)指出,如果输入全部为大写,这种方法将不起作用,因为该方法会把它当作首字母缩略词.一个简单的解决办法是:在把文本传给 ToTitleCase 之前,先对文本调用 .ToLower().
@Zack:我会将其作为单独的回复发布.
这是一个基于kronoz帖子的例子.
// Script-style entry point: runs a list of sample names through ToProperCase and dumps the results.
void Main()
{
    List<string> names = new List<string>()
    {
        "bill o'reilly",
        "johannes diderik van der waals",
        "mr. moseley-williams",
        "Joe VanWyck",
        "mcdonald's",
        "william the third",
        "hrh prince charles",
        "h.r.m. queen elizabeth the third",
        "william gates, iii",
        "pope leo xii",
        "a.k. jennings"
    };

    // NOTE(review): Dump() is not defined in this snippet; presumably the LINQPad output extension.
    names.Select(name => name.ToProperCase()).Dump();
}

// http://stackoverflow.com/questions/32149/does-anyone-have-a-good-proper-case-algorithm
/// <summary>
/// Heuristic "proper case" conversion for (mainly English) personal names,
/// e.g. "GEORGE BURDELL" or "george burdell" to "George Burdell".
/// </summary>
public static class ProperCaseHelper
{
    // Roman Numeral parser thanks to [Hannobo](/sf/ask/17360801/)
    // Note that it excludes the Chinese last name Xi.
    // Built once instead of on every call; the pattern itself is unchanged.
    private static readonly Regex RomanNumeralRegex = new Regex(
        @"\b(?!Xi\b)(X|XX|XXX|XL|L|LX|LXX|LXXX|XC|C)?(I|II|III|IV|V|VI|VII|VIII|IX)?\b",
        RegexOptions.IgnoreCase);

    // Words that are always written exactly as listed, whatever the input casing.
    private static readonly string[] SpecialWords =
    {
        "van",    // Dick van Dyke
        "von",    // Baron von Bruin-Valt
        "de",
        "di",
        "da",     // Leonardo da Vinci, Eduardo da Silva
        "of",     // The Grand Old Duke of York
        "the",    // William the Conqueror
        "HRH",    // His/Her Royal Highness
        "HRM",    // His/Her Royal Majesty
        "H.R.H.", // His/Her Royal Highness
        "H.R.M."  // His/Her Royal Majesty
    };

    /// <summary>
    /// Converts an all-uppercase or all-lowercase name to proper case.
    /// Mixed-case input (e.g. "Joe VanWyck") is assumed to be cased on purpose and is returned unchanged.
    /// </summary>
    /// <param name="input">The name to convert; words are separated by single spaces.</param>
    /// <returns>The proper-cased name, or <paramref name="input"/> itself when it is null, empty or mixed case.</returns>
    public static string ToProperCase(this string input)
    {
        // Extension methods can be invoked on a null reference; return it instead of throwing.
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        if (!IsAllUpperOrAllLower(input))
        {
            // leave the CamelCase or Propercase names alone
            return input;
        }

        // fix the ALL UPPERCASE or all lowercase names
        return string.Join(" ", input.Split(' ').Select(word => WordToProperCase(word)));
    }

    /// <summary>
    /// Returns true when <paramref name="input"/> has no lowercase letters or no uppercase letters.
    /// </summary>
    /// <param name="input">The text to inspect; must not be null.</param>
    public static bool IsAllUpperOrAllLower(this string input)
    {
        // Invariant casing keeps the result independent of the current thread culture.
        return input.ToLowerInvariant().Equals(input) || input.ToUpperInvariant().Equals(input);
    }

    /// <summary>Proper-cases one space-delimited word, applying the name-specific special cases.</summary>
    private static string WordToProperCase(string word)
    {
        if (string.IsNullOrEmpty(word))
        {
            return word;
        }

        // Standard case
        string ret = CapitaliseFirstLetter(word);

        // Special cases:
        ret = CapitaliseAfter(ret, "'"); // D'Artagnon, D'Silva

        // A trailing possessive 's stays lowercase (Bill's, McDonald's).
        if (ret.Length > 2 && ret.EndsWith("'S", StringComparison.Ordinal))
        {
            ret = ret.Substring(0, ret.Length - 1) + "s";
        }

        ret = CapitaliseAfter(ret, ".");                      // initials such as A.K. or J.R.R.
        ret = CapitaliseAfter(ret, "-");                      // Oscar-Meyer-Weiner
        ret = CapitaliseAfter(ret, "Mc", t => t.Length > 4);  // Scots
        ret = CapitaliseAfter(ret, "Mac", t => t.Length > 5); // Scots except Macey

        // Special words:
        foreach (string specialWord in SpecialWords)
        {
            if (ret.Equals(specialWord, StringComparison.InvariantCultureIgnoreCase))
            {
                ret = specialWord;
                break;
            }
        }

        return UppercaseRomanNumerals(ret); // William Gates, III
    }

    /// <summary>
    /// Writes <paramref name="marker"/> in its canonical casing and capitalises the letter that follows it.
    /// Every occurrence is processed, not only the first one.
    /// </summary>
    /// <param name="word">The word to adjust.</param>
    /// <param name="marker">A separator ("'", ".", "-") or a name prefix ("Mc", "Mac").</param>
    /// <param name="condition">Optional predicate; when it returns false the word is returned unchanged.</param>
    private static string CapitaliseAfter(string word, string marker, Func<string, bool> condition = null)
    {
        if (string.IsNullOrEmpty(word) || string.IsNullOrEmpty(marker))
        {
            return word;
        }

        if (condition != null && !condition(word))
        {
            return word;
        }

        // Alphabetic markers (Mc, Mac) are name prefixes: they only count at the start of the
        // word or right after a non-letter, so "cormack" is not turned into "CorMacK".
        bool markerIsPrefix = char.IsLetter(marker[0]);

        int index = -1;
        int searchFrom = 0;
        while (searchFrom < word.Length)
        {
            int candidate = word.IndexOf(marker, searchFrom, StringComparison.OrdinalIgnoreCase);
            if (candidate < 0)
            {
                return word;
            }

            if (!markerIsPrefix || candidate == 0 || !char.IsLetter(word[candidate - 1]))
            {
                index = candidate;
                break;
            }

            searchFrom = candidate + 1;
        }

        if (index < 0)
        {
            return word;
        }

        // If the search string is at the end of the word ignore.
        if (index + marker.Length == word.Length)
        {
            return word;
        }

        string remainder = CapitaliseFirstLetter(word.Substring(index + marker.Length));

        // Recurse so that later occurrences (Oscar-Meyer-Weiner, J.R.R.) are handled as well.
        return word.Substring(0, index) + marker + CapitaliseAfter(remainder, marker, condition);
    }

    /// <summary>Upper-cases any Roman numeral (I to C range) that forms a whole word, except "Xi".</summary>
    private static string UppercaseRomanNumerals(string word)
    {
        return RomanNumeralRegex.Replace(word, match => match.Value.ToUpperInvariant());
    }

    /// <summary>Upper-cases the first character and lower-cases the rest; <paramref name="word"/> must be non-empty.</summary>
    private static string CapitaliseFirstLetter(string word)
    {
        return char.ToUpperInvariant(word[0]) + word.Substring(1).ToLowerInvariant();
    }
}