我有4个字符串:
"h:/a/b/c"
"h:/a/b/d"
"h:/a/b/e"
"h:/a/c"
我想找出这些字符串的公共前缀,例如"h:/a"
。如何找到?通常我会使用分隔符
'/'
将字符串拆分并放入另一个列表中等等。有更好的方法吗?
我有4个字符串:
"h:/a/b/c"
"h:/a/b/d"
"h:/a/b/e"
"h:/a/c"
我想找出这些字符串的公共前缀,例如"h:/a"
。如何找到?'/'
将字符串拆分并放入另一个列表中等等。string[] xs = new[] { "h:/a/b/c", "h:/a/b/d", "h:/a/b/e", "h:/a/c" };
string x = string.Join("/", xs.Select(s => s.Split('/').AsEnumerable())
.Transpose()
.TakeWhile(s => s.All(d => d == s.First()))
.Select(s => s.First()));
使用
public static IEnumerable<IEnumerable<T>> Transpose<T>(
this IEnumerable<IEnumerable<T>> source)
{
var enumerators = source.Select(e => e.GetEnumerator()).ToArray();
try
{
while (enumerators.All(e => e.MoveNext()))
{
yield return enumerators.Select(e => e.Current).ToArray();
}
}
finally
{
Array.ForEach(enumerators, e => e.Dispose());
}
}
var samples = new[] { "h:/a/b/c", "h:/a/b/d", "h:/a/b/e", "h:/a/e" };
var commonPrefix = new string(samples.MinBy(s => s.Length)
.TakeWhile((c, i) => samples.All(s => s[i] == c)).ToArray());
对于 .NET 5 及更早版本,请使用以下内容:
var samples = new[] { "h:/a/b/c", "h:/a/b/d", "h:/a/b/e", "h:/a/e" };
var commonPrefix = new string(
samples.First().Substring(0, samples.Min(s => s.Length))
.TakeWhile((c, i) => samples.All(s => s[i] == c)).ToArray());
.First().Substring(0, samples.Min(s => s.Length))
可以被替换为 .Min()
。 - Alex Telon.Min()
并不一定会得到最短的字符串。{"aa", "b"}.Min()
返回"aa",因为"a"比"b"更低/更早地排序。 - Niko Os[i]
处不会超出范围的字符串。Alex聪明的Min()
建议满足了这个条件,使代码更加简洁。话虽如此,寻找最小的字符串比寻找最短的字符串要更昂贵。在.NET 6中,我会选择MinBy(s => s.Length)
。 - Yegor只需循环遍历最短字符串的字符,并将每个字符与其他字符串中相同位置的字符进行比较。只要它们都匹配,就继续向下执行。一旦出现不匹配的字符,则当前位置之前的字符串即为答案。
类似于以下的伪代码:
int count=0;
foreach(char c in shortestString)
{
foreach(string s in otherStrings)
{
if (s[count]!=c)
{
return shortestString.SubString(0,count-1); //need to check count is not 0
}
}
count+=1;
}
return shortestString;
foreach
循环实际上包含了 shortestString
,否则在访问 sub[count]
时,每当 sub.Length == count
时,您将会得到一个 index out of bounds
异常 - 如果您无法保证这一点,请在您的方法中添加一个检查以提前返回... 在内部 foreach 循环中加入 if (sub.Length == count) return parent.Substring(0, count);
。 - KyleMit基于Sam Holder的解决方案编写的工作代码(请注意,它给出的是h:/a/而不是问题中最长的公共初始子串h:/a):
using System;
namespace CommonPrefix
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine(CommonPrefix(new[] { "h:/a/b/c", "h:/a/b/d", "h:/a/b/e", "h:/a/c" })); // "h:/a/"
Console.WriteLine(CommonPrefix(new[] { "abc", "abc" })); // "abc"
Console.WriteLine(CommonPrefix(new[] { "abc" })); // "abc"
Console.WriteLine(CommonPrefix(new string[] { })); // ""
Console.WriteLine(CommonPrefix(new[] { "a", "abc" })); // "a"
Console.WriteLine(CommonPrefix(new[] { "abc", "a" })); // "a"
Console.ReadKey();
}
private static string CommonPrefix(string[] ss)
{
if (ss.Length == 0)
{
return "";
}
if (ss.Length == 1)
{
return ss[0];
}
int prefixLength = 0;
foreach (char c in ss[0])
{
foreach (string s in ss)
{
if (s.Length <= prefixLength || s[prefixLength] != c)
{
return ss[0].Substring(0, prefixLength);
}
}
prefixLength++;
}
return ss[0]; // all strings identical up to length of ss[0]
}
}
}
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
public class StringIndex
{
private Dictionary<char, Item> _rootChars;
public StringIndex()
{
_rootChars = new Dictionary<char, Item>();
}
public void Add(string value, string data)
{
int level = 0;
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in value)
{
if (currentChars.ContainsKey(c))
{
currentItem = currentChars[c];
}
else
{
currentItem = new Item() { Level = level, Letter = c };
currentChars.Add(c, currentItem);
}
currentChars = currentItem.Items;
level++;
}
if (!currentItem.Values.Contains(data))
{
currentItem.Values.Add(data);
IncrementCount(value);
}
}
private void IncrementCount(string value)
{
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in value)
{
currentItem = currentChars[c];
currentItem.Total++;
currentChars = currentItem.Items;
}
}
public void Remove(string value, string data)
{
Dictionary<char, Item> currentChars = _rootChars;
Dictionary<char, Item> parentChars = null;
Item currentItem = null;
foreach (char c in value)
{
if (currentChars.ContainsKey(c))
{
currentItem = currentChars[c];
parentChars = currentChars;
currentChars = currentItem.Items;
}
else
{
return; // no matches found
}
}
if (currentItem.Values.Contains(data))
{
currentItem.Values.Remove(data);
DecrementCount(value);
if (currentItem.Total == 0)
{
parentChars.Remove(currentItem.Letter);
}
}
}
private void DecrementCount(string value)
{
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in value)
{
currentItem = currentChars[c];
currentItem.Total--;
currentChars = currentItem.Items;
}
}
public void Clear()
{
_rootChars.Clear();
}
public int GetValuesByPrefixCount(string prefix)
{
int valuescount = 0;
int level = 0;
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in prefix)
{
if (currentChars.ContainsKey(c))
{
currentItem = currentChars[c];
currentChars = currentItem.Items;
}
else
{
return valuescount; // no matches found
}
level++;
}
valuescount = currentItem.Total;
return valuescount;
}
public HashSet<string> GetValuesByPrefixFlattened(string prefix)
{
var results = GetValuesByPrefix(prefix);
return new HashSet<string>(results.SelectMany(x => x));
}
public List<HashSet<string>> GetValuesByPrefix(string prefix)
{
var values = new List<HashSet<string>>();
int level = 0;
Dictionary<char, Item> currentChars = _rootChars;
Item currentItem = null;
foreach (char c in prefix)
{
if (currentChars.ContainsKey(c))
{
currentItem = currentChars[c];
currentChars = currentItem.Items;
}
else
{
return values; // no matches found
}
level++;
}
ExtractValues(values, currentItem);
return values;
}
public void ExtractValues(List<HashSet<string>> values, Item item)
{
foreach (Item subitem in item.Items.Values)
{
ExtractValues(values, subitem);
}
values.Add(item.Values);
}
public class Item
{
public int Level { get; set; }
public char Letter { get; set; }
public int Total { get; set; }
public HashSet<string> Values { get; set; }
public Dictionary<char, Item> Items { get; set; }
public Item()
{
Values = new HashSet<string>();
Items = new Dictionary<char, Item>();
}
}
}
using System;
using Microsoft.VisualStudio.TestTools.UnitTesting;
[TestClass]
public class StringIndexTest
{
[TestMethod]
public void AddAndSearchValues()
{
var si = new StringIndex();
si.Add("abcdef", "1");
si.Add("abcdeff", "2");
si.Add("abcdeffg", "3");
si.Add("bcdef", "4");
si.Add("bcdefg", "5");
si.Add("cdefg", "6");
si.Add("cdefgh", "7");
var output = si.GetValuesByPrefixFlattened("abc");
Assert.IsTrue(output.Contains("1") && output.Contains("2") && output.Contains("3"));
}
[TestMethod]
public void RemoveAndSearch()
{
var si = new StringIndex();
si.Add("abcdef", "1");
si.Add("abcdeff", "2");
si.Add("abcdeffg", "3");
si.Add("bcdef", "4");
si.Add("bcdefg", "5");
si.Add("cdefg", "6");
si.Add("cdefgh", "7");
si.Remove("abcdef", "1");
var output = si.GetValuesByPrefixFlattened("abc");
Assert.IsTrue(!output.Contains("1") && output.Contains("2") && output.Contains("3"));
}
[TestMethod]
public void Clear()
{
var si = new StringIndex();
si.Add("abcdef", "1");
si.Add("abcdeff", "2");
si.Add("abcdeffg", "3");
si.Add("bcdef", "4");
si.Add("bcdefg", "5");
si.Add("cdefg", "6");
si.Add("cdefgh", "7");
si.Clear();
var output = si.GetValuesByPrefix("abc");
Assert.IsTrue(output.Count == 0);
}
[TestMethod]
public void AddAndSearchValuesCount()
{
var si = new StringIndex();
si.Add("abcdef", "1");
si.Add("abcdeff", "2");
si.Add("abcdeffg", "3");
si.Add("bcdef", "4");
si.Add("bcdefg", "5");
si.Add("cdefg", "6");
si.Add("cdefgh", "7");
si.Remove("cdefgh", "7");
var output1 = si.GetValuesByPrefixCount("abc");
var output2 = si.GetValuesByPrefixCount("b");
var output3 = si.GetValuesByPrefixCount("bc");
var output4 = si.GetValuesByPrefixCount("ca");
Assert.IsTrue(output1 == 3 && output2 == 2 && output3 == 2 && output4 == 0);
}
}
public class CommonStringPrefix
{
public static string Of(IEnumerable<string> strings)
{
var commonPrefix = strings.FirstOrDefault() ?? "";
foreach(var s in strings)
{
var potentialMatchLength = Math.Min(s.Length, commonPrefix.Length);
if (potentialMatchLength < commonPrefix.Length)
commonPrefix = commonPrefix.Substring(0, potentialMatchLength);
for(var i = 0; i < potentialMatchLength; i++)
{
if (s[i] != commonPrefix[i])
{
commonPrefix = commonPrefix.Substring(0, i);
break;
}
}
}
return commonPrefix;
}
}
public string LongestCommonPrefix(string[] strs)
{
return strs.Aggregate((seed,z) => string.Join("",seed.TakeWhile((v,i)=> z.Length > i && v == z[i])));
}
var str_array = new string[]{"h:/a/b/c",
"h:/a/b/d",
"h:/a/b/e",
"h:/a/c"};
var separator = '/';
// get longest common prefix (optinally use ToLowerInvariant)
var ret = str_array.Any()
? str_array.First().TakeWhile((s,i) =>
str_array.All(e =>
Char.ToLowerInvariant(s) == Char.ToLowerInvariant(e.Skip(i).Take(1).SingleOrDefault())))
: String.Empty;
// remove last character if it's a separator (optional)
if (ret.LastOrDefault() == separator)
ret = ret.Take(ret.Count() -1);
string prefix = new String(ret.ToArray());
/// <summary>
/// Resolves a common base Uri from a list of Uri strings. Ignores case. Includes the last slash
/// </summary>
/// <param name="collectionOfUriStrings"></param>
/// <returns>Common root in lowercase</returns>
public static string GetCommonUri(this ICollection<string> collectionOfUriStrings)
{
//Check that collection contains entries
if (!collectionOfUriStrings.Any())
return string.Empty;
//Check that the first is no zero length
var firstUri = collectionOfUriStrings.FirstOrDefault();
if(string.IsNullOrEmpty(firstUri))
return string.Empty;
//set starting position to be passed '://'
int previousSlashPos = firstUri.IndexOf("://", StringComparison.OrdinalIgnoreCase) + 2;
int minPos = previousSlashPos + 1; //results return must have a slash after this initial position
int nextSlashPos = firstUri.IndexOf("/", previousSlashPos + 1, StringComparison.OrdinalIgnoreCase);
//check if any slashes
if (nextSlashPos == -1)
return string.Empty;
do
{
string common = firstUri.Substring(0, nextSlashPos + 1);
//check against whole collection
foreach (var collectionOfUriString in collectionOfUriStrings)
{
if (!collectionOfUriString.StartsWith(common, StringComparison.OrdinalIgnoreCase))
{
//return as soon as a mismatch is found
return previousSlashPos > minPos ? firstUri.Substring(0, previousSlashPos + 1).ToLower() : string.Empty ;
}
}
previousSlashPos = nextSlashPos;
nextSlashPos = firstUri.IndexOf("/", previousSlashPos + 1, StringComparison.OrdinalIgnoreCase);
} while (nextSlashPos != -1);
return previousSlashPos > minPos ? firstUri.Substring(0, previousSlashPos + 1).ToLower() : string.Empty;
}
h:/a/
。 - user102008