(我是在JavaScript环境下编写这篇文章的,但我会接受任何语言中算法正确的答案)
如何查找字符串数组中每个元素的最短子串,其中该子串不包含在其他元素中(忽略大小写)?
假设我有一个输入数组,例如:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];
输出应该类似于:
var uniqueNames = ["ne", "h", "ua", "ka", "i", "r"];
就我的目的而言,你可以安全地假设没有元素完全包含在另一个元素中。
我的想法:
看起来可以采用暴力方法,按照以下方式进行:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];
var uniqueNames = [], nameInd, windowSize, substrInd, substr, otherNameInd, foundMatch;
// For each name
for (nameInd = 0; nameInd < names.length; nameInd++)
{
var name = names[nameInd];
// For each possible substring length
windowLoop:
for (windowSize = 1; windowSize <= name.length; windowSize++)
{
// For each starting index of a substring
for (substrInd = 0; substrInd <= name.length-windowSize; substrInd++)
{
substr = name.substring(substrInd,substrInd+windowSize).toLowerCase();
foundMatch = false;
// For each other name
for (otherNameInd = 0; otherNameInd < names.length; otherNameInd++)
{
if (nameInd != otherNameInd && names[otherNameInd].toLowerCase().indexOf(substr) > -1)
{
foundMatch = true;
break;
}
}
if (!foundMatch)
{
// This substr works!
uniqueNames[nameInd] = substr;
break windowLoop;
}
}
}
}
但我想象中应该有一种更优雅的解决方案,可以使用字典树/前缀树、后缀数组或像那样有趣的东西。
编辑: 我认为以下是JavaScript中所选答案的程序形式:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];
var uniqueNames = [], permutations = {}, permutation, nameInd, windowSize, substrInd, substr;
// For each name
for (nameInd = 0; nameInd < names.length; nameInd++)
{
var name = names[nameInd];
// For each possible substring length
windowLoop:
for (windowSize = 1; windowSize <= name.length; windowSize++)
{
// For each starting index of a substring
for (substrInd = 0; substrInd <= name.length-windowSize; substrInd++)
{
substr = name.substring(substrInd,substrInd+windowSize).toLowerCase();
permutations[substr] = (typeof permutations[substr] === "undefined")?nameInd:-1;
}
}
}
for (substr in permutations)
{
permutation = permutations[substr];
if (permutation !== -1 && ((typeof uniqueNames[permutation] === "string" && substr.length < uniqueNames[permutation].length) || typeof uniqueNames[permutation] === "undefined"))
{
uniqueNames[permutation] = substr;
}
}
s
和y
,但是我看到了i、h
和r
... - Icaruss
和y
之所以不在其中,只是因为我不是在寻找符合条件的所有最小子字符串,而是任何一个都可以。如果有人能够返回一个包含它们所有的二维数组作为答案,我也会接受,但我并不需要那么详细的信息。同样有效的输出可能是`var uniqueNames = ["ne", "y", "ua", "ka", "i", "s"];`
- Patrick