如何从多个数组中查找元素的序列

5

我有一个庞大的列表数组,每个数组可能都有一些重复的序列,我需要找到这些重复的序列(在不同数组中重复出现的元素数量超过1)

在下面的例子中,数组1和数组2具有元素序列010,002,007,而数组2和数组3则共同具有011,345,547,800

array1: 090,010,002,007,310,104,048,610,720    
array2: 456,010,002,007,087,011,345,547,800    
array3: 004,089,870,011,345,547,800,001,002

什么是最好的方法来做到这一点,我知道我们可以用纯C#来实现,但需要很多循环和编程。是否有算法可以识别任何数组中连续的超过1个元素相同的情况?


你需要找到数组中所有匹配的元素,对吗? - Aousaf Rashid
你的示例输出是什么? - Daniel Tshuva
3个回答

0
首先,你应该尝试将数组映射为图形。 我们将定义以下类。
    private class Transition
    {
        public int ArrayNum { get; set; }
        public int? NextNum { get; set; }
    }

假设我们有以下数组

var arrays = new[]
{
      new[] {090, 010, 002, 007, 310, 104, 048, 610, 720},
      new[] {456, 010, 002, 007, 087, 011, 345, 547, 800},
      new[] {004, 089, 870, 011, 345, 547, 800, 001, 002}
};

我们将按照以下方式映射转换:
var transitions = new Dictionary<int, List<Transition>>();

for (var i = 0; i < arrays.Length; i++)
{
    for (var j = 0; j < arrays[i].Length; j++)
    {
        var num = arrays[i][j];
        var transition = new Transition { ArrayNum = i, NextNum = j < arrays[i].Length - 1 ? arrays[i][j + 1] : (int?)null };
        if (!transitions.ContainsKey(num))
        {
            transitions.Add(num, new List<Transition> {transition});
        }
        else
        {
            transitions[num].Add(transition);
        }

    }
}

transitions = transitions.OrderBy(x => x.Key).ToDictionary(x => x.Key, x => x.Value);

在完成映射后,我们将只遍历每个数组一次,找到它们相关的序列:

private class Sequence
{
    public int FirstArray { get; set; }
    public int SecondArray { get; set; }
    public IList<int> Nums { get; set; }
}



var sequences = new List<Sequence>();
for (var i = 0; i < arrays.Length-1; i++)
{
    List<Sequence> relevantSequences = null;
    for (var j = 0; j < arrays[i].Length - 1; j++)
    {
        var num = arrays[i][j];
        var nextNum = arrays[i][j+1];
        var flows = transitions[num].Where(x => x.NextNum == nextNum && x.ArrayNum > i).ToList();
        if (!flows.Any())
        {
            if (relevantSequences != null)
            {
                sequences.AddRange(relevantSequences);
                relevantSequences = null;
            }
        }
        else 
        {
            if (relevantSequences == null)
            {
                relevantSequences = flows.Select(x => new Sequence { FirstArray = i, SecondArray = x.ArrayNum, Nums = new List<int> {num, nextNum} }).ToList();
            }
            else
            {
                foreach (var flow in flows)
                {
                    var sequence = relevantSequences.SingleOrDefault(x => x.SecondArray == flow.ArrayNum);

                    if (sequence != null)
                    {
                        sequence.Nums.Add(nextNum);
                    }
                    else
                    {
                        relevantSequences.Add(new Sequence { FirstArray = i, SecondArray = flow.ArrayNum, Nums = new List<int> { num, nextNum } });
                    }
                }
            }
        }

    }

    if (relevantSequences != null)
        sequences.AddRange(relevantSequences);
}

希望你会觉得它有用。

0
如果我理解问题正确,您想知道2+个序列是否在所有数组中重复(可以是不同的顺序),例如:
array1: 000, 110
array2: 111, 011, 000
array3: 101, 000
共同的序列为:
1) 000
2) 110\011\101

为了简单/短的解决方案,我使用了字符串数组而不是整数:

static void Main(string[] args)
{
    var arr1 = new[] { "000", "110" };
    var arr2 = new[] { "111", "011", "000" };
    var arr3 = new[] { "101", "000" };

    var result = IsAllHaveMoreThen1SequnceDuplicate(new List<string[]> {arr1, arr2, arr3});
}

private static bool IsAllHaveMoreThen1SequnceDuplicate(List<string[]> arrays)
{
    if (arrays == null || arrays.Count == 0)
    {
        return false;
    }

    var firstArray = arrays[0];

    for (var i = 1; i < arrays.Count; i++)
    {
        var isHaveMoreThen1SequnceDuplicate = IsHaveMoreThen1SequnceDuplicate(firstArray, arrays[i]);
        if (!isHaveMoreThen1SequnceDuplicate)
        {
            return false;
        }
    }

    return true;
}

private static bool IsHaveMoreThen1SequnceDuplicate(string[] arr1, string[] arr2)
{
    var globalCounter = 0;
    foreach (var s1 in arr1)
    {
        foreach (var s2 in arr2)
        {
            var isSameSequence = IsSameSequence(s1, s2);
            if (isSameSequence)
            {
                globalCounter++;
                break;
            }
        }

        if (globalCounter == 2)
        {
            return true;
        }
    }

    return false;
}

private static bool IsSameSequence(string s1, string s2)
{
    if (s1 == s2)
    {
        return true;
    }

    if (s1.Length != s2.Length)
    {
        return false;
    }


    var flags = new int[9];
    for (int i = 0; i < s1.Length; i++)
    {
        var cInt = int.Parse(s1[i].ToString());
        flags[cInt]++;
    }

    for (int i = 0; i < s2.Length; i++)
    {
        var cInt = int.Parse(s2[i].ToString());
        flags[cInt]--;
    }

    return flags.All(f => f == 0);
}

@ Daniel Tshuva,第一个对于我的情况是正确的,“000”在所有中都是普遍的。但是你提到的第二个,例如110、011是不同的ID。实际上,所有这些ID都是我在SQL表中的唯一标识符。所以我想看看是否在不同的数组中重复出现了ID序列(每个数组都是我单独的成品,每个产品由小部件集合组成。因此,我想将零件集合分组为单个组,并在每个产品中引用相同的组)。 - Mathiyazhagan

0

这里有另一种解决方案。我假设序列至少包含两个数字。主要思路是先创建所有可能的序列,然后计算它们的总和。接着,我根据总和进行快速过滤,如果它们相同,则进行更详细的检查。

使用更合适的数据结构可以改进此代码。一些列表可以替换为字典等。

static int[][] arrays = new[]
{
    new[] {090, 010, 002, 007, 310, 104, 048, 610, 720},
    new[] {456, 010, 002, 007, 087, 011, 345, 547, 800},
    new[] {004, 089, 870, 011, 345, 547, 800, 001, 002}
};

static void Main()
{
    SequentedArray[] sequentedArrays = arrays.Select(array => new SequentedArray(array)).ToArray();

    foreach (var baseSequentedArray in sequentedArrays)
    {
        foreach (var comparisonSequentedArray in sequentedArrays)
        {
            if(baseSequentedArray == comparisonSequentedArray) continue;

            var sameSequences = baseSequentedArray.FindSameSequences(comparisonSequentedArray);
            foreach (var sameSequence in sameSequences)
            {
                Console.WriteLine(String.Join(", ", sameSequence.GetValues(baseSequentedArray.Array)));
            }
        }
    }
}

class SequentedArray
{
    public int[] Array { get; private set; }
    public List<Sequence> Sequences { get; private set; }

    public SequentedArray(int[] array)
    {
        Array = array;
        Sequences = new List<Sequence>();

        for (int endIndex = 1; endIndex < array.Length; endIndex++)
        {
            int sum = array[endIndex];
            for (int startIndex = endIndex - 1; startIndex >= 0; startIndex--)
            {
                sum += array[startIndex];
                Sequences.Add(new Sequence(startIndex, endIndex, sum));
            }
        }
    }

    public IEnumerable<Sequence> FindSameSequences(SequentedArray comparison)
    {
        var sameSequences = new List<Sequence>();
        foreach (var sequence in Sequences)
        {
            foreach (var comparisonSequence in comparison.Sequences)
            {
                if (sequence.Sum == comparisonSequence.Sum &&
                    sequence.GetValues(Array).SequenceEqual(comparisonSequence.GetValues(comparison.Array)))
                {
                    var smallerSequences = sameSequences.Where(existing => sequence.Covers(existing)).ToList();
                    foreach (var smallerSequence in smallerSequences)
                    {
                        sameSequences.Remove(smallerSequence);
                    }
                    sameSequences.Add(sequence);
                }
            }
        }

        return sameSequences;
    }
}

class Sequence
{
    public int StartIndex { get; private set; }
    public int EndIndex { get; private set; }
    public int Sum { get; private set; }

    public Sequence(int startIndex, int endIndex, int sum)
    {
        StartIndex = startIndex;
        EndIndex = endIndex;
        Sum = sum;
    }

    public IEnumerable<int> GetValues(int[] array)
    {
        for (int i = StartIndex; i <= EndIndex; i++)
        {
            yield return array[i];
        }
    }

    public bool Covers(Sequence comparison)
    {
        return StartIndex <= comparison.StartIndex && EndIndex >= comparison.EndIndex;
    }
}

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接