在列表中查找邻居

4

我有一个列表:

# Sample input: '>>' is the marker whose direct neighbours should be collected.
l = [
    'a', '>>', 'b', '>>', 'd', 'e', 'f', 'g', '>>',
    'i', '>>', '>>', 'j', 'k', 'l', '>>', '>>',
]

我需要提取所有'>>'的邻居(即紧挨着'>>'的元素),并将它们分组:只要两个邻居之间存在既不是'>>'、也不是'>>'的邻居的元素,就在该处断开,把它们分到不同的组中。
对于示例列表,预期结果如下:
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

我尝试过很多方法,但所有简单的方法都或多或少地失败了。目前似乎唯一有效的代码是这个:

def func(L, N):
    """Group the elements of ``L`` that are direct neighbours of marker ``N``.

    An element is a neighbour when the item immediately before or after it
    equals ``N``.  Consecutive neighbours (ignoring the markers themselves)
    end up in the same group; a non-marker element that touches no marker
    closes the current group.

    Args:
        L: Sequence to scan (must support ``len`` and indexing).
        N: Marker value.

    Returns:
        A list of groups, each a non-empty list of neighbour elements in
        their original order.
    """
    outer = []
    inner = []
    last = len(L) - 1
    for i, e in enumerate(L):
        if e == N:
            continue
        # Explicit bounds checks: a bare L[i-1] at i == 0 silently wraps
        # around to L[-1], which wrongly made the first element a neighbour
        # whenever the sequence ended with the marker.
        after_marker = i > 0 and L[i - 1] == N
        before_marker = i < last and L[i + 1] == N
        if after_marker or before_marker:
            inner.append(e)
        elif inner:
            # e touches no marker, so it separates two groups.
            outer.append(inner)
            inner = []
    if inner:
        outer.append(inner)
    return outer

func(l,'>>')

Out[196]:
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

虽然它似乎可以工作,但我正在想是否有更好、更清晰的方法来实现它?

6个回答

2

这里有一个替代方案:

import itertools

def func(L, N):
    """Group the elements of ``L`` that are direct neighbours of marker ``N``.

    Every position is classified as "marker or next to a marker" or not;
    ``itertools.groupby`` then splits the sequence into runs of equal
    classification, and the markers are stripped from the qualifying runs.

    Args:
        L: Sequence to scan (must support ``len`` and indexing).
        N: Marker value.

    Returns:
        A list of non-empty groups of neighbour elements, in original order.
    """
    last = len(L) - 1

    def key(i_e):
        i, e = i_e
        # i < last (not i < len(L)) keeps L[i+1] in range for the final item.
        return e == N or (i > 0 and L[i-1] == N) or (i < last and L[i+1] == N)

    outer = []
    for k, g in itertools.groupby(enumerate(L), key):
        if k:
            group = [e for i, e in g if e != N]
            # A run made only of markers has no neighbours to report.
            if group:
                outer.append(group)
    return outer

或者使用嵌套列表推导式的等效版本:

def func(L, N):
    """Group the elements of ``L`` that are direct neighbours of marker ``N``.

    Comprehension-based variant: positions are classified as "marker or next
    to a marker", grouped into runs with ``itertools.groupby``, and the
    markers are removed from each qualifying run.

    Args:
        L: Sequence to scan (must support ``len`` and indexing).
        N: Marker value.

    Returns:
        A list of non-empty groups of neighbour elements, in original order.
    """
    last = len(L) - 1

    def key(i_e):
        i, e = i_e
        # i < last (not i < len(L)) keeps L[i+1] in range for the final item.
        return e == N or (i > 0 and L[i-1] == N) or (i < last and L[i+1] == N)

    # Each run is materialised immediately, before groupby advances.
    runs = ([e for i, e in g if e != N]
            for k, g in itertools.groupby(enumerate(L), key) if k)
    # Drop runs that consisted of markers only.
    return [run for run in runs if run]

+1 更加简洁,尽管看起来比原来的慢了大约两倍。 - root

2
你可以简化它,就像这样。
# Pad both ends with '' so that every real item has a left and a right
# neighbour and l[i-1] / l[i+1] never fall off (or wrap around) the list.
l = [''] + l + ['']
stack = []
connected = last_connected = False
for i, item in enumerate(l):
    # Padding and the markers themselves are never collected.
    if item not in ('', '>>'):
        connected = '>>' in (l[i-1], l[i+1])
        if connected and not last_connected:
            # First neighbour after a gap opens a new group.
            stack.append([])
        if connected:
            stack[-1].append(item)
        last_connected = connected

2
我认为最符合Python风格且易于阅读的解决方案应该是这样的:

import itertools

def neighbours(items, fill=None):
    """Yield the elements with their neighbours as (before, element, after).

    ``fill`` is used in place of the missing neighbour at either end.
    Works for any iterable, including one-shot iterators and generators.

    neighbours([1, 2, 3]) --> (None, 1, 2), (1, 2, 3), (2, 3, None)

    """
    # tee() provides three independent views of the input; iterating the same
    # one-shot iterator three times would interleave and drop elements.
    lagging, current, leading = itertools.tee(items, 3)
    before = itertools.chain([fill], lagging)
    after = itertools.chain(leading, [fill])  # zip_longest() would be an alternative.
    # Shift "after" one step ahead; the chain always holds at least ``fill``,
    # so this cannot raise StopIteration.
    next(after)
    return zip(before, current, after)

def split_not_neighbour(seq, mark):
    """Yield groups of the items in ``seq`` that sit next to ``mark``.

    Items directly before or after a ``mark`` are collected (the marks
    themselves are dropped); an item that touches no mark ends the current
    group.  Each group is yielded as a non-empty list.

    split_not_neighbour([1, 0, 2, 3, 4, 5, 0], 0) --> [1, 2], [5]

    """
    # Use a private sentinel for the padding at both ends: the default
    # fill of None would compare equal to ``mark`` when mark is None and
    # wrongly turn the first/last item into a neighbour.
    edge = object()
    output = []
    for items in neighbours(seq, edge):
        if mark in items:
            _, item, _ = items
            if item != mark:
                output.append(item)
        elif output:
            # A non-neighbour item closes the group collected so far.
            yield output
            output = []
    if output:
        yield output

我们可以这样使用它:
>>> l = ['a', '>>', 'b', '>>', 'd', 'e', 'f', 'g', '>>', 'i', '>>', '>>',
...      'j', 'k', 'l', '>>', '>>']
>>> print(list(split_not_neighbour(l, ">>")))
[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

请注意巧妙地避免了任何直接索引。
编辑:更优雅的版本。
def split_not_neighbour(seq, mark):
    """Yield groups of the items in ``seq`` that sit next to ``mark``.

    Items directly before or after a ``mark`` are collected (the marks
    themselves are dropped); an item that touches no mark separates two
    groups.  Each group is yielded as a non-empty list.

    split_not_neighbour([1, 0, 2, 3, 4, 5, 0], 0) --> [1, 2], [5]

    """
    # Private sentinel for the end padding so it can never equal ``mark``
    # (the default fill of None would, when mark is None).
    edge = object()
    neighboured = neighbours(seq, edge)
    for touches_mark, items in itertools.groupby(neighboured, key=lambda x: mark in x):
        # Runs that touch no mark are separators only; yielding them too
        # would return the non-neighbour items as groups of their own.
        if not touches_mark:
            continue
        group = [item for _, item, _ in items if item != mark]
        if group:
            yield group

+1,特别是对于整洁的避免方式,但它似乎比原来慢了约2倍。 - root
1
@root 速度是否很重要?除非已经证明是瓶颈,否则应始终优先考虑代码可读性而不是速度。 - Gareth Latty
我同意,实际上我只是出于好奇尝试了第一个答案。 - root
+1 个漂亮的解决方案。我确实注意到了巧妙的避免 :) - Lukas Graf

0

我天真的尝试

# NOTE: joining everything into one string only works when every non-marker
# item is a single character and no item contains '>' itself.
things = ''.join(l).split('>>')

output = []
inner = []

for i in things:
    if not i:
        # Empty segment: two markers were adjacent, or a marker sat at an edge.
        continue
    # The first character of a segment follows a marker, so it extends the
    # current group.
    inner.append(i[0])
    if len(i) > 1:
        # A longer segment has non-neighbours in the middle: close the group
        # and start the next one with the segment's last character.
        output.append(inner)
        inner = [i[-1]]

# Only emit the trailing group when something was actually collected.
if inner:
    output.append(inner)
print(output) # [['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

0

类似这样:

l = ['a', '>>', 'b', '>>', 'd', 'e', 'f', 'g', '>>', 'i', '>>', '>>', 'j', 'k', 'l', '>>', '>>']
# Split the joined string on the marker and drop the empty segments.
# NOTE: this relies on every non-marker item being a single character.
l = list(filter(None, "".join(l).split(">>")))
lis = []
for x in l:
    if not lis:
        lis.append([])
    # The first character of a segment always extends the current group.
    lis[-1].append(x[0])
    if len(x) > 1:
        # Characters in the middle are not neighbours; the last character
        # starts the next group.
        lis.append([x[-1]])

print(lis)

输出:

[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

或者:

l = ['a', '>>', 'b', '>>', 'd', 'e', 'f', 'g', '>>', 'i', '>>', '>>', 'j', 'k', 'l', '>>', '>>']
# Materialise the segments: on Python 3 filter() returns a one-shot iterator,
# and it is traversed twice below (once to count, once to distribute).
# NOTE: this relies on every non-marker item being a single character.
l = list(filter(None, "".join(l).split(">>")))
# Every multi-character segment closes one group and opens the next, so the
# number of groups is known up front.
lis = [[] for _ in range(sum(1 for x in l if len(x) > 1) + 1)]
for x in l:
    # The group currently being filled is the last non-empty one, or the
    # first group when nothing has been stored yet.
    current = next((y for y in reversed(lis) if y), lis[0])
    current.append(x[0])
    if len(x) > 1:
        # The segment's last character starts the first still-empty group.
        for y in lis:
            if not y:
                y.append(x[-1])
                break

print(lis)

输出:

[['a', 'b', 'd'], ['g', 'i', 'j'], ['l']]

0

另一种使用原始列表叠加的方法

import copy

# Overlay the list with a copy of itself shifted right by one position, so
# that zip() pairs every element x with its predecessor y.
_PAD = object()  # stands in for the missing predecessor of the first element
lis_dup = copy.deepcopy(lis)
lis_dup.insert(0, _PAD)
prev_in = False  # True when y (the predecessor) has already been put in tmp
tmp = []
res = []

for (x, y) in zip(lis, lis_dup):
    if x == '>>':
        # y sits right before a marker; add it unless it was already added
        # for following a marker (a flag, so duplicate values are kept).
        if y is not _PAD and y != '>>' and not prev_in:
            tmp.append(y)
        prev_in = False
    elif y == '>>':
        # x sits right after a marker.
        tmp.append(x)
        prev_in = True
    else:
        # Neither x nor y is a marker.  If y was not collected it touches no
        # marker at all, so it separates two groups.
        if y is not _PAD and not prev_in and tmp:
            res.append(tmp)
            tmp = []
        prev_in = False
if tmp:
    res.append(tmp)

print(res)

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接