最终,我想找到英语字典中包含至少三个字母的子词最多的单词。我编写了这个算法,但速度太慢,不够实用。想知道如何进行优化。
def subWords(word):
return set((word[0:i] for i in range(2, len(word)+1))) #returns all subWords of length 2 or greater
def checkDict(wordList, dictList):
return set((word for word in wordList if word in dictList))
def main():
dictList = [i.strip() for i in open('wordlist.txt').readlines()]
allwords = list()
maximum = (0, list())
for dictWords in dictList:
for i in range (len(dictWords)):
for a in checkDict(subWords(dictWords[i: len(dictWords) + 1]), dictList):
allwords.append(a)
if len(allwords) > maximum[0]:
maximum = (len(allwords), allwords)
print maximum
allwords = list()
print maximum
main()