在你之前的六个问题中，你似乎一直把这个可怜的字典当作某种文本索引对象来使用。为什么不把它写成一个正式的类呢？
from collections import Counter
# Sample word -> occurrence-count table, passed to TextStats in main().
# Keys are case-sensitive: e.g. both "And" and "and" appear as separate entries.
textfreq = {
    "I": 1, "heaven": 1, "filled": 1, "their": 1,
    "termed": 1, "of": 4, "And": 3, "parts": 1,
    "neer": 1, "to": 2, "song": 1, "poets": 1,
    "The": 1, "a": 2, "were": 2, "verse": 1,
    "your": 6, "knows": 1, "not": 1, "half": 1,
    "number": 1, "but": 1, "yours": 1, "come": 2,
    "rage": 1, "age": 2, "Though": 1, "men": 1,
    "fresh": 1, "heavenly": 1, "say": 1, "alive": 1,
    "truth": 1, "this": 1, "If": 2, "than": 1,
    "old": 1, "believe": 1, "Which": 1, "that": 1,
    "You": 1, "faces": 1, "yet": 1, "poet": 1,
    "in": 4, "life": 1, "most": 1, "earthly": 1,
    "will": 1, "hides": 1, "my": 3, "papers": 1,
    "is": 1, "stretched": 1, "rights": 1, "eyes": 1,
    "it": 3, "yellowed": 1, "Such": 1, "So": 1,
    "all": 1, "lies": 1, "the": 1, "an": 1,
    "as": 1, "write": 1, "child": 1, "deserts": 1,
    "shows": 1, "tongue": 1, "twice": 1, "Be": 1,
    "high": 1, "some": 1, "could": 1, "should": 2,
    "and": 2, "touched": 1, "like": 1, "would": 1,
    "Who": 1, "tomb": 1, "numbers": 1, "antique": 1,
    "scorned": 1, "metre": 1, "time": 2, "touches": 1,
    "be": 1, "with": 2, "true": 1, "beauty": 1,
    "rhyme": 1, "less": 1, "But": 1, "graces": 1,
    "live": 1,
}
class TextStats:
    """Word-frequency statistics over a piece of text.

    The counts live in ``self.wordfreq`` (a ``collections.Counter`` mapping
    word -> number of occurrences); every statistic is a read-only property
    recomputed from that table on each access.

    An instance with no words is valid: the list-valued properties return
    ``[]`` and ``average_word_length`` returns ``0.0``, so ``str()`` never
    raises on an empty instance.
    """

    # Characters removed from file contents by ``from_file`` before counting.
    _STRIP_CHARS = '`~!@#$\'"'

    def __init__(self, text=''):
        """Build the frequency table from ``text``.

        ``text`` may be one of:

        * an object exposing a ``wordfreq`` attribute (such as another
          ``TextStats``) -- its counts are copied into a new ``Counter``;
        * a mapping (anything with ``keys``) of word -> count -- used as
          given; keys are NOT lower-cased;
        * a string -- lower-cased, split on whitespace and counted.
        """
        if hasattr(text, 'wordfreq'):
            self.wordfreq = Counter(text.wordfreq)
        elif hasattr(text, 'keys'):
            self.wordfreq = Counter(text)
        else:
            # split() without a separator never yields empty strings, so no
            # extra filtering is needed.
            self.wordfreq = Counter(text.lower().split())

    @classmethod
    def from_file(cls, fname):
        """Create an instance from the contents of the text file ``fname``.

        Lines are stripped and joined with single spaces, the characters in
        ``_STRIP_CHARS`` are deleted, and the result is counted as a string.
        Errors from ``open`` (e.g. a missing file) propagate to the caller.
        """
        with open(fname) as inf:
            text = ' '.join(line.strip() for line in inf)
        # Python 3 str.translate takes a single table argument; the
        # two-argument form translate(None, chars) only exists on Python 2.
        strip_table = str.maketrans('', '', cls._STRIP_CHARS)
        return cls(text.translate(strip_table))

    def __add__(self, otherTextStats):
        """Return a new instance whose counts are the sum of both operands.

        Any operand exposing a ``wordfreq`` attribute is accepted; for
        anything else ``NotImplemented`` is returned so Python can raise the
        usual ``TypeError``.
        """
        other_freq = getattr(otherTextStats, 'wordfreq', None)
        if other_freq is None:
            return NotImplemented
        return type(self)(self.wordfreq + other_freq)

    def __str__(self):
        """Return a multi-line, human-readable summary of the statistics."""
        return (
            "Count: {}\n"
            "Average len: {:0.4f}\n"
            "Shortest: {}\n"
            "Most common: {}\n"
            "Longest: {}\n".format(
                self.total_words,
                self.average_word_length,
                self.shortest_words,
                self.most_common_words,
                self.longest_words
            )
        )

    @property
    def unique_words(self):
        """Number of distinct words in the table."""
        return len(self.wordfreq)

    @property
    def total_words(self):
        """Total number of word occurrences (sum of all counts)."""
        return sum(self.wordfreq.values())

    @property
    def total_letters(self):
        """Total character count over all occurrences (length * count)."""
        return sum(len(w) * c for w, c in self.wordfreq.items())

    @property
    def average_word_length(self):
        """Mean word length weighted by occurrence; ``0.0`` when no words."""
        total = self.total_words
        if total == 0:
            # Avoid ZeroDivisionError for an empty table.
            return 0.0
        return self.total_letters / total

    @property
    def shortest_words(self):
        """Sorted list of all words sharing the minimum length (``[]`` if empty)."""
        if not self.wordfreq:
            return []
        minlen = min(map(len, self.wordfreq))
        return sorted(w for w in self.wordfreq if len(w) == minlen)

    @property
    def most_common_words(self):
        """Sorted list of all words sharing the highest count (``[]`` if empty)."""
        most_common = self.wordfreq.most_common()
        howmany = most_common[0][1] if most_common else 0
        return sorted(w for w, c in most_common if c == howmany)

    @property
    def longest_words(self):
        """Sorted list of all words sharing the maximum length (``[]`` if empty)."""
        if not self.wordfreq:
            return []
        maxlen = max(map(len, self.wordfreq))
        return sorted(w for w in self.wordfreq if len(w) == maxlen)
def main():
    """Print statistics for the sample table, for 'corpus.txt', and for both combined.

    The three reports are written to stdout in that order, separated by a
    blank line.
    """
    table_stats = TextStats(textfreq)
    corpus_stats = TextStats.from_file('corpus.txt')
    combined_stats = table_stats + corpus_stats

    print(table_stats)
    for report in (corpus_stats, combined_stats):
        print()
        print(report)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()