from nltk.corpus import stopwords
# Lower-cased English stop words from NLTK, kept in a set because the
# examples below only ever test membership against it.
stop_words = set(map(str.lower, stopwords.words('english')))

# Two example sentences, each stored as a tuple of tokens.
sent = [
    ('include', 'details', 'about', 'your', 'performance'),
    ('show', 'the', 'results,', 'which', 'you\'ve', 'got'),
]
如果您想创建一个不包含停用词的单词列表:
>>> no_stop_words = [word for sentence in sent for word in sentence if word not in stop_words]
['include', 'details', 'performance', 'show', 'results,', 'got']
如果您想保持句子的完整性(即保留按句子分组的结构):
>>> sent_no_stop = [[word for word in sentence if word not in stop_words] for sentence in sent]
[['include', 'details', 'performance'], ['show', 'results,', 'got']]
然而,大多数情况下,您处理的是一个扁平的单词列表(即不含嵌套的元组括号)。
# Flat list of tokens. Every word must be its own element: a fused entry such
# as 'your performance' never matches a stop word, so 'your' would slip
# through the filter and 'performance' would not appear on its own.
sent = [
    'include', 'details', 'about', 'your', 'performance',
    'show', 'the', 'results,', 'which', 'you\'ve', 'got',
]
>>> no_stopwords = [word for word in sent if word not in stop_words]
['include', 'details', 'performance', 'show', 'results,', 'got']