1 2 3 4 5 6 7 8 9 10
from nltk.book import *
from nltk.util import bigrams

# bigrams() pairs each item with its successor; wrap it in list() so the
# pairs themselves are printed.
print(list(bigrams(['more', 'is', 'said', 'than', 'done'])))

# Collect the most common word pairs (collocations) derived from text2,
# ignoring stopwords; num=20 asks for 20 of them.
a = text2.collocation_list(num=20)
print(a)
Output:
[('more', 'is'), ('is', 'said'), ('said', 'than'), ('than', 'done')]
['Colonel Brandon', 'Sir John', 'Lady Middleton', 'Miss Dashwood', 'every thing', 'thousand pounds', 'dare say', 'Miss Steeles', 'said Elinor', 'Miss Steele', 'every body', 'John Dashwood', 'great deal', 'Harley Street', 'Berkeley Street', 'Miss Dashwoods', 'young man', 'Combe Magna', 'every day', 'next morning']
|