本文介绍TextBlob的使用方法。TextBlob是一个用Python编写的开源文本处理库,可以用来执行很多自然语言处理任务,比如词性标注、名词短语提取、情感分析、文本翻译等。
简介
官方文档:https://textblob.readthedocs.io/en/dev/
# 安装:pip install textblob
# 配置国内源安装:pip install textblob -i https://pypi.tuna.tsinghua.edu.cn/simple
# 参考:https://textblob.readthedocs.io/en/dev/quickstart.html
from textblob import TextBlob
text = 'I love natural language processing! I am not like fish!'
blob = TextBlob(text)
blob.tags
[('I', 'PRP'),
('love', 'VBP'),
('natural', 'JJ'),
('language', 'NN'),
('processing', 'NN'),
('I', 'PRP'),
('am', 'VBP'),
('not', 'RB'),
('like', 'IN'),
('fish', 'NN')]
np = blob.noun_phrases
for w in np:
print(w)
natural language processing
for sentence in blob.sentences:
print(sentence + '------>' + str(sentence.sentiment.polarity))
I love natural language processing!------>0.3125
I am not like fish!------>0.0
token = blob.words
for w in token:
print(w)
I
love
natural
language
processing
I
am
not
like
fish
sentence = blob.sentences
for s in sentence:
print(s)
I love natural language processing!
I am not like fish!
token = blob.words
for w in token:
# 变复数
print(w.pluralize())
# 变单数
print(w.singularize())
we
I
love
love
naturals
natural
languages
language
processings
processing
we
I
ams
am
nots
not
likes
like
fish
fish
from textblob import Word
w = Word('went')
print(w.lemmatize('v'))
w = Word('octopi')
print(w.lemmatize())
go
octopus
from textblob.wordnet import VERB
word = Word('octopus')
syn_word = word.synsets
for syn in syn_word:
print(syn)
Synset('octopus.n.01')
Synset('octopus.n.02')
# 指定返回的同义词集为动词
syn_word1 = Word("hack").get_synsets(pos=VERB)
for syn in syn_word1:
print(syn)
Synset('chop.v.05')
Synset('hack.v.02')
Synset('hack.v.03')
Synset('hack.v.04')
Synset('hack.v.05')
Synset('hack.v.06')
Synset('hack.v.07')
Synset('hack.v.08')
# 查看synset(同义词集)的具体定义
Word("beautiful").definitions
['delighting the senses or exciting intellectual or emotional admiration',
'(of weather) highly enjoyable']
sen = 'I lvoe naturl language processing!'
sen = TextBlob(sen)
print(sen.correct())
I love nature language processing!
# Word.spellcheck()返回拼写建议以及置信度
w1 = Word('good')
w2 = Word('god')
w3 = Word('gd')
print(w1.spellcheck())
print(w2.spellcheck())
print(w3.spellcheck())
[('good', 1.0)]
[('god', 1.0)]
[('go', 0.586139896373057), ('god', 0.23510362694300518), ('d', 0.11658031088082901), ('g', 0.03626943005181347), ('ed', 0.009067357512953367), ('rd', 0.006476683937823834), ('nd', 0.0038860103626943004), ('gr', 0.0025906735751295338), ('sd', 0.0006476683937823834), ('md', 0.0006476683937823834), ('id', 0.0006476683937823834), ('gdp', 0.0006476683937823834), ('ga', 0.0006476683937823834), ('ad', 0.0006476683937823834)]
text = TextBlob('I lvoe naturl language processing!')
print(text.parse())
I/PRP/B-NP/O lvoe/NN/I-NP/O naturl/NN/I-NP/O language/NN/I-NP/O processing/NN/I-NP/O !/./O/O
text = TextBlob('I lvoe naturl language processing!')
print(text.ngrams(n=2))
[WordList(['I', 'lvoe']), WordList(['lvoe', 'naturl']), WordList(['naturl', 'language']), WordList(['language', 'processing'])]
# 示例:使用TextBlob训练一个朴素贝叶斯分类器(Naive Bayes classifier)
# 参考:https://textblob.readthedocs.io/en/dev/classifiers.html#classifiers
# 1.准备数据集:训练集和测试集
train = [
... ('I love this sandwich.', 'pos'),
... ('this is an amazing place!', 'pos'),
... ('I feel very good about these beers.', 'pos'),
... ('this is my best work.', 'pos'),
... ("what an awesome view", 'pos'),
... ('I do not like this restaurant', 'neg'),
... ('I am tired of this stuff.', 'neg'),
... ("I can't deal with this", 'neg'),
... ('he is my sworn enemy!', 'neg'),
... ('my boss is horrible.', 'neg')
... ]
test = [
... ('the beer was good.', 'pos'),
... ('I do not enjoy my job', 'neg'),
... ("I ain't feeling dandy today.", 'neg'),
... ("I feel amazing!", 'pos'),
... ('Gary is a friend of mine.', 'pos'),
... ("I can't believe I'm doing this.", 'neg')
... ]
# 2.创建朴素贝叶斯分类器
from textblob.classifiers import NaiveBayesClassifier
# 3.把训练集丢进去训练
nb_model = NaiveBayesClassifier(train)
# 4.预测新来的样本
dev_sen = "This is an amazing library!"
print(nb_model.classify(dev_sen))
pos
# 也可以计算属于某一类的概率
dev_sen_prob = nb_model.prob_classify(dev_sen)
print(dev_sen_prob.prob("pos"))
0.980117820324005
# 5.计算模型在测试集上的准确率(accuracy)
print(nb_model.accuracy(test))
0.8333333333333334
— 完 —
扫码关注人工智能头条 围观一个假的 AI