mirror of
https://github.com/simon987/hexlib.git
synced 2025-04-19 17:56:43 +00:00
ignore log in text
This commit is contained in:
parent
d895ac837e
commit
18cd59fc4a
@ -1,5 +1,4 @@
|
|||||||
import nltk.corpus
|
import nltk.corpus
|
||||||
from hexlib.misc import silent_stderr
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
from nltk.stem import WordNetLemmatizer
|
from nltk.stem import WordNetLemmatizer
|
||||||
@ -10,9 +9,8 @@ get_text = etree.XPath("//text()")
|
|||||||
|
|
||||||
stop_words_en = set(stopwords.words("english"))
|
stop_words_en = set(stopwords.words("english"))
|
||||||
|
|
||||||
with silent_stderr:
|
nltk.download("stopwords", quiet=True)
|
||||||
nltk.download("stopwords")
|
nltk.download("wordnet", quiet=True)
|
||||||
nltk.download("wordnet")
|
|
||||||
|
|
||||||
lemmatizer = WordNetLemmatizer()
|
lemmatizer = WordNetLemmatizer()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user