ignore log in text

This commit is contained in:
simon987 2021-04-18 12:20:22 -04:00
parent d895ac837e
commit 18cd59fc4a

View File

@@ -1,5 +1,4 @@
import nltk.corpus import nltk.corpus
from hexlib.misc import silent_stderr
from lxml import etree from lxml import etree
from nltk.corpus import stopwords from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer from nltk.stem import WordNetLemmatizer
@@ -10,9 +9,8 @@ get_text = etree.XPath("//text()")
stop_words_en = set(stopwords.words("english")) stop_words_en = set(stopwords.words("english"))
with silent_stderr: nltk.download("stopwords", quiet=True)
nltk.download("stopwords") nltk.download("wordnet", quiet=True)
nltk.download("wordnet")
lemmatizer = WordNetLemmatizer() lemmatizer = WordNetLemmatizer()