From 067a20f7a82f4de12c6a363a32c445a0a5b5bc67 Mon Sep 17 00:00:00 2001
From: simon987
Date: Sun, 18 Apr 2021 20:32:34 -0400
Subject: [PATCH] improve text cleaning

---
 hexlib/text.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hexlib/text.py b/hexlib/text.py
index 7629163..36f1fba 100644
--- a/hexlib/text.py
+++ b/hexlib/text.py
@@ -1,5 +1,5 @@
 from functools import partial
-from multiprocessing.pool import ThreadPool
+from multiprocessing.pool import Pool
 
 import nltk.corpus
 from lxml import etree
@@ -18,8 +18,8 @@ nltk.download("wordnet", quiet=True)
 lemmatizer = WordNetLemmatizer()
 
 
-def clean_multithread(texts, processes, **kwargs):
-    pool = ThreadPool(processes=processes)
+def clean_multicore(texts, processes, **kwargs):
+    pool = Pool(processes=processes)
     return pool.map(
         func=partial(clean, **kwargs),
         iterable=texts,