Don't use multiprocessing for recrawl task

This commit is contained in:
simon987 2019-04-06 09:21:02 -04:00
parent 06ae89f4d2
commit 0c3d0b38e6

View File

@@ -2,7 +2,6 @@ import json
import logging
import os
import time
from multiprocessing.pool import ThreadPool
from threading import Thread
from uuid import uuid4
@@ -141,9 +140,8 @@ class TaskManager:
)
self.queue_task(crawl_task)
pool = ThreadPool(processes=3)
pool.map(func=recrawl, iterable=websites_to_crawl)
pool.close()
for w in websites_to_crawl:
recrawl(w)
def queue_task(self, task: Task):
max_assign_time = 24 * 7 * 3600