Don't use multiprocessing for recrawl task

This commit is contained in:
simon987 2019-04-06 09:21:02 -04:00
parent 06ae89f4d2
commit 0c3d0b38e6

View File

@@ -2,7 +2,6 @@ import json
import logging import logging
import os import os
import time import time
from multiprocessing.pool import ThreadPool
from threading import Thread from threading import Thread
from uuid import uuid4 from uuid import uuid4
@@ -141,9 +140,8 @@ class TaskManager:
) )
self.queue_task(crawl_task) self.queue_task(crawl_task)
pool = ThreadPool(processes=3) for w in websites_to_crawl:
pool.map(func=recrawl, iterable=websites_to_crawl) recrawl(w)
pool.close()
def queue_task(self, task: Task): def queue_task(self, task: Task):
max_assign_time = 24 * 7 * 3600 max_assign_time = 24 * 7 * 3600