Migrated to bootstrap 4, added progress bars for tasks

This commit is contained in:
simon
2018-03-13 10:29:06 -04:00
parent eab21fbfe6
commit e79a68ebe6
7 changed files with 237 additions and 219 deletions

View File

@@ -1,4 +1,22 @@
import os
from storage import Task, LocalStorage
import json
from multiprocessing import Process, Value
from apscheduler.schedulers.background import BackgroundScheduler
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser
import time
class RunningTask:
def __init__(self, task: Task):
self.total_files = 0
self.parsed_files = Value("i", 0)
self.task = task
def to_json(self):
return json.dumps({"parsed": self.parsed_files.value, "total": self.total_files, "id": self.task.id})
class Crawler:
@@ -16,7 +34,8 @@ class Crawler:
for ext in parser.extensions:
self.ext_map[ext] = parser
def crawl(self, root_dir: str):
def crawl(self, root_dir: str, counter: Value=None):
for root, dirs, files in os.walk(root_dir):
for filename in files:
@@ -24,9 +43,15 @@ class Crawler:
parser = self.ext_map.get(os.path.splitext(filename)[1], self.default_parser)
doc = parser.parse(full_path)
try:
if counter:
counter.value += 1
self.documents.append(doc)
doc = parser.parse(full_path)
self.documents.append(doc)
except FileNotFoundError:
continue # File was deleted
def countFiles(self, root_dir: str):
count = 0
@@ -36,3 +61,52 @@ class Crawler:
return count
class TaskManager:
def __init__(self, storage: LocalStorage):
self.current_task = None
self.storage = storage
self.current_process = None
scheduler = BackgroundScheduler()
scheduler.add_job(self.check_new_task, "interval", seconds=0.5)
scheduler.start()
def start_task(self, task: Task):
self.current_task = RunningTask(task)
c = Crawler([GenericFileParser([Md5CheckSumCalculator()], ExtensionMimeGuesser())])
path = self.storage.dirs()[task.dir_id].path
self.current_task.total_files = c.countFiles(path)
print("Started task - " + str(self.current_task.total_files) + " files")
print(path)
self.current_process = Process(target=self.execute_crawl, args=(c, path, self.current_task.parsed_files))
self.current_process.daemon = True
self.current_process.start()
def execute_crawl(self, c: Crawler, path: str, counter: Value):
c.crawl(path, counter)
print("Done")
def cancel_task(self):
self.current_task = None
self.current_process.terminate()
def check_new_task(self):
if self.current_task is None:
for i in sorted(self.storage.tasks(), reverse=True):
if not self.storage.tasks()[i].completed:
self.start_task(self.storage.tasks()[i])
else:
print(self.current_task.parsed_files.value)
if self.current_task.parsed_files.value == self.current_task.total_files:
self.current_process.terminate()
self.storage.del_task(self.current_task.task.id)
self.current_task = None