mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-10 14:06:41 +00:00
Micro optimizations
This commit is contained in:
parent
f86cb9c62b
commit
3b84f7a016
23
crawler.py
23
crawler.py
@ -1,17 +1,19 @@
|
||||
import os
|
||||
from storage import Task, LocalStorage
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from multiprocessing import Process, Value
|
||||
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
|
||||
import config
|
||||
from indexer import Indexer
|
||||
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser, \
|
||||
PictureFileParser, Sha1CheckSumCalculator, Sha256CheckSumCalculator, ContentMimeGuesser, MimeGuesser, FontParser, \
|
||||
PdfFileParser, DocxParser, EbookParser
|
||||
from indexer import Indexer
|
||||
from search import Search
|
||||
from thumbnail import ThumbnailGenerator
|
||||
from storage import Directory
|
||||
import shutil
|
||||
import config
|
||||
from storage import Task, LocalStorage
|
||||
from thumbnail import ThumbnailGenerator
|
||||
|
||||
|
||||
class RunningTask:
|
||||
@ -28,7 +30,8 @@ class RunningTask:
|
||||
|
||||
class Crawler:
|
||||
|
||||
def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ExtensionMimeGuesser(), indexer=None, dir_id=0,
|
||||
def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser = ExtensionMimeGuesser(), indexer=None,
|
||||
dir_id=0,
|
||||
root_dir="/"):
|
||||
self.documents = []
|
||||
self.enabled_parsers = enabled_parsers
|
||||
@ -48,7 +51,7 @@ class Crawler:
|
||||
|
||||
self.mime_guesser = mime_guesser
|
||||
|
||||
def crawl(self, root_dir: str, counter: Value=None):
|
||||
def crawl(self, root_dir: str, counter: Value = None):
|
||||
|
||||
document_counter = 0
|
||||
|
||||
@ -179,10 +182,6 @@ class TaskManager:
|
||||
self.start_task(self.storage.tasks()[i])
|
||||
else:
|
||||
if self.current_task.done.value == 1:
|
||||
|
||||
self.current_process.terminate()
|
||||
self.storage.del_task(self.current_task.task.id)
|
||||
self.current_task = None
|
||||
|
||||
|
||||
|
||||
|
@ -127,6 +127,7 @@ class GenericFileParser(FileParser):
|
||||
def __init__(self, checksum_calculators: list, root_dir: str):
|
||||
self.checksum_calculators = checksum_calculators
|
||||
self.root_dir = root_dir
|
||||
self.root_dir_len = len(root_dir)+1
|
||||
|
||||
def parse(self, full_path: str) -> dict:
|
||||
"""
|
||||
@ -142,11 +143,12 @@ class GenericFileParser(FileParser):
|
||||
name, extension = os.path.splitext(name)
|
||||
|
||||
info["size"] = file_stat.st_size
|
||||
info["path"] = os.path.relpath(path, self.root_dir)
|
||||
info["path"] = path[self.root_dir_len:]
|
||||
info["name"] = name
|
||||
info["extension"] = extension[1:]
|
||||
info["mtime"] = file_stat.st_mtime
|
||||
|
||||
# TODO: calculate all checksums at once
|
||||
for calculator in self.checksum_calculators:
|
||||
info[calculator.name] = calculator.checksum(full_path)
|
||||
|
||||
@ -317,7 +319,6 @@ class FontParser(GenericFileParser):
|
||||
warnings.simplefilter("ignore")
|
||||
|
||||
try:
|
||||
|
||||
font = TTFont(f)
|
||||
|
||||
if "name" in font:
|
||||
|
Loading…
x
Reference in New Issue
Block a user