mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-19 18:16:45 +00:00
Micro optimizations
This commit is contained in:
parent
f86cb9c62b
commit
3b84f7a016
23
crawler.py
23
crawler.py
@ -1,17 +1,19 @@
|
|||||||
import os
|
|
||||||
from storage import Task, LocalStorage
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
from multiprocessing import Process, Value
|
from multiprocessing import Process, Value
|
||||||
|
|
||||||
from apscheduler.schedulers.background import BackgroundScheduler
|
from apscheduler.schedulers.background import BackgroundScheduler
|
||||||
|
|
||||||
|
import config
|
||||||
|
from indexer import Indexer
|
||||||
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser, \
|
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser, \
|
||||||
PictureFileParser, Sha1CheckSumCalculator, Sha256CheckSumCalculator, ContentMimeGuesser, MimeGuesser, FontParser, \
|
PictureFileParser, Sha1CheckSumCalculator, Sha256CheckSumCalculator, ContentMimeGuesser, MimeGuesser, FontParser, \
|
||||||
PdfFileParser, DocxParser, EbookParser
|
PdfFileParser, DocxParser, EbookParser
|
||||||
from indexer import Indexer
|
|
||||||
from search import Search
|
from search import Search
|
||||||
from thumbnail import ThumbnailGenerator
|
|
||||||
from storage import Directory
|
from storage import Directory
|
||||||
import shutil
|
from storage import Task, LocalStorage
|
||||||
import config
|
from thumbnail import ThumbnailGenerator
|
||||||
|
|
||||||
|
|
||||||
class RunningTask:
|
class RunningTask:
|
||||||
@ -28,7 +30,8 @@ class RunningTask:
|
|||||||
|
|
||||||
class Crawler:
|
class Crawler:
|
||||||
|
|
||||||
def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ExtensionMimeGuesser(), indexer=None, dir_id=0,
|
def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser = ExtensionMimeGuesser(), indexer=None,
|
||||||
|
dir_id=0,
|
||||||
root_dir="/"):
|
root_dir="/"):
|
||||||
self.documents = []
|
self.documents = []
|
||||||
self.enabled_parsers = enabled_parsers
|
self.enabled_parsers = enabled_parsers
|
||||||
@ -48,7 +51,7 @@ class Crawler:
|
|||||||
|
|
||||||
self.mime_guesser = mime_guesser
|
self.mime_guesser = mime_guesser
|
||||||
|
|
||||||
def crawl(self, root_dir: str, counter: Value=None):
|
def crawl(self, root_dir: str, counter: Value = None):
|
||||||
|
|
||||||
document_counter = 0
|
document_counter = 0
|
||||||
|
|
||||||
@ -179,10 +182,6 @@ class TaskManager:
|
|||||||
self.start_task(self.storage.tasks()[i])
|
self.start_task(self.storage.tasks()[i])
|
||||||
else:
|
else:
|
||||||
if self.current_task.done.value == 1:
|
if self.current_task.done.value == 1:
|
||||||
|
|
||||||
self.current_process.terminate()
|
self.current_process.terminate()
|
||||||
self.storage.del_task(self.current_task.task.id)
|
self.storage.del_task(self.current_task.task.id)
|
||||||
self.current_task = None
|
self.current_task = None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -127,6 +127,7 @@ class GenericFileParser(FileParser):
|
|||||||
def __init__(self, checksum_calculators: list, root_dir: str):
|
def __init__(self, checksum_calculators: list, root_dir: str):
|
||||||
self.checksum_calculators = checksum_calculators
|
self.checksum_calculators = checksum_calculators
|
||||||
self.root_dir = root_dir
|
self.root_dir = root_dir
|
||||||
|
self.root_dir_len = len(root_dir)+1
|
||||||
|
|
||||||
def parse(self, full_path: str) -> dict:
|
def parse(self, full_path: str) -> dict:
|
||||||
"""
|
"""
|
||||||
@ -142,11 +143,12 @@ class GenericFileParser(FileParser):
|
|||||||
name, extension = os.path.splitext(name)
|
name, extension = os.path.splitext(name)
|
||||||
|
|
||||||
info["size"] = file_stat.st_size
|
info["size"] = file_stat.st_size
|
||||||
info["path"] = os.path.relpath(path, self.root_dir)
|
info["path"] = path[self.root_dir_len:]
|
||||||
info["name"] = name
|
info["name"] = name
|
||||||
info["extension"] = extension[1:]
|
info["extension"] = extension[1:]
|
||||||
info["mtime"] = file_stat.st_mtime
|
info["mtime"] = file_stat.st_mtime
|
||||||
|
|
||||||
|
# TODO: calculate all checksums at once
|
||||||
for calculator in self.checksum_calculators:
|
for calculator in self.checksum_calculators:
|
||||||
info[calculator.name] = calculator.checksum(full_path)
|
info[calculator.name] = calculator.checksum(full_path)
|
||||||
|
|
||||||
@ -317,7 +319,6 @@ class FontParser(GenericFileParser):
|
|||||||
warnings.simplefilter("ignore")
|
warnings.simplefilter("ignore")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
font = TTFont(f)
|
font = TTFont(f)
|
||||||
|
|
||||||
if "name" in font:
|
if "name" in font:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user