mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-12-14 23:59:04 +00:00
CRUD for tasks, dirs and options.
Added flash messages
This commit is contained in:
27
crawler.py
27
crawler.py
@@ -1,33 +1,38 @@
|
||||
import os
|
||||
|
||||
|
||||
class Crawler:
|
||||
|
||||
def __init__(self, enabled_parsers: list):
|
||||
self.documents = []
|
||||
self.enabled_parsers = enabled_parsers
|
||||
|
||||
for parser in self.enabled_parsers:
|
||||
if parser.is_default:
|
||||
self.default_parser = parser
|
||||
|
||||
self.ext_map = {}
|
||||
|
||||
for parser in self.enabled_parsers:
|
||||
for ext in parser.extensions:
|
||||
self.ext_map[ext] = parser
|
||||
|
||||
def crawl(self, root_dir: str):
|
||||
for root, dirs, files in os.walk(root_dir):
|
||||
|
||||
for filename in files:
|
||||
full_path = os.path.join(root, filename)
|
||||
|
||||
parser = self.get_parser_by_ext(os.path.splitext(filename)[1])
|
||||
parser = self.ext_map.get(os.path.splitext(filename)[1], self.default_parser)
|
||||
|
||||
doc = parser.parse(full_path)
|
||||
|
||||
self.documents.append(doc)
|
||||
|
||||
def get_parser_by_ext(self, ext: str):
|
||||
def countFiles(self, root_dir: str):
|
||||
count = 0
|
||||
|
||||
for parser in self.enabled_parsers:
|
||||
|
||||
if ext in parser.extensions:
|
||||
return parser
|
||||
|
||||
for parser in self.enabled_parsers:
|
||||
if parser.is_default:
|
||||
return parser
|
||||
for root, dirs, files in os.walk(root_dir):
|
||||
count += len(files)
|
||||
|
||||
return count
|
||||
|
||||
|
||||
Reference in New Issue
Block a user