mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-18 17:56:44 +00:00
Added local storage for directories and generic file parser
This commit is contained in:
parent
09acdc762a
commit
fec23d40d9
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
||||
spec/test_folder/* linguist-vendored
|
200
crawler.py
200
crawler.py
@ -1,64 +1,152 @@
|
||||
import os
|
||||
import hashlib
|
||||
import mimetypes
|
||||
from PIL import Image
|
||||
import simplejson
|
||||
|
||||
rootDir = "/home/simon/Documents"
|
||||
|
||||
|
||||
# https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
|
||||
def md5sum(filename, block_size=65536):
    """
    Calculate the md5 checksum of a file
    :param filename: path of the file to hash
    :param block_size: number of bytes read from the file per iteration
    :return: lowercase hexadecimal md5 digest
    """
    # Named 'digest' so that the hash() builtin is not shadowed
    digest = hashlib.md5()

    with open(filename, "rb") as f:
        # read() returns b"" at end of file, which stops the iterator; the
        # file is hashed block by block and never loaded whole in memory
        for block in iter(lambda: f.read(block_size), b""):
            digest.update(block)

    return digest.hexdigest()
|
||||
class Crawler:
    """
    Placeholder for the crawler, no behaviour is implemented yet
    """
|
||||
|
||||
|
||||
def crawl(root_dir):
    """
    Walk a directory tree and dump information about every file found
    :param root_dir: directory from which the recursive walk starts
    :return: None, the collected documents are written as JSON to the
    relative path "crawler.json"
    """
    docs = []

    for root, subdirs, files in os.walk(root_dir):

        print(root)

        for filename in files:
            full_path = os.path.join(root, filename)

            doc = dict()

            doc["md5"] = md5sum(full_path)
            doc["path"] = root
            doc["name"] = filename
            doc["size"] = os.path.getsize(full_path)
            doc["mtime"] = int(os.path.getmtime(full_path))

            mime_type = mimetypes.guess_type(full_path)[0]

            if mime_type is not None:

                doc["mime"] = mime_type

                if mime_type.startswith("image"):
                    try:
                        # The context manager releases the image file handle
                        # as soon as the dimensions have been read
                        with Image.open(full_path) as image:
                            width, height = image.size

                        doc["width"] = width
                        doc["height"] = height
                    except (OSError, ValueError):
                        # The file could not be read as an image: drop the
                        # guessed mime type instead of indexing a wrong one
                        doc.pop('mime', None)

            docs.append(doc)

    # 'with' guarantees the output file is flushed and closed even if the
    # serialization raises
    with open("crawler.json", "w") as file:
        file.write(simplejson.dumps(docs))
|
||||
class FileParser:
    """
    Base class of the file parsers, it defines no behaviour of its own
    """
|
||||
|
||||
|
||||
# Only crawl when the file is executed as a script, so that importing the
# module (as the spec files do) does not walk rootDir as a side effect
if __name__ == "__main__":
    crawl(rootDir)
|
||||
class CheckSumCalculator:
    """
    Base class of the checksum calculators, subclasses override checksum()
    """

    def checksum(self, path: str) -> str:
        """
        Calculate the checksum of a file
        :param path: path of the file
        :return: checksum
        """
        raise NotImplementedError()

    @staticmethod
    def _hex_digest(path: str, hasher, block_size: int = 65536) -> str:
        """
        Feed the content of a file to a hashlib object, block by block
        :param path: path of the file
        :param hasher: hashlib object exposing update() and hexdigest()
        :param block_size: number of bytes read from the file per iteration
        :return: uppercase hexadecimal digest
        """
        with open(path, "rb") as f:
            # read() returns b"" at end of file, which stops the iterator
            for block in iter(lambda: f.read(block_size), b""):
                hasher.update(block)

        return hasher.hexdigest().upper()


class Md5CheckSumCalculator(CheckSumCalculator):

    def __init__(self):
        self.name = "md5"

    def checksum(self, path: str) -> str:
        """
        Calculate the md5 checksum of a file
        :param path: path of the file
        :return: md5 checksum
        """
        return self._hex_digest(path, hashlib.md5())


class Sha1CheckSumCalculator(CheckSumCalculator):

    def __init__(self):
        self.name = "sha1"

    def checksum(self, path: str) -> str:
        """
        Calculate the sha1 checksum of a file
        :param path: path of the file
        :return: sha1 checksum
        """
        return self._hex_digest(path, hashlib.sha1())


class Sha256CheckSumCalculator(CheckSumCalculator):

    def __init__(self):
        self.name = "sha256"

    def checksum(self, path: str) -> str:
        """
        Calculate the sha256 checksum of a file
        :param path: path of the file
        :return: sha256 checksum
        """
        return self._hex_digest(path, hashlib.sha256())
|
||||
|
||||
|
||||
class GenericFileParser(FileParser):
    """
    Parser collecting the information that is available for any file
    """

    def __init__(self, checksum_calculators: list):
        """
        :param checksum_calculators: calculators run on every parsed file,
        each one must expose a name attribute and a checksum(path) method
        """
        self.checksum_calculators = checksum_calculators

    def parse(self, path: str) -> dict:
        """
        Parse a generic file
        :param path: path of the file to parse
        :return: dict information about the file: "size", "name" and one
        entry per checksum calculator, keyed by the calculator's name
        """

        info = dict()

        info["size"] = os.path.getsize(path)
        # basename() keeps only the last path component; splitext(path)[0]
        # used to return the whole directory-qualified path minus extension
        info["name"] = os.path.basename(path)

        for calculator in self.checksum_calculators:
            info[calculator.name] = calculator.checksum(path)

        return info
|
||||
|
||||
|
||||
|
||||
|
||||
# def crawl(root_dir: str) -> None:
|
||||
# docs = []
|
||||
#
|
||||
# for root, dirs, files in os.walk(root_dir):
|
||||
#
|
||||
# print(root)
|
||||
#
|
||||
# for filename in files:
|
||||
# full_path = os.path.join(root, filename)
|
||||
#
|
||||
# doc = dict()
|
||||
#
|
||||
# doc["md5"] = md5sum(full_path)
|
||||
# doc["path"] = root
|
||||
# doc["name"] = filename
|
||||
# doc["size"] = os.path.getsize(full_path)
|
||||
# doc["mtime"] = int(os.path.getmtime(full_path))
|
||||
#
|
||||
# mime_type = mimetypes.guess_type(full_path)[0]
|
||||
#
|
||||
# if mime_type is not None:
|
||||
#
|
||||
# doc["mime"] = mime_type
|
||||
#
|
||||
# if mime_type.startswith("image"):
|
||||
# try:
|
||||
# width, height = Image.open(full_path).size
|
||||
#
|
||||
# doc["width"] = width
|
||||
# doc["height"] = height
|
||||
# except OSError:
|
||||
# doc.pop('mime', None)
|
||||
# pass
|
||||
# except ValueError:
|
||||
# doc.pop('mime', None)
|
||||
# pass
|
||||
#
|
||||
# docs.append(doc)
|
||||
#
|
||||
# file = open("crawler.json", "w")
|
||||
# file.write(simplejson.dumps(docs))
|
||||
# file.close()
|
||||
#
|
||||
#
|
40
database.sql
Normal file
40
database.sql
Normal file
@ -0,0 +1,40 @@
|
||||
PRAGMA FOREIGN_KEYS = ON;

-- Represents a directory and its sub-directories
CREATE TABLE Directory (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    path TEXT UNIQUE,
    enabled BOOLEAN
);

-- Represents a queued task for crawling a Directory or generating thumbnails
CREATE TABLE Task (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    directory_id INTEGER,
    task_type INTEGER,
    completed BOOLEAN DEFAULT 0,
    completed_time DATETIME,
    FOREIGN KEY (directory_id) REFERENCES Directory(id)
);

-- You can set an option on a directory to change the crawler's behavior
CREATE TABLE Option (
    -- TEXT, not STRING: SQLite gives a STRING column NUMERIC affinity, which
    -- would store a numeric-looking option name as a number
    name TEXT,
    directory_id INTEGER,
    FOREIGN KEY (directory_id) REFERENCES Directory(id),
    PRIMARY KEY (name, directory_id)
);

-- User accounts
CREATE TABLE User (
    username TEXT PRIMARY KEY,
    password TEXT,
    is_admin BOOLEAN
);

-- Associates a user with a directory (read permission, going by the table name)
CREATE TABLE User_canRead_Directory (
    username TEXT,
    directory_id INTEGER,
    PRIMARY KEY (username, directory_id),
    FOREIGN KEY (username) REFERENCES User(username),
    FOREIGN KEY (directory_id) REFERENCES Directory(id)
);
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
||||
Pillow
|
||||
simplejson
|
135
run.py
Normal file
135
run.py
Normal file
@ -0,0 +1,135 @@
|
||||
from flask import Flask, render_template, send_file, request
|
||||
import pysolr
|
||||
import mimetypes
|
||||
import requests
|
||||
import json
|
||||
from PIL import Image
|
||||
import os
|
||||
|
||||
SOLR_URL = "http://localhost:8983/solr/test/"
|
||||
|
||||
solr = pysolr.Solr(SOLR_URL, timeout=10)
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
#
|
||||
# class Document:
|
||||
# def __init__(self, doc_id, name, path, size, md5):
|
||||
# self.doc_id = doc_id
|
||||
# self.name = name
|
||||
# self.path = path
|
||||
# self.size = size
|
||||
# self.md5 = md5
|
||||
#
|
||||
#
|
||||
# class ImageDocument(Document):
|
||||
# def __init__(self, doc_id, name, path, size, md5):
|
||||
# super().__init__(doc_id, name, path, size, md5)
|
||||
# self.type = "image"
|
||||
#
|
||||
#
|
||||
# class AudioClipDocument(Document):
|
||||
# def __init__(self, doc_id, name, path, size, md5):
|
||||
# super().__init__(doc_id, name, path, size, md5)
|
||||
# self.type = "audio"
|
||||
#
|
||||
#
|
||||
# def get_document(id):
|
||||
#
|
||||
# response = requests.get(SOLR_URL + "get?id=" + id)
|
||||
#
|
||||
# return json.loads(response.text)["doc"]
|
||||
#
|
||||
#
|
||||
# def make_thumb(doc):
|
||||
# size = (1024, 1024)
|
||||
#
|
||||
# thumb_path = "thumbnails/" + doc["id"]
|
||||
#
|
||||
# if not os.path.exists(thumb_path):
|
||||
#
|
||||
# file_path = doc["path"][0] + "/" + doc["name"][0]
|
||||
#
|
||||
# if doc["width"][0] > size[0]:
|
||||
#
|
||||
# image = Image.open(file_path)
|
||||
# image.thumbnail(size, Image.ANTIALIAS)
|
||||
#
|
||||
# if image.mode == "RGB":
|
||||
# image.save(thumb_path, "JPEG")
|
||||
# elif image.mode == "RGBA":
|
||||
# image.save(thumb_path, "PNG")
|
||||
# else:
|
||||
# image = image.convert("RGB")
|
||||
# image.save(thumb_path, "JPEG")
|
||||
# else:
|
||||
# print("Skipping thumbnail")
|
||||
# os.symlink(file_path, thumb_path)
|
||||
#
|
||||
# return "thumbnails/" + doc["id"]
|
||||
#
|
||||
#
|
||||
# @app.route("/search/")
|
||||
# def search():
|
||||
#
|
||||
# query = request.args.get("query")
|
||||
# page = int(request.args.get("page"))
|
||||
# per_page = int(request.args.get("per_page"))
|
||||
#
|
||||
# results = solr.search(query, None, rows=per_page, start=per_page * page)
|
||||
#
|
||||
# docs = []
|
||||
# for r in results:
|
||||
#
|
||||
# if "mime" in r:
|
||||
# mime_type = r["mime"][0]
|
||||
# else:
|
||||
# mime_type = ""
|
||||
#
|
||||
# if mime_type.startswith("image"):
|
||||
# docs.append(ImageDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
|
||||
#
|
||||
# elif mime_type.startswith("audio"):
|
||||
# docs.append(AudioClipDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
|
||||
#
|
||||
# return render_template("search.html", docs=docs)
|
||||
#
|
||||
#
|
||||
# @app.route("/")
|
||||
# def index():
|
||||
# return render_template("index.html")
|
||||
#
|
||||
#
|
||||
# @app.route("/files/<id>/")
|
||||
# def files(id):
|
||||
#
|
||||
# doc = get_document(id)
|
||||
#
|
||||
# if doc is not None:
|
||||
# file_path = doc["path"][0] + "/" + doc["name"][0]
|
||||
# return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
|
||||
# else:
|
||||
# return "File not found"
|
||||
#
|
||||
#
|
||||
# @app.route("/thumbs/<doc_id>/")
|
||||
# def thumbs(doc_id):
|
||||
#
|
||||
# doc = get_document(doc_id)
|
||||
#
|
||||
# if doc is not None:
|
||||
#
|
||||
# thumb_path = make_thumb(doc)
|
||||
#
|
||||
# return send_file("thumbnails/" + doc_id, mimetype=mimetypes.guess_type(thumb_path)[0])
|
||||
# else:
|
||||
# return "File not found"
|
||||
|
||||
|
||||
@app.route("/")
def tmp_route():
    """
    Placeholder handler for the root URL
    :return: the literal string "test"
    """
    return "test"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Start the Flask server on every network interface, port 8080
    app.run("0.0.0.0", 8080)
|
4
setupDb.sh
Executable file
4
setupDb.sh
Executable file
@ -0,0 +1,4 @@
|
||||
#!/bin/bash

# Remove the database that is about to be created so the schema is applied to
# an empty file; test.db is still removed as before. -f keeps rm quiet when a
# file does not exist.
rm -f test.db local_storage.db
sqlite3 local_storage.db -init "database.sql"
|
10
spec/Crawler_spec.py
Normal file
10
spec/Crawler_spec.py
Normal file
@ -0,0 +1,10 @@
|
||||
from unittest import TestCase
|
||||
|
||||
from crawler import Crawler
|
||||
|
||||
|
||||
class CrawlerTest(TestCase):
    """
    Tests of the Crawler class
    """

    def test_dir_walk(self):
        # Only checks that a Crawler can be instantiated; nothing is asserted
        # about walking a directory yet
        c = Crawler()
|
||||
|
131
spec/FileParser_spec.py
Normal file
131
spec/FileParser_spec.py
Normal file
@ -0,0 +1,131 @@
|
||||
import os
|
||||
from unittest import TestCase
|
||||
|
||||
from crawler import GenericFileParser, Md5CheckSumCalculator, Sha1CheckSumCalculator, Sha256CheckSumCalculator
|
||||
|
||||
|
||||
class GenericFileParserTest(TestCase):
    """
    Check the information GenericFileParser returns for a small fixture file
    """

    def setUp(self):
        # Start from a clean fixture even if a previous run left one behind
        if os.path.exists("test_parse"):
            os.remove("test_parse")

        # The context manager closes the handle even if the write fails
        with open("test_parse", "w") as test_file:
            test_file.write("12345678")

        self.parser = GenericFileParser([Md5CheckSumCalculator()])

    def tearDown(self):
        os.remove("test_parse")

    def test_parse_size(self):
        result = self.parser.parse("test_parse")

        # The fixture holds 8 single-byte characters
        self.assertEqual(result["size"], 8)

    def test_parse_name(self):
        result = self.parser.parse("test_parse")

        self.assertEqual(result["name"], "test_parse")

    def test_parse_md5(self):
        result = self.parser.parse("test_parse")

        self.assertEqual(result["md5"], "25D55AD283AA400AF464C76D713C07AD")
|
||||
|
||||
|
||||
class Md5CheckSumCalculatorTest(TestCase):
    """
    Check Md5CheckSumCalculator against two fixture files of known content
    """

    # Fixture file name -> content written to it before each test
    FILES = {
        "test_md5_1": "789456123",
        "test_md5_2": "cj3w97n7RY378WRXEN68W7RExnw6nr8276b473824",
    }

    def setUp(self):
        for name, content in self.FILES.items():
            # Start from a clean fixture even if a previous run left one behind
            if os.path.exists(name):
                os.remove(name)

            # The context manager closes the handle even if the write fails
            with open(name, "w") as test_file:
                test_file.write(content)

        self.calculator = Md5CheckSumCalculator()

    def tearDown(self):
        for name in self.FILES:
            os.remove(name)

    def test_md5_checksum(self):

        result = self.calculator.checksum("test_md5_1")
        self.assertEqual(result, "9FAB6755CD2E8817D3E73B0978CA54A6")

        result = self.calculator.checksum("test_md5_2")
        self.assertEqual(result, "39A1AADE23E33A7F37C11C7FF9CDC9EC")
|
||||
|
||||
|
||||
class Sha1CheckSumCalculatorTest(TestCase):
    """
    Check Sha1CheckSumCalculator against two fixture files of known content
    """

    # Fixture file name -> content written to it before each test
    FILES = {
        "test_sha1_1": "sxjkneycbu",
        "test_sha1_2": "xoimoqxy38e",
    }

    def setUp(self):
        for name, content in self.FILES.items():
            # Start from a clean fixture even if a previous run left one behind
            if os.path.exists(name):
                os.remove(name)

            # The context manager closes the handle even if the write fails
            with open(name, "w") as test_file:
                test_file.write(content)

        self.calculator = Sha1CheckSumCalculator()

    def tearDown(self):
        for name in self.FILES:
            os.remove(name)

    # Renamed from test_md5_checksum: this test exercises the sha1 calculator
    def test_sha1_checksum(self):

        result = self.calculator.checksum("test_sha1_1")
        self.assertEqual(result, "A80315387730DB5743061F397EB66DE0DDAE19E5")

        result = self.calculator.checksum("test_sha1_2")
        self.assertEqual(result, "E7B5A2B6F6838E766A0BC7E558F640726D70A8D6")
|
||||
|
||||
|
||||
class Sha256CheckSumCalculatorTest(TestCase):
    """
    Check Sha256CheckSumCalculator against two fixture files of known content
    """

    # Fixture file name -> content written to it before each test
    FILES = {
        "test_sha256_1": "eaur5t84nc7i",
        "test_sha256_2": "xkwerci47ixryw7r6wxadwd",
    }

    def setUp(self):
        for name, content in self.FILES.items():
            # Start from a clean fixture even if a previous run left one behind
            if os.path.exists(name):
                os.remove(name)

            # The context manager closes the handle even if the write fails
            with open(name, "w") as test_file:
                test_file.write(content)

        self.calculator = Sha256CheckSumCalculator()

    def tearDown(self):
        for name in self.FILES:
            os.remove(name)

    # Renamed from test_md5_checksum: this test exercises the sha256 calculator
    def test_sha256_checksum(self):

        result = self.calculator.checksum("test_sha256_1")
        self.assertEqual(result, "DA7606DC763306B700685A71E2E72A2D95F1291209E5DA344B82DA2508FC27C5")

        result = self.calculator.checksum("test_sha256_2")
        self.assertEqual(result, "C39C7E0E7D84C9692F3C9C22E1EA0327DEBF1BF531B5738EEA8E79FE27EBC570")
|
47
spec/LocalStorage_spec.py
Normal file
47
spec/LocalStorage_spec.py
Normal file
@ -0,0 +1,47 @@
|
||||
from unittest import TestCase
|
||||
|
||||
from storage import LocalStorage, Directory, DuplicateDirectoryException
|
||||
|
||||
|
||||
class LocalStorageTest(TestCase):
    """
    Tests saving and retrieving Directory objects through LocalStorage
    """

    def setUp(self):

        s = LocalStorage()
        # The schema path is relative to the working directory
        # NOTE(review): presumably init_db recreates an empty database before
        # each test - confirm against storage.LocalStorage
        s.init_db("../database.sql")

    def test_save_and_retrieve_dir(self):

        storage = LocalStorage()

        d = Directory("/some/directory", True, ["opt1", "opt2", "opt3"])

        storage.save_directory(d)

        self.assertEqual(storage.dirs()["/some/directory"].enabled, True)
        self.assertEqual(storage.dirs()["/some/directory"].options[0], "opt1")

    def test_save_and_retrieve_dir_persistent(self):

        s1 = LocalStorage()

        d = Directory("/some/directory", True, ["opt1", "opt2", "opt3"])

        s1.save_directory(d)

        # A second, independent instance must see what the first one saved
        s2 = LocalStorage()
        self.assertEqual(s2.dirs()["/some/directory"].enabled, True)
        self.assertEqual(s2.dirs()["/some/directory"].options[0], "opt1")

    def test_reject_duplicate_path(self):

        s = LocalStorage()

        d1 = Directory("/some/directory", True, ["opt1", "opt2"])
        d2 = Directory("/some/directory", True, ["opt1", "opt2"])

        s.save_directory(d1)

        # Saving a second directory with the same path must be refused
        with self.assertRaises(DuplicateDirectoryException):
            s.save_directory(d2)
|
||||
|
11
spec/test_folder/books.csv
vendored
Normal file
11
spec/test_folder/books.csv
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
id,cat,name,price,inStock,author,series_t,sequence_i,genre_s
|
||||
0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
|
||||
0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
|
||||
055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
|
||||
0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
|
||||
0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
|
||||
0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
|
||||
0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
|
||||
0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
|
||||
0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
|
||||
080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
|
|
51
spec/test_folder/books.json
vendored
Normal file
51
spec/test_folder/books.json
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
[
|
||||
{
|
||||
"id" : "978-0641723445",
|
||||
"cat" : ["book","hardcover"],
|
||||
"name" : "The Lightning Thief",
|
||||
"author" : "Rick Riordan",
|
||||
"series_t" : "Percy Jackson and the Olympians",
|
||||
"sequence_i" : 1,
|
||||
"genre_s" : "fantasy",
|
||||
"inStock" : true,
|
||||
"price" : 12.50,
|
||||
"pages_i" : 384
|
||||
}
|
||||
,
|
||||
{
|
||||
"id" : "978-1423103349",
|
||||
"cat" : ["book","paperback"],
|
||||
"name" : "The Sea of Monsters",
|
||||
"author" : "Rick Riordan",
|
||||
"series_t" : "Percy Jackson and the Olympians",
|
||||
"sequence_i" : 2,
|
||||
"genre_s" : "fantasy",
|
||||
"inStock" : true,
|
||||
"price" : 6.49,
|
||||
"pages_i" : 304
|
||||
}
|
||||
,
|
||||
{
|
||||
"id" : "978-1857995879",
|
||||
"cat" : ["book","paperback"],
|
||||
"name" : "Sophie's World : The Greek Philosophers",
|
||||
"author" : "Jostein Gaarder",
|
||||
"sequence_i" : 1,
|
||||
"genre_s" : "fantasy",
|
||||
"inStock" : true,
|
||||
"price" : 3.07,
|
||||
"pages_i" : 64
|
||||
}
|
||||
,
|
||||
{
|
||||
"id" : "978-1933988177",
|
||||
"cat" : ["book","paperback"],
|
||||
"name" : "Lucene in Action, Second Edition",
|
||||
"author" : "Michael McCandless",
|
||||
"sequence_i" : 1,
|
||||
"genre_s" : "IT",
|
||||
"inStock" : true,
|
||||
"price" : 30.50,
|
||||
"pages_i" : 475
|
||||
}
|
||||
]
|
32
spec/test_folder/gb18030-example.xml
vendored
Normal file
32
spec/test_folder/gb18030-example.xml
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="GB18030"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">GB18030TEST</field>
|
||||
<field name="name">Test with some GB18030 encoded characters</field>
|
||||
<field name="features">No accents here</field>
|
||||
<field name="features">这是一个功能</field>
|
||||
<field name="features">This is a feature (translated)</field>
|
||||
<field name="features">这份文件是很有光泽</field>
|
||||
<field name="features">This document is very shiny (translated)</field>
|
||||
<field name="price">0.0</field>
|
||||
<field name="inStock">true</field>
|
||||
</doc>
|
||||
</add>
|
||||
|
56
spec/test_folder/hd.xml
vendored
Normal file
56
spec/test_folder/hd.xml
vendored
Normal file
@ -0,0 +1,56 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">SP2514N</field>
|
||||
<field name="name">Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133</field>
|
||||
<field name="manu">Samsung Electronics Co. Ltd.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">samsung</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">hard drive</field>
|
||||
<field name="features">7200RPM, 8MB cache, IDE Ultra ATA-133</field>
|
||||
<field name="features">NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor</field>
|
||||
<field name="price">92.0</field>
|
||||
<field name="popularity">6</field>
|
||||
<field name="inStock">true</field>
|
||||
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||
<!-- Near Oklahoma city -->
|
||||
<field name="store">35.0752,-97.032</field>
|
||||
</doc>
|
||||
|
||||
<doc>
|
||||
<field name="id">6H500F0</field>
|
||||
<field name="name">Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300</field>
|
||||
<field name="manu">Maxtor Corp.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">maxtor</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">hard drive</field>
|
||||
<field name="features">SATA 3.0Gb/s, NCQ</field>
|
||||
<field name="features">8.5ms seek</field>
|
||||
<field name="features">16MB cache</field>
|
||||
<field name="price">350.0</field>
|
||||
<field name="popularity">6</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- Buffalo store -->
|
||||
<field name="store">45.17614,-93.87341</field>
|
||||
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||
</doc>
|
||||
</add>
|
||||
|
60
spec/test_folder/ipod_other.xml
vendored
Normal file
60
spec/test_folder/ipod_other.xml
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add>
|
||||
|
||||
<doc>
|
||||
<field name="id">F8V7067-APL-KIT</field>
|
||||
<field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
|
||||
<field name="manu">Belkin</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">belkin</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">connector</field>
|
||||
<field name="features">car power adapter, white</field>
|
||||
<field name="weight">4.0</field>
|
||||
<field name="price">19.95</field>
|
||||
<field name="popularity">1</field>
|
||||
<field name="inStock">false</field>
|
||||
<!-- Buffalo store -->
|
||||
<field name="store">45.18014,-93.87741</field>
|
||||
<field name="manufacturedate_dt">2005-08-01T16:30:25Z</field>
|
||||
</doc>
|
||||
|
||||
<doc>
|
||||
<field name="id">IW-02</field>
|
||||
<field name="name">iPod &amp; iPod Mini USB 2.0 Cable</field>
|
||||
<field name="manu">Belkin</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">belkin</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">connector</field>
|
||||
<field name="features">car power adapter for iPod, white</field>
|
||||
<field name="weight">2.0</field>
|
||||
<field name="price">11.50</field>
|
||||
<field name="popularity">1</field>
|
||||
<field name="inStock">false</field>
|
||||
<!-- San Francisco store -->
|
||||
<field name="store">37.7752,-122.4232</field>
|
||||
<field name="manufacturedate_dt">2006-02-14T23:55:59Z</field>
|
||||
</doc>
|
||||
|
||||
|
||||
</add>
|
||||
|
||||
|
||||
|
40
spec/test_folder/ipod_video.xml
vendored
Normal file
40
spec/test_folder/ipod_video.xml
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add><doc>
|
||||
<field name="id">MA147LL/A</field>
|
||||
<field name="name">Apple 60 GB iPod with Video Playback Black</field>
|
||||
<field name="manu">Apple Computer Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">apple</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">music</field>
|
||||
<field name="features">iTunes, Podcasts, Audiobooks</field>
|
||||
<field name="features">Stores up to 15,000 songs, 25,000 photos, or 150 hours of video</field>
|
||||
<field name="features">2.5-inch, 320x240 color TFT LCD display with LED backlight</field>
|
||||
<field name="features">Up to 20 hours of battery life</field>
|
||||
<field name="features">Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video</field>
|
||||
<field name="features">Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication</field>
|
||||
<field name="includes">earbud headphones, USB cable</field>
|
||||
<field name="weight">5.5</field>
|
||||
<field name="price">399.00</field>
|
||||
<field name="popularity">10</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- Dodge City store -->
|
||||
<field name="store">37.7752,-100.0232</field>
|
||||
<field name="manufacturedate_dt">2005-10-12T08:00:00Z</field>
|
||||
</doc></add>
|
33
spec/test_folder/monitor2.xml
vendored
Normal file
33
spec/test_folder/monitor2.xml
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add><doc>
|
||||
<field name="id">VA902B</field>
|
||||
<field name="name">ViewSonic VA902B - flat panel display - TFT - 19"</field>
|
||||
<field name="manu">ViewSonic Corp.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">viewsonic</field>
|
||||
<field name="cat">electronics and stuff2</field>
|
||||
<field name="features">19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution</field>
|
||||
<field name="weight">190.4</field>
|
||||
<field name="price">279.95</field>
|
||||
<field name="popularity">6</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- Buffalo store -->
|
||||
<field name="store">45.18814,-93.88541</field>
|
||||
</doc></add>
|
||||
|
3
spec/test_folder/more_books.jsonl
vendored
Normal file
3
spec/test_folder/more_books.jsonl
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
{"id":"0060248025","name":"Falling Up","inStock": true,"author": "Shel Silverstein"}
|
||||
{"id":"0679805273","name":"Oh, The Places You'll Go","inStock": true,"author": "Dr. Seuss"}
|
||||
|
43
spec/test_folder/mp500.xml
vendored
Normal file
43
spec/test_folder/mp500.xml
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add><doc>
|
||||
<field name="id">0579B002</field>
|
||||
<field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
|
||||
<field name="manu">Canon Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">canon</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">multifunction printer</field>
|
||||
<field name="cat">printer</field>
|
||||
<field name="cat">scanner</field>
|
||||
<field name="cat">copier</field>
|
||||
<field name="features">Multifunction ink-jet color photo printer</field>
|
||||
<field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
|
||||
<field name="features">2.5" color LCD preview screen</field>
|
||||
<field name="features">Duplex Copying</field>
|
||||
<field name="features">Printing speed up to 29ppm black, 19ppm color</field>
|
||||
<field name="features">Hi-Speed USB</field>
|
||||
<field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
|
||||
<field name="weight">352.0</field>
|
||||
<field name="price">179.99</field>
|
||||
<field name="popularity">6</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- Buffalo store -->
|
||||
<field name="store">45.19214,-93.89941</field>
|
||||
</doc></add>
|
||||
|
BIN
spec/test_folder/post.jar
vendored
Normal file
BIN
spec/test_folder/post.jar
vendored
Normal file
Binary file not shown.
13
spec/test_folder/sample.html
vendored
Normal file
13
spec/test_folder/sample.html
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Welcome to Solr</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
Here is some text
|
||||
</p>
|
||||
<p>distinct<br/>words</p>
|
||||
<div>Here is some text in a div</div>
|
||||
<div>This has a <a href="http://www.apache.org">link</a>.</div>
|
||||
</body>
|
||||
</html>
|
BIN
spec/test_folder/sample_1.jpg
vendored
Normal file
BIN
spec/test_folder/sample_1.jpg
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 32 KiB |
BIN
spec/test_folder/sample_2.jpeg
vendored
Normal file
BIN
spec/test_folder/sample_2.jpeg
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 55 KiB |
BIN
spec/test_folder/sample_3.jpg
vendored
Normal file
BIN
spec/test_folder/sample_3.jpg
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.9 MiB |
BIN
spec/test_folder/sample_4.jpg
vendored
Normal file
BIN
spec/test_folder/sample_4.jpg
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 348 KiB |
BIN
spec/test_folder/solr-word.pdf
vendored
Normal file
BIN
spec/test_folder/solr-word.pdf
vendored
Normal file
Binary file not shown.
38
spec/test_folder/solr.xml
vendored
Normal file
38
spec/test_folder/solr.xml
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">SOLR1000</field>
|
||||
<field name="name">Solr, the Enterprise Search Server</field>
|
||||
<field name="manu">Apache Software Foundation</field>
|
||||
<field name="cat">software</field>
|
||||
<field name="cat">search</field>
|
||||
<field name="features">Advanced Full-Text Search Capabilities using Lucene</field>
|
||||
<field name="features">Optimized for High Volume Web Traffic</field>
|
||||
<field name="features">Standards Based Open Interfaces - XML and HTTP</field>
|
||||
<field name="features">Comprehensive HTML Administration Interfaces</field>
|
||||
<field name="features">Scalability - Efficient Replication to other Solr Search Servers</field>
|
||||
<field name="features">Flexible and Adaptable with XML configuration and Schema</field>
|
||||
<field name="features">Good unicode support: héllo (hello with an accent over the e)</field>
|
||||
<field name="price">0.0</field>
|
||||
<field name="popularity">10</field>
|
||||
<field name="inStock">true</field>
|
||||
<field name="incubationdate_dt">2006-01-17T00:00:00.000Z</field>
|
||||
</doc>
|
||||
</add>
|
||||
|
75
spec/test_folder/sub1/manufacturers.xml
Normal file
75
spec/test_folder/sub1/manufacturers.xml
Normal file
@ -0,0 +1,75 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">adata</field>
|
||||
<field name="compName_s">A-Data Technology</field>
|
||||
<field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">apple</field>
|
||||
<field name="compName_s">Apple</field>
|
||||
<field name="address_s">1 Infinite Way, Cupertino CA</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">asus</field>
|
||||
<field name="compName_s">ASUS Computer</field>
|
||||
<field name="address_s">800 Corporate Way Fremont, CA 94539</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">ati</field>
|
||||
<field name="compName_s">ATI Technologies</field>
|
||||
<field name="address_s">33 Commerce Valley Drive East Thornhill, ON L3T 7N6 Canada</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">belkin</field>
|
||||
<field name="compName_s">Belkin</field>
|
||||
<field name="address_s">12045 E. Waterfront Drive Playa Vista, CA 90094</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">canon</field>
|
||||
<field name="compName_s">Canon, Inc.</field>
|
||||
<field name="address_s">One Canon Plaza Lake Success, NY 11042</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">corsair</field>
|
||||
<field name="compName_s">Corsair Microsystems</field>
|
||||
<field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">dell</field>
|
||||
<field name="compName_s">Dell, Inc.</field>
|
||||
<field name="address_s">One Dell Way Round Rock, Texas 78682</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">maxtor</field>
|
||||
<field name="compName_s">Maxtor Corporation</field>
|
||||
<field name="address_s">920 Disc Drive Scotts Valley, CA 95066</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">samsung</field>
|
||||
<field name="compName_s">Samsung Electronics Co. Ltd.</field>
|
||||
<field name="address_s">105 Challenger Rd. Ridgefield Park, NJ 07660-0511</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">viewsonic</field>
|
||||
<field name="compName_s">ViewSonic Corp</field>
|
||||
<field name="address_s">381 Brea Canyon Road Walnut, CA 91789-0708</field>
|
||||
</doc>
|
||||
</add>
|
||||
|
77
spec/test_folder/sub1/mem.xml
Normal file
77
spec/test_folder/sub1/mem.xml
Normal file
@ -0,0 +1,77 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">TWINX2048-3200PRO</field>
|
||||
<field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
|
||||
<field name="manu">Corsair Microsystems Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">corsair</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">memory</field>
|
||||
<field name="features">CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader</field>
|
||||
<field name="price">185.00</field>
|
||||
<field name="popularity">5</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- San Francisco store -->
|
||||
<field name="store">37.7752,-122.4232</field>
|
||||
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||
|
||||
<!-- a field for testing payload tagged text via DelimitedPayloadTokenFilter -->
|
||||
<field name="payloads">electronics|6.0 memory|3.0</field>
|
||||
</doc>
|
||||
|
||||
<doc>
|
||||
<field name="id">VS1GB400C3</field>
|
||||
<field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail</field>
|
||||
<field name="manu">Corsair Microsystems Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">corsair</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">memory</field>
|
||||
<field name="price">74.99</field>
|
||||
<field name="popularity">7</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- Dodge City store -->
|
||||
<field name="store">37.7752,-100.0232</field>
|
||||
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||
|
||||
<field name="payloads">electronics|4.0 memory|2.0</field>
|
||||
</doc>
|
||||
|
||||
<doc>
|
||||
<field name="id">VDBDB1A16</field>
|
||||
<field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM</field>
|
||||
<field name="manu">A-DATA Technology Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">corsair</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">memory</field>
|
||||
<field name="features">CAS latency 3, 2.7v</field>
|
||||
<!-- note: price & popularity is missing on this one -->
|
||||
<field name="popularity">0</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- Buffalo store -->
|
||||
<field name="store">45.18414,-93.88141</field>
|
||||
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||
|
||||
<field name="payloads">electronics|0.9 memory|0.1</field>
|
||||
</doc>
|
||||
|
||||
</add>
|
||||
|
65
spec/test_folder/sub1/money.xml
Normal file
65
spec/test_folder/sub1/money.xml
Normal file
@ -0,0 +1,65 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- Example documents utilizing the CurrencyField type -->
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">USD</field>
|
||||
<field name="name">One Dollar</field>
|
||||
<field name="manu">Bank of America</field>
|
||||
<field name="manu_id_s">boa</field>
|
||||
<field name="cat">currency</field>
|
||||
<field name="features">Coins and notes</field>
|
||||
<field name="price_c">1,USD</field>
|
||||
<field name="inStock">true</field>
|
||||
</doc>
|
||||
|
||||
<doc>
|
||||
<field name="id">EUR</field>
|
||||
<field name="name">One Euro</field>
|
||||
<field name="manu">European Union</field>
|
||||
<field name="manu_id_s">eu</field>
|
||||
<field name="cat">currency</field>
|
||||
<field name="features">Coins and notes</field>
|
||||
<field name="price_c">1,EUR</field>
|
||||
<field name="inStock">true</field>
|
||||
</doc>
|
||||
|
||||
<doc>
|
||||
<field name="id">GBP</field>
|
||||
<field name="name">One British Pound</field>
|
||||
<field name="manu">U.K.</field>
|
||||
<field name="manu_id_s">uk</field>
|
||||
<field name="cat">currency</field>
|
||||
<field name="features">Coins and notes</field>
|
||||
<field name="price_c">1,GBP</field>
|
||||
<field name="inStock">true</field>
|
||||
</doc>
|
||||
|
||||
<doc>
|
||||
<field name="id">NOK</field>
|
||||
<field name="name">One Krone</field>
|
||||
<field name="manu">Bank of Norway</field>
|
||||
<field name="manu_id_s">nor</field>
|
||||
<field name="cat">currency</field>
|
||||
<field name="features">Coins and notes</field>
|
||||
<field name="price_c">1,NOK</field>
|
||||
<field name="inStock">true</field>
|
||||
</doc>
|
||||
|
||||
</add>
|
||||
|
34
spec/test_folder/sub2/monitor.xml
Normal file
34
spec/test_folder/sub2/monitor.xml
Normal file
@ -0,0 +1,34 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add><doc>
|
||||
<field name="id">3007WFP</field>
|
||||
<field name="name">Dell Widescreen UltraSharp 3007WFP</field>
|
||||
<field name="manu">Dell, Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">dell</field>
|
||||
<field name="cat">electronics and computer1</field>
|
||||
<field name="features">30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast</field>
|
||||
<field name="includes">USB cable</field>
|
||||
<field name="weight">401.6</field>
|
||||
<field name="price">2199.0</field>
|
||||
<field name="popularity">6</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- Buffalo store -->
|
||||
<field name="store">43.17614,-90.57341</field>
|
||||
</doc></add>
|
||||
|
38
spec/test_folder/sub2/sd500.xml
Normal file
38
spec/test_folder/sub2/sd500.xml
Normal file
@ -0,0 +1,38 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add><doc>
|
||||
<field name="id">9885A004</field>
|
||||
<field name="name">Canon PowerShot SD500</field>
|
||||
<field name="manu">Canon Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">canon</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">camera</field>
|
||||
<field name="features">3x zoop, 7.1 megapixel Digital ELPH</field>
|
||||
<field name="features">movie clips up to 640x480 @30 fps</field>
|
||||
<field name="features">2.0" TFT LCD, 118,000 pixels</field>
|
||||
<field name="features">built in flash, red-eye reduction</field>
|
||||
<field name="includes">32MB SD card, USB cable, AV cable, battery</field>
|
||||
<field name="weight">6.4</field>
|
||||
<field name="price">329.95</field>
|
||||
<field name="popularity">7</field>
|
||||
<field name="inStock">true</field>
|
||||
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||
<!-- Buffalo store -->
|
||||
<field name="store">45.19614,-93.90341</field>
|
||||
</doc></add>
|
3
spec/test_folder/sub2/sub_sub1/more_books.jsonl
Normal file
3
spec/test_folder/sub2/sub_sub1/more_books.jsonl
Normal file
@ -0,0 +1,3 @@
|
||||
{"id":"0060248025","name":"Falling Up","inStock": true,"author": "Shel Silverstein"}
|
||||
{"id":"0679805273","name":"Oh, The Places You'll Go","inStock": true,"author": "Dr. Seuss"}
|
||||
|
43
spec/test_folder/sub2/sub_sub1/mp500.xml
Normal file
43
spec/test_folder/sub2/sub_sub1/mp500.xml
Normal file
@ -0,0 +1,43 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add><doc>
|
||||
<field name="id">0579B002</field>
|
||||
<field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
|
||||
<field name="manu">Canon Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">canon</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">multifunction printer</field>
|
||||
<field name="cat">printer</field>
|
||||
<field name="cat">scanner</field>
|
||||
<field name="cat">copier</field>
|
||||
<field name="features">Multifunction ink-jet color photo printer</field>
|
||||
<field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
|
||||
<field name="features">2.5" color LCD preview screen</field>
|
||||
<field name="features">Duplex Copying</field>
|
||||
<field name="features">Printing speed up to 29ppm black, 19ppm color</field>
|
||||
<field name="features">Hi-Speed USB</field>
|
||||
<field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
|
||||
<field name="weight">352.0</field>
|
||||
<field name="price">179.99</field>
|
||||
<field name="popularity">6</field>
|
||||
<field name="inStock">true</field>
|
||||
<!-- Buffalo store -->
|
||||
<field name="store">45.19214,-93.89941</field>
|
||||
</doc></add>
|
||||
|
BIN
spec/test_folder/sub2/sub_sub1/post.jar
Normal file
BIN
spec/test_folder/sub2/sub_sub1/post.jar
Normal file
Binary file not shown.
93
spec/test_folder/test_utf8.sh
vendored
Executable file
93
spec/test_folder/test_utf8.sh
vendored
Executable file
@ -0,0 +1,93 @@
|
||||
#!/bin/sh
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#Test script to tell if the server is accepting UTF-8
|
||||
#The python writer currently escapes non-ascii chars, so it's good for testing
|
||||
|
||||
SOLR_URL=http://localhost:8983/solr
|
||||
|
||||
if [ ! -z $1 ]; then
|
||||
SOLR_URL=$1
|
||||
fi
|
||||
|
||||
curl "$SOLR_URL/select?q=hello&params=explicit&wt=python" 2> /dev/null | grep 'hello' > /dev/null 2>&1
|
||||
if [ $? = 0 ]; then
|
||||
echo "Solr server is up."
|
||||
else
|
||||
echo "ERROR: Could not curl to Solr - is curl installed? Is Solr not running?"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
curl "$SOLR_URL/select?q=h%C3%A9llo&echoParams=explicit&wt=python" 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
|
||||
if [ $? = 0 ]; then
|
||||
echo "HTTP GET is accepting UTF-8"
|
||||
else
|
||||
echo "ERROR: HTTP GET is not accepting UTF-8"
|
||||
fi
|
||||
|
||||
curl $SOLR_URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
|
||||
if [ $? = 0 ]; then
|
||||
echo "HTTP POST is accepting UTF-8"
|
||||
else
|
||||
echo "ERROR: HTTP POST is not accepting UTF-8"
|
||||
fi
|
||||
|
||||
curl $SOLR_URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
|
||||
if [ $? = 0 ]; then
|
||||
echo "HTTP POST defaults to UTF-8"
|
||||
else
|
||||
echo "HTTP POST does not default to UTF-8"
|
||||
fi
|
||||
|
||||
|
||||
#A unicode character outside of the BMP (a circle with an x inside)
|
||||
CHAR="𐌈"
|
||||
CODEPOINT='0x10308'
|
||||
#URL encoded UTF8 of the codepoint
|
||||
UTF8_Q='%F0%90%8C%88'
|
||||
#expected return of the python writer (currently uses UTF-16 surrogates)
|
||||
EXPECTED='\\ud800\\udf08'
|
||||
|
||||
curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit&wt=python" 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
|
||||
if [ $? = 0 ]; then
|
||||
echo "HTTP GET is accepting UTF-8 beyond the basic multilingual plane"
|
||||
else
|
||||
echo "ERROR: HTTP GET is not accepting UTF-8 beyond the basic multilingual plane"
|
||||
fi
|
||||
|
||||
curl $SOLR_URL/select --data-binary "q=$UTF8_Q&echoParams=explicit&wt=python" -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
|
||||
if [ $? = 0 ]; then
|
||||
echo "HTTP POST is accepting UTF-8 beyond the basic multilingual plane"
|
||||
else
|
||||
echo "ERROR: HTTP POST is not accepting UTF-8 beyond the basic multilingual plane"
|
||||
fi
|
||||
|
||||
curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit&wt=python" --data-binary '' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
|
||||
if [ $? = 0 ]; then
|
||||
echo "HTTP POST + URL params is accepting UTF-8 beyond the basic multilingual plane"
|
||||
else
|
||||
echo "ERROR: HTTP POST + URL params is not accepting UTF-8 beyond the basic multilingual plane"
|
||||
fi
|
||||
|
||||
#curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit" 2> /dev/null | od -tx1 -w1000 | sed 's/ //g' | grep 'f4808198' > /dev/null 2>&1
|
||||
curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit" 2> /dev/null | grep "$CHAR" > /dev/null 2>&1
|
||||
if [ $? = 0 ]; then
|
||||
echo "Response correctly returns UTF-8 beyond the basic multilingual plane"
|
||||
else
|
||||
echo "ERROR: Response can't return UTF-8 beyond the basic multilingual plane"
|
||||
fi
|
||||
|
||||
|
42
spec/test_folder/utf8-example.xml
vendored
Normal file
42
spec/test_folder/utf8-example.xml
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
After posting this to Solr with bin/post, searching for "êâîôû" from
|
||||
the solr/admin/ search page must return this document.
|
||||
-->
|
||||
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">UTF8TEST</field>
|
||||
<field name="name">Test with some UTF-8 encoded characters</field>
|
||||
<field name="manu">Apache Software Foundation</field>
|
||||
<field name="cat">software</field>
|
||||
<field name="cat">search</field>
|
||||
<field name="features">No accents here</field>
|
||||
<field name="features">This is an e acute: é</field>
|
||||
<field name="features">eaiou with circumflexes: êâîôû</field>
|
||||
<field name="features">eaiou with umlauts: ëäïöü</field>
|
||||
<field name="features">tag with escaped chars: &lt;nicetag/&gt;</field>
|
||||
<field name="features">escaped ampersand: Bonnie &amp; Clyde</field>
|
||||
<field name="features">Outside the BMP:𐌈 codepoint=10308, a circle with an x inside. UTF8=f0908c88 UTF16=d800 df08</field>
|
||||
<field name="price">0.0</field>
|
||||
<field name="inStock">true</field>
|
||||
</doc>
|
||||
</add>
|
||||
|
62
spec/test_folder/vidcard.xml
vendored
Normal file
62
spec/test_folder/vidcard.xml
vendored
Normal file
@ -0,0 +1,62 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">EN7800GTX/2DHTV/256M</field>
|
||||
<field name="name">ASUS Extreme N7800GTX/2DHTV (256 MB)</field>
|
||||
<!-- Denormalized -->
|
||||
<field name="manu">ASUS Computer Inc.</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">asus</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">graphics card</field>
|
||||
<field name="features">NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz</field>
|
||||
<field name="features">256MB GDDR3 Memory clocked at 1.35GHz</field>
|
||||
<field name="features">PCI Express x16</field>
|
||||
<field name="features">Dual DVI connectors, HDTV out, video input</field>
|
||||
<field name="features">OpenGL 2.0, DirectX 9.0</field>
|
||||
<field name="weight">16.0</field>
|
||||
<field name="price">479.95</field>
|
||||
<field name="popularity">7</field>
|
||||
<field name="store">40.7143,-74.006</field>
|
||||
<field name="inStock">false</field>
|
||||
<field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
|
||||
</doc>
|
||||
<!-- yes, you can add more than one document at a time -->
|
||||
<doc>
|
||||
<field name="id">100-435805</field>
|
||||
<field name="name">ATI Radeon X1900 XTX 512 MB PCIE Video Card</field>
|
||||
<field name="manu">ATI Technologies</field>
|
||||
<!-- Join -->
|
||||
<field name="manu_id_s">ati</field>
|
||||
<field name="cat">electronics</field>
|
||||
<field name="cat">graphics card</field>
|
||||
<field name="features">ATI RADEON X1900 GPU/VPU clocked at 650MHz</field>
|
||||
<field name="features">512MB GDDR3 SDRAM clocked at 1.55GHz</field>
|
||||
<field name="features">PCI Express x16</field>
|
||||
<field name="features">dual DVI, HDTV, svideo, composite out</field>
|
||||
<field name="features">OpenGL 2.0, DirectX 9.0</field>
|
||||
<field name="weight">48.0</field>
|
||||
<field name="price">649.99</field>
|
||||
<field name="popularity">7</field>
|
||||
<field name="inStock">false</field>
|
||||
<field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
|
||||
<!-- NYC store -->
|
||||
<field name="store">40.7143,-74.006</field>
|
||||
</doc>
|
||||
</add>
|
107
storage.py
Normal file
107
storage.py
Normal file
@ -0,0 +1,107 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
|
||||
class DuplicateDirectoryException(Exception):
    """
    Raised by LocalStorage.save_directory when the insert violates a database
    integrity constraint, which is reported as a duplicate directory path.
    """
|
||||
|
||||
|
||||
class Directory:
    """
    Plain record describing one directory handled by the application.

    Attributes:
        path: path of the directory, stored as given
        enabled: enabled flag, stored as given
        options: list of options attached to the directory (kept by reference)
    """

    def __init__(self, path: str, enabled: bool, options: list):
        self.path, self.enabled, self.options = path, enabled, options

    def __str__(self):
        # str.join only accepts strings, just like the "+" chain it replaces,
        # so a non-str path still raises TypeError.
        pieces = (self.path, " | enabled: ", str(self.enabled), " | opts: ", str(self.options))
        return "".join(pieces)
|
||||
|
||||
|
||||
class LocalStorage:
    """
    Manages storage of application data to disk.
    Could be refactored into an abstract class to switch from SQLite3 to something else
    """

    # Static variable that indicates that the database was changed since the
    # last time it was cached in memory
    cache_outdated = True

    # Location of the SQLite database file
    db_path = "../local_storage.db"

    def __init__(self):
        # Directories loaded by the last call to dirs(), keyed by path
        self.cached_dirs = {}

    @staticmethod
    def init_db(script_path):
        """
        Creates a blank database. Overwrites the old one

        :param script_path: Path of the SQL script executed against the new database
        :return: None
        """
        # Read the script first so that an unreadable script does not destroy
        # the existing database
        with open(script_path, "r") as f:
            script = f.read()

        if os.path.isfile(LocalStorage.db_path):
            os.remove(LocalStorage.db_path)

        # The database was replaced: whatever is cached in memory is stale
        LocalStorage.cache_outdated = True

        conn = sqlite3.connect(LocalStorage.db_path)
        try:
            c = conn.cursor()
            c.executescript(script)
            conn.commit()
            c.close()
        finally:
            # Release the connection even when the script fails
            conn.close()

    def save_directory(self, directory: Directory):
        """
        Save directory to storage

        :param directory: Directory to save
        :return: None
        :raises DuplicateDirectoryException: when SQLite reports an integrity
            error while inserting the directory or its options
        """

        LocalStorage.cache_outdated = True

        conn = sqlite3.connect(LocalStorage.db_path)
        try:
            c = conn.cursor()
            c.execute("PRAGMA FOREIGN_KEYS = ON;")

            c.execute("INSERT INTO Directory (path, enabled) VALUES (?, ?)", (directory.path, directory.enabled))
            c.execute("SELECT last_insert_rowid()")
            dir_id = c.fetchone()[0]

            for opt in directory.options:
                c.execute("INSERT INTO Option (name, directory_id) VALUES (?, ?)", (opt, dir_id))

            conn.commit()
        except sqlite3.IntegrityError as e:
            raise DuplicateDirectoryException("Duplicate directory path: " + directory.path) from e
        finally:
            # Closing without a commit discards a partially saved directory
            conn.close()

    def dirs(self):
        """
        Get the stored directories, reading the database only when it changed

        :return: dict mapping each directory path to its Directory object
        """

        # NOTE(review): cache_outdated is shared by every instance while
        # cached_dirs belongs to one instance, so an instance that did not
        # perform the reload itself keeps returning its own older cache.
        if not LocalStorage.cache_outdated:
            return self.cached_dirs

        conn = sqlite3.connect(LocalStorage.db_path)
        try:
            c = conn.cursor()
            c.execute("SELECT id, path, enabled FROM Directory")
            db_directories = c.fetchall()
            c.execute("SELECT name, directory_id FROM Option")
            db_options = c.fetchall()
        finally:
            conn.close()

        # Group option names by directory id once instead of rescanning the
        # whole option list for every directory
        options_by_dir = {}
        for name, directory_id in db_options:
            options_by_dir.setdefault(directory_id, []).append(name)

        self.cached_dirs = {
            path: Directory(path, enabled, options_by_dir.get(dir_id, []))
            for dir_id, path, enabled in db_directories
        }
        LocalStorage.cache_outdated = False

        return self.cached_dirs
|
10
templates/layout.html
Normal file
10
templates/layout.html
Normal file
@ -0,0 +1,10 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Layout Title</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
</body>
|
||||
</html>
|
129
webserver.py
129
webserver.py
@ -1,129 +0,0 @@
|
||||
from flask import Flask, render_template, send_file, request
|
||||
import pysolr
|
||||
import mimetypes
|
||||
import requests
|
||||
import json
|
||||
from PIL import Image
|
||||
import os
|
||||
|
||||
# Base URL of the Solr endpoint; request paths are appended to it
SOLR_URL = "http://localhost:8983/solr/test/"

# Client used to run search queries against SOLR_URL
solr = pysolr.Solr(SOLR_URL, timeout=10)

# Flask application on which the routes of this module are registered
app = Flask(__name__)
|
||||
|
||||
|
||||
class Document:
    """Holds the id, name, path, size and md5 of one document."""

    def __init__(self, doc_id, name, path, size, md5):
        self.doc_id, self.name, self.path = doc_id, name, path
        self.size, self.md5 = size, md5
|
||||
|
||||
|
||||
class ImageDocument(Document):
    """Document whose `type` attribute is set to "image"."""

    def __init__(self, doc_id, name, path, size, md5):
        super(ImageDocument, self).__init__(doc_id, name, path, size, md5)
        self.type = "image"
|
||||
|
||||
|
||||
class AudioClipDocument(Document):
    """Document whose `type` attribute is set to "audio"."""

    def __init__(self, doc_id, name, path, size, md5):
        super(AudioClipDocument, self).__init__(doc_id, name, path, size, md5)
        self.type = "audio"
|
||||
|
||||
|
||||
def get_document(id):
    """
    Fetch one document from the Solr `get` endpoint.

    :param id: Id of the document to fetch
    :return: Value stored under "doc" in the JSON response
    """

    # Let requests build the query string so that characters such as "&" or
    # "#" in the id are percent-encoded instead of altering the request.
    # The timeout mirrors the one configured on the pysolr client.
    response = requests.get(SOLR_URL + "get", params={"id": id}, timeout=10)

    return json.loads(response.text)["doc"]
|
||||
|
||||
|
||||
def make_thumb(doc):
    """
    Return the thumbnail path of a document, creating the thumbnail if needed.

    When nothing exists yet at the thumbnail path, an image wider than 1024
    pixels is scaled down to fit in 1024x1024 and saved as JPEG (PNG when its
    mode is RGBA); any other image is symlinked to the original file.

    :param doc: Mapping with an "id" entry and "path", "name" and "width"
        entries whose first element is used
    :return: "thumbnails/" followed by the document id
    """
    size = (1024, 1024)

    thumb_path = "thumbnails/" + doc["id"]

    if os.path.exists(thumb_path):
        return thumb_path

    file_path = doc["path"][0] + "/" + doc["name"][0]

    # NOTE(review): only the width is compared; a narrow but very tall image
    # is symlinked at full size - confirm this is intended
    if doc["width"][0] > size[0]:
        # The context manager closes the source file once the thumbnail is saved
        with Image.open(file_path) as image:
            # LANCZOS is the filter ANTIALIAS was an alias of; the ANTIALIAS
            # name no longer exists in Pillow 10
            image.thumbnail(size, Image.LANCZOS)

            if image.mode == "RGB":
                image.save(thumb_path, "JPEG")
            elif image.mode == "RGBA":
                image.save(thumb_path, "PNG")
            else:
                # JPEG cannot store the remaining modes directly
                image.convert("RGB").save(thumb_path, "JPEG")
    else:
        print("Skipping thumbnail")
        os.symlink(file_path, thumb_path)

    return thumb_path
|
||||
|
||||
|
||||
@app.route("/search/")
def search():
    """
    Run a Solr search and render the image and audio results.

    Reads the "query", "page" and "per_page" request arguments. Results whose
    first "mime" value starts with neither "image" nor "audio" are left out.

    :return: Rendered "search.html", or a 400 response when an argument is
        missing or is not a non-negative integer
    """

    query = request.args.get("query")
    # type=int yields None instead of raising when the value is absent or
    # not an integer
    page = request.args.get("page", type=int)
    per_page = request.args.get("per_page", type=int)

    if query is None or page is None or per_page is None or page < 0 or per_page < 0:
        return "Invalid search parameters", 400

    results = solr.search(query, None, rows=per_page, start=per_page * page)

    docs = []
    for r in results:

        mime_type = r["mime"][0] if "mime" in r else ""

        if mime_type.startswith("image"):
            doc_class = ImageDocument
        elif mime_type.startswith("audio"):
            doc_class = AudioClipDocument
        else:
            continue

        docs.append(doc_class(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))

    return render_template("search.html", docs=docs)
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Render the "index.html" template for the root URL."""
    page = render_template("index.html")
    return page
|
||||
|
||||
|
||||
@app.route("/files/<id>/")
def files(id):
    """
    Send the file that belongs to a document.

    :param id: Document id taken from the URL
    :return: The file contents, or a 404 response when no document was found
    """

    doc = get_document(id)

    if doc is None:
        # Report the miss with a 404 status rather than a successful response
        return "File not found", 404

    file_path = doc["path"][0] + "/" + doc["name"][0]
    return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
|
||||
|
||||
|
||||
@app.route("/thumbs/<doc_id>/")
def thumbs(doc_id):
    """
    Send the thumbnail of a document, creating it on first request.

    :param doc_id: Document id taken from the URL
    :return: The thumbnail contents, or a 404 response when no document was found
    """

    doc = get_document(doc_id)

    if doc is None:
        # Report the miss with a 404 status rather than a successful response
        return "File not found", 404

    thumb_path = make_thumb(doc)

    # Serve the path make_thumb actually produced instead of rebuilding it
    # from the request parameter.
    # NOTE(review): the thumbnail path has no file extension, so guess_type
    # most likely returns None here - confirm how the type should be derived
    return send_file(thumb_path, mimetype=mimetypes.guess_type(thumb_path)[0])
|
||||
|
||||
|
||||
# Start the Flask server on all network interfaces, port 8080
app.run(host="0.0.0.0", port=8080)
|
Loading…
x
Reference in New Issue
Block a user