mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-19 18:16:45 +00:00
Added local storage for directories and generic file parser
This commit is contained in:
parent
09acdc762a
commit
fec23d40d9
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
spec/test_folder/* linguist-vendored
|
200
crawler.py
200
crawler.py
@ -1,64 +1,152 @@
|
|||||||
import os
|
import os
|
||||||
import hashlib
|
import hashlib
|
||||||
import mimetypes
|
|
||||||
from PIL import Image
|
|
||||||
import simplejson
|
|
||||||
|
|
||||||
rootDir = "/home/simon/Documents"
|
|
||||||
|
|
||||||
|
|
||||||
# https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
|
class Crawler:
|
||||||
def md5sum(filename, block_size=65536):
|
pass
|
||||||
hash = hashlib.md5()
|
|
||||||
with open(filename, "rb") as f:
|
|
||||||
for block in iter(lambda: f.read(block_size), b""):
|
|
||||||
hash.update(block)
|
|
||||||
return hash.hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def crawl(root_dir):
|
class FileParser:
|
||||||
|
pass
|
||||||
docs = []
|
|
||||||
|
|
||||||
for root, subdirs, files in os.walk(root_dir):
|
|
||||||
|
|
||||||
print(root)
|
|
||||||
|
|
||||||
for filename in files:
|
|
||||||
full_path = os.path.join(root, filename)
|
|
||||||
|
|
||||||
doc = dict()
|
|
||||||
|
|
||||||
doc["md5"] = md5sum(os.path.join(root, filename))
|
|
||||||
doc["path"] = root
|
|
||||||
doc["name"] = filename
|
|
||||||
doc["size"] = os.path.getsize(full_path)
|
|
||||||
doc["mtime"] = int(os.path.getmtime(full_path))
|
|
||||||
|
|
||||||
mime_type = mimetypes.guess_type(full_path)[0]
|
|
||||||
|
|
||||||
if mime_type is not None:
|
|
||||||
|
|
||||||
doc["mime"] = mime_type
|
|
||||||
|
|
||||||
if mime_type.startswith("image"):
|
|
||||||
try:
|
|
||||||
width, height = Image.open(full_path).size
|
|
||||||
|
|
||||||
doc["width"] = width
|
|
||||||
doc["height"] = height
|
|
||||||
except OSError:
|
|
||||||
doc.pop('mime', None)
|
|
||||||
pass
|
|
||||||
except ValueError:
|
|
||||||
doc.pop('mime', None)
|
|
||||||
pass
|
|
||||||
|
|
||||||
docs.append(doc)
|
|
||||||
|
|
||||||
file = open("crawler.json", "w")
|
|
||||||
file.write(simplejson.dumps(docs))
|
|
||||||
file.close()
|
|
||||||
|
|
||||||
|
|
||||||
crawl(rootDir)
|
class CheckSumCalculator:
    """
    Base class of the file checksum calculators

    Subclasses set a name attribute (the key under which the checksum is
    reported) and override checksum(); the file reading loop they all share
    lives in _hash_file().
    """

    # Number of bytes read from the file per iteration. Can be overridden on
    # a subclass or an instance; the resulting checksum does not depend on it.
    BLOCK_SIZE = 65536

    def checksum(self, path: str) -> str:
        """
        Calculate the checksum of a file
        :param path: path of the file
        :return: checksum
        """
        raise NotImplementedError()

    def _hash_file(self, path: str, hasher) -> str:
        """
        Feed the content of a file to a hashlib hash object, block by block
        :param path: path of the file
        :param hasher: new hash object exposing update() and hexdigest()
        :return: upper case hexadecimal digest of the file content
        """
        with open(path, "rb") as f:
            # Read fixed size blocks so that large files are never loaded
            # in memory all at once; read() returns b"" at end of file
            for block in iter(lambda: f.read(self.BLOCK_SIZE), b""):
                hasher.update(block)

        return hasher.hexdigest().upper()


class Md5CheckSumCalculator(CheckSumCalculator):
    """
    Calculates md5 checksums
    """

    def __init__(self):
        self.name = "md5"

    def checksum(self, path: str) -> str:
        """
        Calculate the md5 checksum of a file
        :param path: path of the file
        :return: md5 checksum (upper case hexadecimal)
        """
        return self._hash_file(path, hashlib.md5())


class Sha1CheckSumCalculator(CheckSumCalculator):
    """
    Calculates sha1 checksums
    """

    def __init__(self):
        self.name = "sha1"

    def checksum(self, path: str) -> str:
        """
        Calculate the sha1 checksum of a file
        :param path: path of the file
        :return: sha1 checksum (upper case hexadecimal)
        """
        return self._hash_file(path, hashlib.sha1())


class Sha256CheckSumCalculator(CheckSumCalculator):
    """
    Calculates sha256 checksums
    """

    def __init__(self):
        self.name = "sha256"

    def checksum(self, path: str) -> str:
        """
        Calculate the sha256 checksum of a file
        :param path: path of the file
        :return: sha256 checksum (upper case hexadecimal)
        """
        return self._hash_file(path, hashlib.sha256())
|
||||||
|
|
||||||
|
|
||||||
|
class GenericFileParser(FileParser):
    """
    Parser extracting the information that is available for any file
    """

    def __init__(self, checksum_calculators: list):
        """
        :param checksum_calculators: objects with a name attribute and a
        checksum(path) method; each one adds a name -> checksum entry to the
        parsed information
        """
        self.checksum_calculators = checksum_calculators

    def parse(self, path: str) -> dict:
        """
        Parse a generic file
        :param path: path of the file to parse
        :return: dict information about the file: "size" (as returned by
        os.path.getsize), "name" (file name without directories and without
        extension) and one entry per checksum calculator
        """

        info = dict()

        info["size"] = os.path.getsize(path)
        # Drop the directories before the extension: splitext() alone keeps
        # them, so "/some/dir/file.txt" used to be named "/some/dir/file"
        info["name"] = os.path.splitext(os.path.basename(path))[0]

        for calculator in self.checksum_calculators:
            info[calculator.name] = calculator.checksum(path)

        return info
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# def crawl(root_dir: str) -> None:
|
||||||
|
# docs = []
|
||||||
|
#
|
||||||
|
# for root, dirs, files in os.walk(root_dir):
|
||||||
|
#
|
||||||
|
# print(root)
|
||||||
|
#
|
||||||
|
# for filename in files:
|
||||||
|
# full_path = os.path.join(root, filename)
|
||||||
|
#
|
||||||
|
# doc = dict()
|
||||||
|
#
|
||||||
|
# doc["md5"] = md5sum(full_path)
|
||||||
|
# doc["path"] = root
|
||||||
|
# doc["name"] = filename
|
||||||
|
# doc["size"] = os.path.getsize(full_path)
|
||||||
|
# doc["mtime"] = int(os.path.getmtime(full_path))
|
||||||
|
#
|
||||||
|
# mime_type = mimetypes.guess_type(full_path)[0]
|
||||||
|
#
|
||||||
|
# if mime_type is not None:
|
||||||
|
#
|
||||||
|
# doc["mime"] = mime_type
|
||||||
|
#
|
||||||
|
# if mime_type.startswith("image"):
|
||||||
|
# try:
|
||||||
|
# width, height = Image.open(full_path).size
|
||||||
|
#
|
||||||
|
# doc["width"] = width
|
||||||
|
# doc["height"] = height
|
||||||
|
# except OSError:
|
||||||
|
# doc.pop('mime', None)
|
||||||
|
# pass
|
||||||
|
# except ValueError:
|
||||||
|
# doc.pop('mime', None)
|
||||||
|
# pass
|
||||||
|
#
|
||||||
|
# docs.append(doc)
|
||||||
|
#
|
||||||
|
# file = open("crawler.json", "w")
|
||||||
|
# file.write(simplejson.dumps(docs))
|
||||||
|
# file.close()
|
||||||
|
#
|
||||||
|
#
|
40
database.sql
Normal file
40
database.sql
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
PRAGMA FOREIGN_KEYS = ON;
|
||||||
|
|
||||||
|
-- Represents a directory and its sub-directories
|
||||||
|
CREATE TABLE Directory (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
path TEXT UNIQUE,
|
||||||
|
enabled BOOLEAN
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Represents a queued task for crawling a Directory or generating thumbnails
|
||||||
|
CREATE TABLE Task (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
directory_id INTEGER,
|
||||||
|
task_type INTEGER,
|
||||||
|
completed BOOLEAN DEFAULT 0,
|
||||||
|
completed_time DATETIME,
|
||||||
|
FOREIGN KEY (directory_id) REFERENCES Directory(id)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- You can set an option on a directory to change the crawler's behavior
|
||||||
|
CREATE TABLE Option (
  -- TEXT rather than STRING: SQLite gives an unrecognized type name such as
  -- STRING the NUMERIC affinity, so a numeric-looking name would be stored
  -- as a number instead of as text
  name TEXT,
  directory_id INTEGER,
  FOREIGN KEY (directory_id) REFERENCES Directory(id),
  -- A given option name can appear only once per directory
  PRIMARY KEY (name, directory_id)
);
|
||||||
|
|
||||||
|
-- User accounts
|
||||||
|
CREATE TABLE User (
|
||||||
|
username TEXT PRIMARY KEY,
|
||||||
|
password TEXT,
|
||||||
|
is_admin BOOLEAN
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE User_canRead_Directory (
  username TEXT,
  directory_id INTEGER,
  -- One row per (username, directory_id) pair
  PRIMARY KEY (username, directory_id)
-- Terminated with ';' like every other statement of this script, so that the
-- sqlite3 shell does not treat the last statement as incomplete input
);
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
PIL
|
||||||
|
simplejson
|
135
run.py
Normal file
135
run.py
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
from flask import Flask, render_template, send_file, request
|
||||||
|
import pysolr
|
||||||
|
import mimetypes
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from PIL import Image
|
||||||
|
import os
|
||||||
|
|
||||||
|
SOLR_URL = "http://localhost:8983/solr/test/"
|
||||||
|
|
||||||
|
solr = pysolr.Solr(SOLR_URL, timeout=10)
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
#
|
||||||
|
# class Document:
|
||||||
|
# def __init__(self, doc_id, name, path, size, md5):
|
||||||
|
# self.doc_id = doc_id
|
||||||
|
# self.name = name
|
||||||
|
# self.path = path
|
||||||
|
# self.size = size
|
||||||
|
# self.md5 = md5
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# class ImageDocument(Document):
|
||||||
|
# def __init__(self, doc_id, name, path, size, md5):
|
||||||
|
# super().__init__(doc_id, name, path, size, md5)
|
||||||
|
# self.type = "image"
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# class AudioClipDocument(Document):
|
||||||
|
# def __init__(self, doc_id, name, path, size, md5):
|
||||||
|
# super().__init__(doc_id, name, path, size, md5)
|
||||||
|
# self.type = "audio"
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# def get_document(id):
|
||||||
|
#
|
||||||
|
# response = requests.get(SOLR_URL + "get?id=" + id)
|
||||||
|
#
|
||||||
|
# return json.loads(response.text)["doc"]
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# def make_thumb(doc):
|
||||||
|
# size = (1024, 1024)
|
||||||
|
#
|
||||||
|
# thumb_path = "thumbnails/" + doc["id"]
|
||||||
|
#
|
||||||
|
# if not os.path.exists(thumb_path):
|
||||||
|
#
|
||||||
|
# file_path = doc["path"][0] + "/" + doc["name"][0]
|
||||||
|
#
|
||||||
|
# if doc["width"][0] > size[0]:
|
||||||
|
#
|
||||||
|
# image = Image.open(file_path)
|
||||||
|
# image.thumbnail(size, Image.ANTIALIAS)
|
||||||
|
#
|
||||||
|
# if image.mode == "RGB":
|
||||||
|
# image.save(thumb_path, "JPEG")
|
||||||
|
# elif image.mode == "RGBA":
|
||||||
|
# image.save(thumb_path, "PNG")
|
||||||
|
# else:
|
||||||
|
# image = image.convert("RGB")
|
||||||
|
# image.save(thumb_path, "JPEG")
|
||||||
|
# else:
|
||||||
|
# print("Skipping thumbnail")
|
||||||
|
# os.symlink(file_path, thumb_path)
|
||||||
|
#
|
||||||
|
# return "thumbnails/" + doc["id"]
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# @app.route("/search/")
|
||||||
|
# def search():
|
||||||
|
#
|
||||||
|
# query = request.args.get("query")
|
||||||
|
# page = int(request.args.get("page"))
|
||||||
|
# per_page = int(request.args.get("per_page"))
|
||||||
|
#
|
||||||
|
# results = solr.search(query, None, rows=per_page, start=per_page * page)
|
||||||
|
#
|
||||||
|
# docs = []
|
||||||
|
# for r in results:
|
||||||
|
#
|
||||||
|
# if "mime" in r:
|
||||||
|
# mime_type = r["mime"][0]
|
||||||
|
# else:
|
||||||
|
# mime_type = ""
|
||||||
|
#
|
||||||
|
# if mime_type.startswith("image"):
|
||||||
|
# docs.append(ImageDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
|
||||||
|
#
|
||||||
|
# elif mime_type.startswith("audio"):
|
||||||
|
# docs.append(AudioClipDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
|
||||||
|
#
|
||||||
|
# return render_template("search.html", docs=docs)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# @app.route("/")
|
||||||
|
# def index():
|
||||||
|
# return render_template("index.html")
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# @app.route("/files/<id>/")
|
||||||
|
# def files(id):
|
||||||
|
#
|
||||||
|
# doc = get_document(id)
|
||||||
|
#
|
||||||
|
# if doc is not None:
|
||||||
|
# file_path = doc["path"][0] + "/" + doc["name"][0]
|
||||||
|
# return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
|
||||||
|
# else:
|
||||||
|
# return "File not found"
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# @app.route("/thumbs/<doc_id>/")
|
||||||
|
# def thumbs(doc_id):
|
||||||
|
#
|
||||||
|
# doc = get_document(doc_id)
|
||||||
|
#
|
||||||
|
# if doc is not None:
|
||||||
|
#
|
||||||
|
# thumb_path = make_thumb(doc)
|
||||||
|
#
|
||||||
|
# return send_file("thumbnails/" + doc_id, mimetype=mimetypes.guess_type(thumb_path)[0])
|
||||||
|
# else:
|
||||||
|
# return "File not found"
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/")
|
||||||
|
def tmp_route():
|
||||||
|
return "test"
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app.run("0.0.0.0", 8080)
|
4
setupDb.sh
Executable file
4
setupDb.sh
Executable file
@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
rm test.db
|
||||||
|
sqlite3 local_storage.db -init "database.sql"
|
10
spec/Crawler_spec.py
Normal file
10
spec/Crawler_spec.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from crawler import Crawler
|
||||||
|
|
||||||
|
|
||||||
|
class CrawlerTest(TestCase):
|
||||||
|
|
||||||
|
def test_dir_walk(self):
|
||||||
|
c = Crawler()
|
||||||
|
|
131
spec/FileParser_spec.py
Normal file
131
spec/FileParser_spec.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
import os
|
||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from crawler import GenericFileParser, Md5CheckSumCalculator, Sha1CheckSumCalculator, Sha256CheckSumCalculator
|
||||||
|
|
||||||
|
|
||||||
|
class GenericFileParserTest(TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
if os.path.exists("test_parse"):
|
||||||
|
os.remove("test_parse")
|
||||||
|
|
||||||
|
test_file = open("test_parse", "w")
|
||||||
|
test_file.write("12345678")
|
||||||
|
test_file.close()
|
||||||
|
|
||||||
|
self.parser = GenericFileParser([Md5CheckSumCalculator()])
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
os.remove("test_parse")
|
||||||
|
|
||||||
|
def test_parse_size(self):
|
||||||
|
result = self.parser.parse("test_parse")
|
||||||
|
|
||||||
|
self.assertEqual(result["size"], 8)
|
||||||
|
|
||||||
|
def test_parse_name(self):
|
||||||
|
result = self.parser.parse("test_parse")
|
||||||
|
|
||||||
|
self.assertEqual(result["name"], "test_parse")
|
||||||
|
|
||||||
|
def test_parse_md5(self):
|
||||||
|
result = self.parser.parse("test_parse")
|
||||||
|
|
||||||
|
self.assertEqual(result["md5"], "25D55AD283AA400AF464C76D713C07AD")
|
||||||
|
|
||||||
|
|
||||||
|
class Md5CheckSumCalculatorTest(TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
if os.path.exists("test_md5_1"):
|
||||||
|
os.remove("test_md5_1")
|
||||||
|
|
||||||
|
test_file = open("test_md5_1", "w")
|
||||||
|
test_file.write("789456123")
|
||||||
|
test_file.close()
|
||||||
|
|
||||||
|
if os.path.exists("test_md5_2"):
|
||||||
|
os.remove("test_md5_2")
|
||||||
|
|
||||||
|
test_file = open("test_md5_2", "w")
|
||||||
|
test_file.write("cj3w97n7RY378WRXEN68W7RExnw6nr8276b473824")
|
||||||
|
test_file.close()
|
||||||
|
|
||||||
|
self.calculator = Md5CheckSumCalculator()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
os.remove("test_md5_1")
|
||||||
|
os.remove("test_md5_2")
|
||||||
|
|
||||||
|
def test_md5_checksum(self):
|
||||||
|
|
||||||
|
result = self.calculator.checksum("test_md5_1")
|
||||||
|
self.assertEqual(result, "9FAB6755CD2E8817D3E73B0978CA54A6")
|
||||||
|
|
||||||
|
result = self.calculator.checksum("test_md5_2")
|
||||||
|
self.assertEqual(result, "39A1AADE23E33A7F37C11C7FF9CDC9EC")
|
||||||
|
|
||||||
|
|
||||||
|
class Sha1CheckSumCalculatorTest(TestCase):
    """
    Checks Sha1CheckSumCalculator against two small files with known digests
    """

    def setUp(self):
        # Mode "w" truncates, so a file left over by an aborted run is
        # overwritten; the context manager closes the file even if the
        # write fails
        with open("test_sha1_1", "w") as test_file:
            test_file.write("sxjkneycbu")

        with open("test_sha1_2", "w") as test_file:
            test_file.write("xoimoqxy38e")

        self.calculator = Sha1CheckSumCalculator()

    def tearDown(self):
        os.remove("test_sha1_1")
        os.remove("test_sha1_2")

    def test_sha1_checksum(self):
        # Expected values are upper case hexadecimal sha1 digests

        result = self.calculator.checksum("test_sha1_1")
        self.assertEqual(result, "A80315387730DB5743061F397EB66DE0DDAE19E5")

        result = self.calculator.checksum("test_sha1_2")
        self.assertEqual(result, "E7B5A2B6F6838E766A0BC7E558F640726D70A8D6")
|
||||||
|
|
||||||
|
|
||||||
|
class Sha256CheckSumCalculatorTest(TestCase):
    """
    Checks Sha256CheckSumCalculator against two small files with known digests
    """

    def setUp(self):
        # Mode "w" truncates, so a file left over by an aborted run is
        # overwritten; the context manager closes the file even if the
        # write fails
        with open("test_sha256_1", "w") as test_file:
            test_file.write("eaur5t84nc7i")

        with open("test_sha256_2", "w") as test_file:
            test_file.write("xkwerci47ixryw7r6wxadwd")

        self.calculator = Sha256CheckSumCalculator()

    def tearDown(self):
        os.remove("test_sha256_1")
        os.remove("test_sha256_2")

    def test_sha256_checksum(self):
        # Expected values are upper case hexadecimal sha256 digests

        result = self.calculator.checksum("test_sha256_1")
        self.assertEqual(result, "DA7606DC763306B700685A71E2E72A2D95F1291209E5DA344B82DA2508FC27C5")

        result = self.calculator.checksum("test_sha256_2")
        self.assertEqual(result, "C39C7E0E7D84C9692F3C9C22E1EA0327DEBF1BF531B5738EEA8E79FE27EBC570")
|
47
spec/LocalStorage_spec.py
Normal file
47
spec/LocalStorage_spec.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from storage import LocalStorage, Directory, DuplicateDirectoryException
|
||||||
|
|
||||||
|
|
||||||
|
class LocalStorageTest(TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
|
||||||
|
s = LocalStorage()
|
||||||
|
s.init_db("../database.sql")
|
||||||
|
|
||||||
|
def test_save_and_retrieve_dir(self):
|
||||||
|
|
||||||
|
storage = LocalStorage()
|
||||||
|
|
||||||
|
d = Directory("/some/directory", True, ["opt1", "opt2", "opt3"])
|
||||||
|
|
||||||
|
storage.save_directory(d)
|
||||||
|
|
||||||
|
self.assertEqual(storage.dirs()["/some/directory"].enabled, True)
|
||||||
|
self.assertEqual(storage.dirs()["/some/directory"].options[0], "opt1")
|
||||||
|
|
||||||
|
def test_save_and_retrieve_dir_persistent(self):
|
||||||
|
|
||||||
|
s1 = LocalStorage()
|
||||||
|
|
||||||
|
d = Directory("/some/directory", True, ["opt1", "opt2", "opt3"])
|
||||||
|
|
||||||
|
s1.save_directory(d)
|
||||||
|
|
||||||
|
s2 = LocalStorage()
|
||||||
|
self.assertEqual(s2.dirs()["/some/directory"].enabled, True)
|
||||||
|
self.assertEqual(s2.dirs()["/some/directory"].options[0], "opt1")
|
||||||
|
|
||||||
|
def test_reject_duplicate_path(self):
|
||||||
|
|
||||||
|
s = LocalStorage()
|
||||||
|
|
||||||
|
d1 = Directory("/some/directory", True, ["opt1", "opt2"])
|
||||||
|
d2 = Directory("/some/directory", True, ["opt1", "opt2"])
|
||||||
|
|
||||||
|
s.save_directory(d1)
|
||||||
|
|
||||||
|
with self.assertRaises(DuplicateDirectoryException) as e:
|
||||||
|
s.save_directory(d2)
|
||||||
|
|
11
spec/test_folder/books.csv
vendored
Normal file
11
spec/test_folder/books.csv
vendored
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
id,cat,name,price,inStock,author,series_t,sequence_i,genre_s
|
||||||
|
0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
|
||||||
|
0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
|
||||||
|
055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
|
||||||
|
0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
|
||||||
|
0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
|
||||||
|
0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
|
||||||
|
0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
|
||||||
|
0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
|
||||||
|
0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
|
||||||
|
080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
|
|
51
spec/test_folder/books.json
vendored
Normal file
51
spec/test_folder/books.json
vendored
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"id" : "978-0641723445",
|
||||||
|
"cat" : ["book","hardcover"],
|
||||||
|
"name" : "The Lightning Thief",
|
||||||
|
"author" : "Rick Riordan",
|
||||||
|
"series_t" : "Percy Jackson and the Olympians",
|
||||||
|
"sequence_i" : 1,
|
||||||
|
"genre_s" : "fantasy",
|
||||||
|
"inStock" : true,
|
||||||
|
"price" : 12.50,
|
||||||
|
"pages_i" : 384
|
||||||
|
}
|
||||||
|
,
|
||||||
|
{
|
||||||
|
"id" : "978-1423103349",
|
||||||
|
"cat" : ["book","paperback"],
|
||||||
|
"name" : "The Sea of Monsters",
|
||||||
|
"author" : "Rick Riordan",
|
||||||
|
"series_t" : "Percy Jackson and the Olympians",
|
||||||
|
"sequence_i" : 2,
|
||||||
|
"genre_s" : "fantasy",
|
||||||
|
"inStock" : true,
|
||||||
|
"price" : 6.49,
|
||||||
|
"pages_i" : 304
|
||||||
|
}
|
||||||
|
,
|
||||||
|
{
|
||||||
|
"id" : "978-1857995879",
|
||||||
|
"cat" : ["book","paperback"],
|
||||||
|
"name" : "Sophie's World : The Greek Philosophers",
|
||||||
|
"author" : "Jostein Gaarder",
|
||||||
|
"sequence_i" : 1,
|
||||||
|
"genre_s" : "fantasy",
|
||||||
|
"inStock" : true,
|
||||||
|
"price" : 3.07,
|
||||||
|
"pages_i" : 64
|
||||||
|
}
|
||||||
|
,
|
||||||
|
{
|
||||||
|
"id" : "978-1933988177",
|
||||||
|
"cat" : ["book","paperback"],
|
||||||
|
"name" : "Lucene in Action, Second Edition",
|
||||||
|
"author" : "Michael McCandless",
|
||||||
|
"sequence_i" : 1,
|
||||||
|
"genre_s" : "IT",
|
||||||
|
"inStock" : true,
|
||||||
|
"price" : 30.50,
|
||||||
|
"pages_i" : 475
|
||||||
|
}
|
||||||
|
]
|
32
spec/test_folder/gb18030-example.xml
vendored
Normal file
32
spec/test_folder/gb18030-example.xml
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
<?xml version="1.0" encoding="GB18030"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add>
|
||||||
|
<doc>
|
||||||
|
<field name="id">GB18030TEST</field>
|
||||||
|
<field name="name">Test with some GB18030 encoded characters</field>
|
||||||
|
<field name="features">No accents here</field>
|
||||||
|
<field name="features">这是一个功能</field>
|
||||||
|
<field name="features">This is a feature (translated)</field>
|
||||||
|
<field name="features">这份文件是很有光泽</field>
|
||||||
|
<field name="features">This document is very shiny (translated)</field>
|
||||||
|
<field name="price">0.0</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
</doc>
|
||||||
|
</add>
|
||||||
|
|
56
spec/test_folder/hd.xml
vendored
Normal file
56
spec/test_folder/hd.xml
vendored
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add>
|
||||||
|
<doc>
|
||||||
|
<field name="id">SP2514N</field>
|
||||||
|
<field name="name">Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133</field>
|
||||||
|
<field name="manu">Samsung Electronics Co. Ltd.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">samsung</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">hard drive</field>
|
||||||
|
<field name="features">7200RPM, 8MB cache, IDE Ultra ATA-133</field>
|
||||||
|
<field name="features">NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor</field>
|
||||||
|
<field name="price">92.0</field>
|
||||||
|
<field name="popularity">6</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||||
|
<!-- Near Oklahoma city -->
|
||||||
|
<field name="store">35.0752,-97.032</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">6H500F0</field>
|
||||||
|
<field name="name">Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300</field>
|
||||||
|
<field name="manu">Maxtor Corp.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">maxtor</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">hard drive</field>
|
||||||
|
<field name="features">SATA 3.0Gb/s, NCQ</field>
|
||||||
|
<field name="features">8.5ms seek</field>
|
||||||
|
<field name="features">16MB cache</field>
|
||||||
|
<field name="price">350.0</field>
|
||||||
|
<field name="popularity">6</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- Buffalo store -->
|
||||||
|
<field name="store">45.17614,-93.87341</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||||
|
</doc>
|
||||||
|
</add>
|
||||||
|
|
60
spec/test_folder/ipod_other.xml
vendored
Normal file
60
spec/test_folder/ipod_other.xml
vendored
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">F8V7067-APL-KIT</field>
|
||||||
|
<field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
|
||||||
|
<field name="manu">Belkin</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">belkin</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">connector</field>
|
||||||
|
<field name="features">car power adapter, white</field>
|
||||||
|
<field name="weight">4.0</field>
|
||||||
|
<field name="price">19.95</field>
|
||||||
|
<field name="popularity">1</field>
|
||||||
|
<field name="inStock">false</field>
|
||||||
|
<!-- Buffalo store -->
|
||||||
|
<field name="store">45.18014,-93.87741</field>
|
||||||
|
<field name="manufacturedate_dt">2005-08-01T16:30:25Z</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">IW-02</field>
|
||||||
|
<field name="name">iPod & iPod Mini USB 2.0 Cable</field>
|
||||||
|
<field name="manu">Belkin</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">belkin</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">connector</field>
|
||||||
|
<field name="features">car power adapter for iPod, white</field>
|
||||||
|
<field name="weight">2.0</field>
|
||||||
|
<field name="price">11.50</field>
|
||||||
|
<field name="popularity">1</field>
|
||||||
|
<field name="inStock">false</field>
|
||||||
|
<!-- San Francisco store -->
|
||||||
|
<field name="store">37.7752,-122.4232</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-14T23:55:59Z</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
|
||||||
|
</add>
|
||||||
|
|
||||||
|
|
||||||
|
|
40
spec/test_folder/ipod_video.xml
vendored
Normal file
40
spec/test_folder/ipod_video.xml
vendored
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add><doc>
|
||||||
|
<field name="id">MA147LL/A</field>
|
||||||
|
<field name="name">Apple 60 GB iPod with Video Playback Black</field>
|
||||||
|
<field name="manu">Apple Computer Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">apple</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">music</field>
|
||||||
|
<field name="features">iTunes, Podcasts, Audiobooks</field>
|
||||||
|
<field name="features">Stores up to 15,000 songs, 25,000 photos, or 150 hours of video</field>
|
||||||
|
<field name="features">2.5-inch, 320x240 color TFT LCD display with LED backlight</field>
|
||||||
|
<field name="features">Up to 20 hours of battery life</field>
|
||||||
|
<field name="features">Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video</field>
|
||||||
|
<field name="features">Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication</field>
|
||||||
|
<field name="includes">earbud headphones, USB cable</field>
|
||||||
|
<field name="weight">5.5</field>
|
||||||
|
<field name="price">399.00</field>
|
||||||
|
<field name="popularity">10</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- Dodge City store -->
|
||||||
|
<field name="store">37.7752,-100.0232</field>
|
||||||
|
<field name="manufacturedate_dt">2005-10-12T08:00:00Z</field>
|
||||||
|
</doc></add>
|
33
spec/test_folder/monitor2.xml
vendored
Normal file
33
spec/test_folder/monitor2.xml
vendored
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add><doc>
|
||||||
|
<field name="id">VA902B</field>
|
||||||
|
<field name="name">ViewSonic VA902B - flat panel display - TFT - 19"</field>
|
||||||
|
<field name="manu">ViewSonic Corp.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">viewsonic</field>
|
||||||
|
<field name="cat">electronics and stuff2</field>
|
||||||
|
<field name="features">19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution</field>
|
||||||
|
<field name="weight">190.4</field>
|
||||||
|
<field name="price">279.95</field>
|
||||||
|
<field name="popularity">6</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- Buffalo store -->
|
||||||
|
<field name="store">45.18814,-93.88541</field>
|
||||||
|
</doc></add>
|
||||||
|
|
3
spec/test_folder/more_books.jsonl
vendored
Normal file
3
spec/test_folder/more_books.jsonl
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{"id":"0060248025","name":"Falling Up","inStock": true,"author": "Shel Silverstein"}
|
||||||
|
{"id":"0679805273","name":"Oh, The Places You'll Go","inStock": true,"author": "Dr. Seuss"}
|
||||||
|
|
43
spec/test_folder/mp500.xml
vendored
Normal file
43
spec/test_folder/mp500.xml
vendored
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add><doc>
|
||||||
|
<field name="id">0579B002</field>
|
||||||
|
<field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
|
||||||
|
<field name="manu">Canon Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">canon</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">multifunction printer</field>
|
||||||
|
<field name="cat">printer</field>
|
||||||
|
<field name="cat">scanner</field>
|
||||||
|
<field name="cat">copier</field>
|
||||||
|
<field name="features">Multifunction ink-jet color photo printer</field>
|
||||||
|
<field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
|
||||||
|
<field name="features">2.5" color LCD preview screen</field>
|
||||||
|
<field name="features">Duplex Copying</field>
|
||||||
|
<field name="features">Printing speed up to 29ppm black, 19ppm color</field>
|
||||||
|
<field name="features">Hi-Speed USB</field>
|
||||||
|
<field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
|
||||||
|
<field name="weight">352.0</field>
|
||||||
|
<field name="price">179.99</field>
|
||||||
|
<field name="popularity">6</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- Buffalo store -->
|
||||||
|
<field name="store">45.19214,-93.89941</field>
|
||||||
|
</doc></add>
|
||||||
|
|
BIN
spec/test_folder/post.jar
vendored
Normal file
BIN
spec/test_folder/post.jar
vendored
Normal file
Binary file not shown.
13
spec/test_folder/sample.html
vendored
Normal file
13
spec/test_folder/sample.html
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Welcome to Solr</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>
|
||||||
|
Here is some text
|
||||||
|
</p>
|
||||||
|
<p>distinct<br/>words</p>
|
||||||
|
<div>Here is some text in a div</div>
|
||||||
|
<div>This has a <a href="http://www.apache.org">link</a>.</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
BIN
spec/test_folder/sample_1.jpg
vendored
Normal file
BIN
spec/test_folder/sample_1.jpg
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 32 KiB |
BIN
spec/test_folder/sample_2.jpeg
vendored
Normal file
BIN
spec/test_folder/sample_2.jpeg
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 55 KiB |
BIN
spec/test_folder/sample_3.jpg
vendored
Normal file
BIN
spec/test_folder/sample_3.jpg
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.9 MiB |
BIN
spec/test_folder/sample_4.jpg
vendored
Normal file
BIN
spec/test_folder/sample_4.jpg
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 348 KiB |
BIN
spec/test_folder/solr-word.pdf
vendored
Normal file
BIN
spec/test_folder/solr-word.pdf
vendored
Normal file
Binary file not shown.
38
spec/test_folder/solr.xml
vendored
Normal file
38
spec/test_folder/solr.xml
vendored
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add>
|
||||||
|
<doc>
|
||||||
|
<field name="id">SOLR1000</field>
|
||||||
|
<field name="name">Solr, the Enterprise Search Server</field>
|
||||||
|
<field name="manu">Apache Software Foundation</field>
|
||||||
|
<field name="cat">software</field>
|
||||||
|
<field name="cat">search</field>
|
||||||
|
<field name="features">Advanced Full-Text Search Capabilities using Lucene</field>
|
||||||
|
<field name="features">Optimized for High Volume Web Traffic</field>
|
||||||
|
<field name="features">Standards Based Open Interfaces - XML and HTTP</field>
|
||||||
|
<field name="features">Comprehensive HTML Administration Interfaces</field>
|
||||||
|
<field name="features">Scalability - Efficient Replication to other Solr Search Servers</field>
|
||||||
|
<field name="features">Flexible and Adaptable with XML configuration and Schema</field>
|
||||||
|
<field name="features">Good unicode support: héllo (hello with an accent over the e)</field>
|
||||||
|
<field name="price">0.0</field>
|
||||||
|
<field name="popularity">10</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<field name="incubationdate_dt">2006-01-17T00:00:00.000Z</field>
|
||||||
|
</doc>
|
||||||
|
</add>
|
||||||
|
|
75
spec/test_folder/sub1/manufacturers.xml
Normal file
75
spec/test_folder/sub1/manufacturers.xml
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add>
|
||||||
|
<doc>
|
||||||
|
<field name="id">adata</field>
|
||||||
|
<field name="compName_s">A-Data Technology</field>
|
||||||
|
<field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">apple</field>
|
||||||
|
<field name="compName_s">Apple</field>
|
||||||
|
<field name="address_s">1 Infinite Way, Cupertino CA</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">asus</field>
|
||||||
|
<field name="compName_s">ASUS Computer</field>
|
||||||
|
<field name="address_s">800 Corporate Way Fremont, CA 94539</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">ati</field>
|
||||||
|
<field name="compName_s">ATI Technologies</field>
|
||||||
|
<field name="address_s">33 Commerce Valley Drive East Thornhill, ON L3T 7N6 Canada</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">belkin</field>
|
||||||
|
<field name="compName_s">Belkin</field>
|
||||||
|
<field name="address_s">12045 E. Waterfront Drive Playa Vista, CA 90094</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">canon</field>
|
||||||
|
<field name="compName_s">Canon, Inc.</field>
|
||||||
|
<field name="address_s">One Canon Plaza Lake Success, NY 11042</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">corsair</field>
|
||||||
|
<field name="compName_s">Corsair Microsystems</field>
|
||||||
|
<field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">dell</field>
|
||||||
|
<field name="compName_s">Dell, Inc.</field>
|
||||||
|
<field name="address_s">One Dell Way Round Rock, Texas 78682</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">maxtor</field>
|
||||||
|
<field name="compName_s">Maxtor Corporation</field>
|
||||||
|
<field name="address_s">920 Disc Drive Scotts Valley, CA 95066</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">samsung</field>
|
||||||
|
<field name="compName_s">Samsung Electronics Co. Ltd.</field>
|
||||||
|
<field name="address_s">105 Challenger Rd. Ridgefield Park, NJ 07660-0511</field>
|
||||||
|
</doc>
|
||||||
|
<doc>
|
||||||
|
<field name="id">viewsonic</field>
|
||||||
|
<field name="compName_s">ViewSonic Corp</field>
|
||||||
|
<field name="address_s">381 Brea Canyon Road Walnut, CA 91789-0708</field>
|
||||||
|
</doc>
|
||||||
|
</add>
|
||||||
|
|
77
spec/test_folder/sub1/mem.xml
Normal file
77
spec/test_folder/sub1/mem.xml
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add>
|
||||||
|
<doc>
|
||||||
|
<field name="id">TWINX2048-3200PRO</field>
|
||||||
|
<field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
|
||||||
|
<field name="manu">Corsair Microsystems Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">corsair</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">memory</field>
|
||||||
|
<field name="features">CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader</field>
|
||||||
|
<field name="price">185.00</field>
|
||||||
|
<field name="popularity">5</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- San Francisco store -->
|
||||||
|
<field name="store">37.7752,-122.4232</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||||
|
|
||||||
|
<!-- a field for testing payload tagged text via DelimitedPayloadTokenFilter -->
|
||||||
|
<field name="payloads">electronics|6.0 memory|3.0</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">VS1GB400C3</field>
|
||||||
|
<field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail</field>
|
||||||
|
<field name="manu">Corsair Microsystems Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">corsair</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">memory</field>
|
||||||
|
<field name="price">74.99</field>
|
||||||
|
<field name="popularity">7</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- Dodge City store -->
|
||||||
|
<field name="store">37.7752,-100.0232</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||||
|
|
||||||
|
<field name="payloads">electronics|4.0 memory|2.0</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">VDBDB1A16</field>
|
||||||
|
<field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM</field>
|
||||||
|
<field name="manu">A-DATA Technology Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">corsair</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">memory</field>
|
||||||
|
<field name="features">CAS latency 3, 2.7v</field>
|
||||||
|
<!-- note: price & popularity is missing on this one -->
|
||||||
|
<field name="popularity">0</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- Buffalo store -->
|
||||||
|
<field name="store">45.18414,-93.88141</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||||
|
|
||||||
|
<field name="payloads">electronics|0.9 memory|0.1</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
</add>
|
||||||
|
|
65
spec/test_folder/sub1/money.xml
Normal file
65
spec/test_folder/sub1/money.xml
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Example documents utilizing the CurrencyField type -->
|
||||||
|
<add>
|
||||||
|
<doc>
|
||||||
|
<field name="id">USD</field>
|
||||||
|
<field name="name">One Dollar</field>
|
||||||
|
<field name="manu">Bank of America</field>
|
||||||
|
<field name="manu_id_s">boa</field>
|
||||||
|
<field name="cat">currency</field>
|
||||||
|
<field name="features">Coins and notes</field>
|
||||||
|
<field name="price_c">1,USD</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">EUR</field>
|
||||||
|
<field name="name">One Euro</field>
|
||||||
|
<field name="manu">European Union</field>
|
||||||
|
<field name="manu_id_s">eu</field>
|
||||||
|
<field name="cat">currency</field>
|
||||||
|
<field name="features">Coins and notes</field>
|
||||||
|
<field name="price_c">1,EUR</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">GBP</field>
|
||||||
|
<field name="name">One British Pound</field>
|
||||||
|
<field name="manu">U.K.</field>
|
||||||
|
<field name="manu_id_s">uk</field>
|
||||||
|
<field name="cat">currency</field>
|
||||||
|
<field name="features">Coins and notes</field>
|
||||||
|
<field name="price_c">1,GBP</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<field name="id">NOK</field>
|
||||||
|
<field name="name">One Krone</field>
|
||||||
|
<field name="manu">Bank of Norway</field>
|
||||||
|
<field name="manu_id_s">nor</field>
|
||||||
|
<field name="cat">currency</field>
|
||||||
|
<field name="features">Coins and notes</field>
|
||||||
|
<field name="price_c">1,NOK</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
</add>
|
||||||
|
|
34
spec/test_folder/sub2/monitor.xml
Normal file
34
spec/test_folder/sub2/monitor.xml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add><doc>
|
||||||
|
<field name="id">3007WFP</field>
|
||||||
|
<field name="name">Dell Widescreen UltraSharp 3007WFP</field>
|
||||||
|
<field name="manu">Dell, Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">dell</field>
|
||||||
|
<field name="cat">electronics and computer1</field>
|
||||||
|
<field name="features">30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast</field>
|
||||||
|
<field name="includes">USB cable</field>
|
||||||
|
<field name="weight">401.6</field>
|
||||||
|
<field name="price">2199.0</field>
|
||||||
|
<field name="popularity">6</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- Buffalo store -->
|
||||||
|
<field name="store">43.17614,-90.57341</field>
|
||||||
|
</doc></add>
|
||||||
|
|
38
spec/test_folder/sub2/sd500.xml
Normal file
38
spec/test_folder/sub2/sd500.xml
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add><doc>
|
||||||
|
<field name="id">9885A004</field>
|
||||||
|
<field name="name">Canon PowerShot SD500</field>
|
||||||
|
<field name="manu">Canon Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">canon</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">camera</field>
|
||||||
|
<field name="features">3x zoop, 7.1 megapixel Digital ELPH</field>
|
||||||
|
<field name="features">movie clips up to 640x480 @30 fps</field>
|
||||||
|
<field name="features">2.0" TFT LCD, 118,000 pixels</field>
|
||||||
|
<field name="features">built in flash, red-eye reduction</field>
|
||||||
|
<field name="includes">32MB SD card, USB cable, AV cable, battery</field>
|
||||||
|
<field name="weight">6.4</field>
|
||||||
|
<field name="price">329.95</field>
|
||||||
|
<field name="popularity">7</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
|
||||||
|
<!-- Buffalo store -->
|
||||||
|
<field name="store">45.19614,-93.90341</field>
|
||||||
|
</doc></add>
|
3
spec/test_folder/sub2/sub_sub1/more_books.jsonl
Normal file
3
spec/test_folder/sub2/sub_sub1/more_books.jsonl
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{"id":"0060248025","name":"Falling Up","inStock": true,"author": "Shel Silverstein"}
|
||||||
|
{"id":"0679805273","name":"Oh, The Places You'll Go","inStock": true,"author": "Dr. Seuss"}
|
||||||
|
|
43
spec/test_folder/sub2/sub_sub1/mp500.xml
Normal file
43
spec/test_folder/sub2/sub_sub1/mp500.xml
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add><doc>
|
||||||
|
<field name="id">0579B002</field>
|
||||||
|
<field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
|
||||||
|
<field name="manu">Canon Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">canon</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">multifunction printer</field>
|
||||||
|
<field name="cat">printer</field>
|
||||||
|
<field name="cat">scanner</field>
|
||||||
|
<field name="cat">copier</field>
|
||||||
|
<field name="features">Multifunction ink-jet color photo printer</field>
|
||||||
|
<field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
|
||||||
|
<field name="features">2.5" color LCD preview screen</field>
|
||||||
|
<field name="features">Duplex Copying</field>
|
||||||
|
<field name="features">Printing speed up to 29ppm black, 19ppm color</field>
|
||||||
|
<field name="features">Hi-Speed USB</field>
|
||||||
|
<field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
|
||||||
|
<field name="weight">352.0</field>
|
||||||
|
<field name="price">179.99</field>
|
||||||
|
<field name="popularity">6</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
<!-- Buffalo store -->
|
||||||
|
<field name="store">45.19214,-93.89941</field>
|
||||||
|
</doc></add>
|
||||||
|
|
BIN
spec/test_folder/sub2/sub_sub1/post.jar
Normal file
BIN
spec/test_folder/sub2/sub_sub1/post.jar
Normal file
Binary file not shown.
93
spec/test_folder/test_utf8.sh
vendored
Executable file
93
spec/test_folder/test_utf8.sh
vendored
Executable file
@ -0,0 +1,93 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
#Test script to tell if the server is accepting UTF-8
|
||||||
|
#The python writer currently escapes non-ascii chars, so it's good for testing
|
||||||
|
|
||||||
|
SOLR_URL=http://localhost:8983/solr
|
||||||
|
|
||||||
|
if [ ! -z $1 ]; then
|
||||||
|
SOLR_URL=$1
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl "$SOLR_URL/select?q=hello&params=explicit&wt=python" 2> /dev/null | grep 'hello' > /dev/null 2>&1
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
echo "Solr server is up."
|
||||||
|
else
|
||||||
|
echo "ERROR: Could not curl to Solr - is curl installed? Is Solr not running?"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl "$SOLR_URL/select?q=h%C3%A9llo&echoParams=explicit&wt=python" 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
echo "HTTP GET is accepting UTF-8"
|
||||||
|
else
|
||||||
|
echo "ERROR: HTTP GET is not accepting UTF-8"
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl $SOLR_URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
echo "HTTP POST is accepting UTF-8"
|
||||||
|
else
|
||||||
|
echo "ERROR: HTTP POST is not accepting UTF-8"
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl $SOLR_URL/select --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
echo "HTTP POST defaults to UTF-8"
|
||||||
|
else
|
||||||
|
echo "HTTP POST does not default to UTF-8"
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
#A unicode character outside of the BMP (a circle with an x inside)
|
||||||
|
CHAR="𐌈"
|
||||||
|
CODEPOINT='0x10308'
|
||||||
|
#URL encoded UTF8 of the codepoint
|
||||||
|
UTF8_Q='%F0%90%8C%88'
|
||||||
|
#expected return of the python writer (currently uses UTF-16 surrogates)
|
||||||
|
EXPECTED='\\ud800\\udf08'
|
||||||
|
|
||||||
|
curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit&wt=python" 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
echo "HTTP GET is accepting UTF-8 beyond the basic multilingual plane"
|
||||||
|
else
|
||||||
|
echo "ERROR: HTTP GET is not accepting UTF-8 beyond the basic multilingual plane"
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl $SOLR_URL/select --data-binary "q=$UTF8_Q&echoParams=explicit&wt=python" -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
echo "HTTP POST is accepting UTF-8 beyond the basic multilingual plane"
|
||||||
|
else
|
||||||
|
echo "ERROR: HTTP POST is not accepting UTF-8 beyond the basic multilingual plane"
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit&wt=python" --data-binary '' 2> /dev/null | grep $EXPECTED > /dev/null 2>&1
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
echo "HTTP POST + URL params is accepting UTF-8 beyond the basic multilingual plane"
|
||||||
|
else
|
||||||
|
echo "ERROR: HTTP POST + URL params is not accepting UTF-8 beyond the basic multilingual plane"
|
||||||
|
fi
|
||||||
|
|
||||||
|
#curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit" 2> /dev/null | od -tx1 -w1000 | sed 's/ //g' | grep 'f4808198' > /dev/null 2>&1
|
||||||
|
curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit" 2> /dev/null | grep "$CHAR" > /dev/null 2>&1
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
echo "Response correctly returns UTF-8 beyond the basic multilingual plane"
|
||||||
|
else
|
||||||
|
echo "ERROR: Response can't return UTF-8 beyond the basic multilingual plane"
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
42
spec/test_folder/utf8-example.xml
vendored
Normal file
42
spec/test_folder/utf8-example.xml
vendored
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
After posting this to Solr with bin/post, searching for "êâîôû" from
|
||||||
|
the solr/admin/ search page must return this document.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add>
|
||||||
|
<doc>
|
||||||
|
<field name="id">UTF8TEST</field>
|
||||||
|
<field name="name">Test with some UTF-8 encoded characters</field>
|
||||||
|
<field name="manu">Apache Software Foundation</field>
|
||||||
|
<field name="cat">software</field>
|
||||||
|
<field name="cat">search</field>
|
||||||
|
<field name="features">No accents here</field>
|
||||||
|
<field name="features">This is an e acute: é</field>
|
||||||
|
<field name="features">eaiou with circumflexes: êâîôû</field>
|
||||||
|
<field name="features">eaiou with umlauts: ëäïöü</field>
|
||||||
|
<field name="features">tag with escaped chars: &lt;nicetag/&gt;</field>
|
||||||
|
<field name="features">escaped ampersand: Bonnie &amp; Clyde</field>
|
||||||
|
<field name="features">Outside the BMP:𐌈 codepoint=10308, a circle with an x inside. UTF8=f0908c88 UTF16=d800 df08</field>
|
||||||
|
<field name="price">0.0</field>
|
||||||
|
<field name="inStock">true</field>
|
||||||
|
</doc>
|
||||||
|
</add>
|
||||||
|
|
62
spec/test_folder/vidcard.xml
vendored
Normal file
62
spec/test_folder/vidcard.xml
vendored
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<add>
|
||||||
|
<doc>
|
||||||
|
<field name="id">EN7800GTX/2DHTV/256M</field>
|
||||||
|
<field name="name">ASUS Extreme N7800GTX/2DHTV (256 MB)</field>
|
||||||
|
<!-- Denormalized -->
|
||||||
|
<field name="manu">ASUS Computer Inc.</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">asus</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">graphics card</field>
|
||||||
|
<field name="features">NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz</field>
|
||||||
|
<field name="features">256MB GDDR3 Memory clocked at 1.35GHz</field>
|
||||||
|
<field name="features">PCI Express x16</field>
|
||||||
|
<field name="features">Dual DVI connectors, HDTV out, video input</field>
|
||||||
|
<field name="features">OpenGL 2.0, DirectX 9.0</field>
|
||||||
|
<field name="weight">16.0</field>
|
||||||
|
<field name="price">479.95</field>
|
||||||
|
<field name="popularity">7</field>
|
||||||
|
<field name="store">40.7143,-74.006</field>
|
||||||
|
<field name="inStock">false</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
|
||||||
|
</doc>
|
||||||
|
<!-- yes, you can add more than one document at a time -->
|
||||||
|
<doc>
|
||||||
|
<field name="id">100-435805</field>
|
||||||
|
<field name="name">ATI Radeon X1900 XTX 512 MB PCIE Video Card</field>
|
||||||
|
<field name="manu">ATI Technologies</field>
|
||||||
|
<!-- Join -->
|
||||||
|
<field name="manu_id_s">ati</field>
|
||||||
|
<field name="cat">electronics</field>
|
||||||
|
<field name="cat">graphics card</field>
|
||||||
|
<field name="features">ATI RADEON X1900 GPU/VPU clocked at 650MHz</field>
|
||||||
|
<field name="features">512MB GDDR3 SDRAM clocked at 1.55GHz</field>
|
||||||
|
<field name="features">PCI Express x16</field>
|
||||||
|
<field name="features">dual DVI, HDTV, svideo, composite out</field>
|
||||||
|
<field name="features">OpenGL 2.0, DirectX 9.0</field>
|
||||||
|
<field name="weight">48.0</field>
|
||||||
|
<field name="price">649.99</field>
|
||||||
|
<field name="popularity">7</field>
|
||||||
|
<field name="inStock">false</field>
|
||||||
|
<field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
|
||||||
|
<!-- NYC store -->
|
||||||
|
<field name="store">40.7143,-74.006</field>
|
||||||
|
</doc>
|
||||||
|
</add>
|
107
storage.py
Normal file
107
storage.py
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
import sqlite3
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class DuplicateDirectoryException(Exception):
    """Raised when a directory is saved with a path that is already stored."""
|
||||||
|
|
||||||
|
|
||||||
|
class Directory:
    """
    Plain record describing one directory: its path, its enabled flag and
    its list of options.
    """

    def __init__(self, path: str, enabled: bool, options: list):
        self.path, self.enabled, self.options = path, enabled, options

    def __str__(self):
        # path is used as-is, so it must already be a str
        fields = (
            self.path,
            "enabled: " + str(self.enabled),
            "opts: " + str(self.options),
        )
        return " | ".join(fields)
|
||||||
|
|
||||||
|
|
||||||
|
class LocalStorage:
    """
    Manages storage of application data to disk.

    Directories and their options are persisted in the SQLite3 database at
    ``db_path``. Could be refactored into an abstract class to switch from
    SQLite3 to something else.
    """

    # Static flag: True when the database was changed since the last time it
    # was cached in memory. The flag is shared by every instance while the
    # cache itself (cached_dirs) is per instance.
    # NOTE(review): because of that split, an instance that did not perform the
    # reload can keep serving a stale cache - confirm a single instance is used.
    cache_outdated = True

    # Location of the database file. The path is relative, so it is resolved
    # against the current working directory of the process.
    db_path = "../local_storage.db"

    def __init__(self):
        # path -> Directory, filled by dirs()
        self.cached_dirs = {}

    @staticmethod
    def init_db(script_path):
        """
        Creates a blank database. Overwrites the old one

        :param script_path: Path of the SQL script that creates the schema
        :return: None
        """
        # Read the script first so that an unreadable script does not cost us
        # the existing database
        with open(script_path, "r") as f:
            script = f.read()

        if os.path.isfile(LocalStorage.db_path):
            os.remove(LocalStorage.db_path)

        # Whatever was cached in memory no longer matches the blank database
        LocalStorage.cache_outdated = True

        conn = sqlite3.connect(LocalStorage.db_path)
        try:
            conn.executescript(script)
            conn.commit()
        finally:
            # Release the database file even when the script fails
            conn.close()

    def save_directory(self, directory: Directory):
        """
        Save directory to storage

        :param directory: Directory to save
        :return: None
        :raises DuplicateDirectoryException: when an insert violates a database
            integrity constraint (reported as a duplicate directory path)
        """
        LocalStorage.cache_outdated = True

        conn = sqlite3.connect(LocalStorage.db_path)
        try:
            c = conn.cursor()
            # SQLite only enforces foreign keys when asked to, per connection
            c.execute("PRAGMA FOREIGN_KEYS = ON;")

            try:
                c.execute("INSERT INTO Directory (path, enabled) VALUES (?, ?)",
                          (directory.path, directory.enabled))
                # Row id of the directory that was just inserted
                dir_id = c.lastrowid

                c.executemany("INSERT INTO Option (name, directory_id) VALUES (?, ?)",
                              [(opt, dir_id) for opt in directory.options])

                conn.commit()
            except sqlite3.IntegrityError as err:
                # Nothing was committed; closing the connection below discards
                # the partial inserts
                raise DuplicateDirectoryException("Duplicate directory path: " + directory.path) from err
        finally:
            conn.close()

    def dirs(self):
        """
        Return every stored directory, keyed by its path.

        The database is only read when ``cache_outdated`` is set; otherwise the
        dict cached by a previous call on this instance is returned.

        :return: dict mapping directory path to Directory
        """
        if not LocalStorage.cache_outdated:
            return self.cached_dirs

        conn = sqlite3.connect(LocalStorage.db_path)
        try:
            c = conn.cursor()
            c.execute("SELECT id, path, enabled FROM Directory")
            db_directories = c.fetchall()
            c.execute("SELECT name, directory_id FROM Option")
            db_options = c.fetchall()
        finally:
            # The rows are already in memory, the connection is not needed anymore
            conn.close()

        # Group the option names by directory id in a single pass
        options_by_dir = {}
        for name, directory_id in db_options:
            options_by_dir.setdefault(directory_id, []).append(name)

        self.cached_dirs = {}
        for dir_id, path, enabled in db_directories:
            self.cached_dirs[path] = Directory(path, enabled, options_by_dir.get(dir_id, []))

        LocalStorage.cache_outdated = False
        return self.cached_dirs
|
10
templates/layout.html
Normal file
10
templates/layout.html
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Layout Title</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
129
webserver.py
129
webserver.py
@ -1,129 +0,0 @@
|
|||||||
from flask import Flask, render_template, send_file, request
|
|
||||||
import pysolr
|
|
||||||
import mimetypes
|
|
||||||
import requests
|
|
||||||
import json
|
|
||||||
from PIL import Image
|
|
||||||
import os
|
|
||||||
|
|
||||||
# Base URL of the Solr endpoint used by this server; handler names such as
# "get" are appended to it
SOLR_URL = "http://localhost:8983/solr/test/"

# pysolr client bound to that URL, created with timeout=10
solr = pysolr.Solr(SOLR_URL, timeout=10)

# Flask application object on which the route handlers of this module register
app = Flask(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class Document:
    """Search result entry holding the id, name, path, size and md5 of a file."""

    def __init__(self, doc_id, name, path, size, md5):
        self.doc_id, self.name, self.path = doc_id, name, path
        self.size, self.md5 = size, md5
|
|
||||||
|
|
||||||
|
|
||||||
class ImageDocument(Document):
    """Document whose ``type`` attribute is set to "image"."""

    def __init__(self, doc_id, name, path, size, md5):
        super().__init__(doc_id=doc_id, name=name, path=path, size=size, md5=md5)
        self.type = "image"
|
|
||||||
|
|
||||||
|
|
||||||
class AudioClipDocument(Document):
    """Document whose ``type`` attribute is set to "audio"."""

    def __init__(self, doc_id, name, path, size, md5):
        super().__init__(doc_id=doc_id, name=name, path=path, size=size, md5=md5)
        self.type = "audio"
|
|
||||||
|
|
||||||
|
|
||||||
def get_document(id):
    """
    Fetch a single document from Solr through its ``get`` handler.

    :param id: Solr id of the document. The name shadows the ``id`` builtin but
        is kept so that existing callers keep working.
    :return: value of the "doc" key of the JSON response; presumably None when
        Solr has no such document (the callers test for None) - confirm
    """
    # Passing the id through params lets requests URL-encode it; gluing it onto
    # the URL by hand breaks for ids containing characters such as & # or spaces.
    # The timeout matches the 10 given to the pysolr client, so a stalled Solr
    # cannot block the request forever.
    response = requests.get(SOLR_URL + "get", params={"id": id}, timeout=10)

    return json.loads(response.text)["doc"]
|
|
||||||
|
|
||||||
|
|
||||||
def make_thumb(doc):
    """
    Make sure a thumbnail exists for a Solr image document and return its path.

    Images wider than 1024 pixels are scaled down to fit in 1024x1024 and saved
    as PNG (RGBA mode) or JPEG (any other mode, converted to RGB when needed).
    Narrower images are not re-encoded: the thumbnail is a symlink to the
    original file. An existing thumbnail is reused as-is.

    :param doc: Solr document; reads doc["id"] and, when the thumbnail is
        missing, doc["path"][0], doc["name"][0] and doc["width"][0]
    :return: path of the thumbnail, "thumbnails/" + doc["id"]
    """
    size = (1024, 1024)
    thumb_path = "thumbnails/" + doc["id"]

    if os.path.exists(thumb_path):
        return thumb_path

    file_path = doc["path"][0] + "/" + doc["name"][0]

    # Only the width is compared with the limit
    if doc["width"][0] <= size[0]:
        print("Skipping thumbnail")
        os.symlink(file_path, thumb_path)
        return thumb_path

    # The with block closes the source file once the thumbnail is written
    with Image.open(file_path) as image:
        # LANCZOS is the filter formerly exposed as Image.ANTIALIAS, an alias
        # that was removed in Pillow 10
        image.thumbnail(size, Image.LANCZOS)

        if image.mode == "RGBA":
            # Keep the alpha channel
            image.save(thumb_path, "PNG")
        elif image.mode == "RGB":
            image.save(thumb_path, "JPEG")
        else:
            # Any other mode is converted to RGB before being written as JPEG
            image.convert("RGB").save(thumb_path, "JPEG")

    return thumb_path
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/search/")
def search():
    """
    Run a Solr query and render the matching documents.

    Query string parameters:
        query: Solr query, handed to pysolr unchanged
        page: zero-based page number, defaults to 0
        per_page: number of results per page, defaults to 10

    Only results whose mime type starts with "image" or "audio" are rendered;
    results of any other type are left out.

    :return: the rendered search.html template
    """
    query = request.args.get("query")
    # type=int falls back to the default when the parameter is missing or is
    # not a number, instead of failing the request with an uncaught int() error.
    # Negative values are clamped so Solr never receives a negative offset.
    page = max(request.args.get("page", 0, type=int), 0)
    per_page = max(request.args.get("per_page", 10, type=int), 0)

    results = solr.search(query, None, rows=per_page, start=per_page * page)

    docs = []
    for r in results:
        # Results without a mime field are treated as having an empty type
        mime_type = r["mime"][0] if "mime" in r else ""

        if mime_type.startswith("image"):
            docs.append(ImageDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
        elif mime_type.startswith("audio"):
            docs.append(AudioClipDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))

    return render_template("search.html", docs=docs)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
def index():
    """Serve the page rendered from the index.html template."""
    page = render_template("index.html")
    return page
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/files/<id>/")
def files(id):
    """
    Send the file behind a Solr document.

    :param id: Solr document id taken from the URL (the name is bound by the
        route, so it is kept even though it shadows the ``id`` builtin)
    :return: the file, with a mime type guessed from its name, or a 404
        response when no document was found
    """
    doc = get_document(id)

    if doc is None:
        # Report the miss as 404; a bare string would be sent with status 200
        return "File not found", 404

    file_path = doc["path"][0] + "/" + doc["name"][0]
    return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/thumbs/<doc_id>/")
def thumbs(doc_id):
    """
    Send the thumbnail of a Solr document, creating it first when needed.

    :param doc_id: Solr document id taken from the URL
    :return: the thumbnail file, or a 404 response when no document was found
    """
    doc = get_document(doc_id)

    if doc is None:
        # Report the miss as 404; a bare string would be sent with status 200
        return "File not found", 404

    thumb_path = make_thumb(doc)

    # Serve the path make_thumb reports rather than rebuilding it from the URL.
    # NOTE(review): thumbnail paths carry no file extension, so guess_type()
    # presumably yields None here - confirm which mime type clients receive.
    return send_file(thumb_path, mimetype=mimetypes.guess_type(thumb_path)[0])
|
|
||||||
|
|
||||||
|
|
||||||
# Start Flask's built-in server, listening on all interfaces (0.0.0.0), port 8080.
# NOTE(review): this call is not behind an `if __name__ == "__main__":` guard,
# so importing this module also starts the server.
app.run("0.0.0.0", 8080)
|
|
Loading…
x
Reference in New Issue
Block a user