Added local storage for directories and generic file parser

This commit is contained in:
simon 2018-02-13 10:04:07 -05:00
parent 09acdc762a
commit fec23d40d9
41 changed files with 1543 additions and 185 deletions

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
spec/test_folder/* linguist-vendored

0
config.py Normal file
View File

View File

@ -1,64 +1,152 @@
import os
import hashlib
import mimetypes
from PIL import Image
import simplejson
rootDir = "/home/simon/Documents"
# https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
def md5sum(filename, block_size=65536):
    """
    Calculate the md5 checksum of a file
    :param filename: path of the file to hash
    :param block_size: number of bytes read from the file per iteration
    :return: lowercase hexadecimal md5 digest of the file's content
    """
    # Named `digest` rather than `hash` so the builtin hash() is not shadowed.
    digest = hashlib.md5()

    with open(filename, "rb") as f:
        # Read fixed-size blocks until read() returns b"" (end of file) so the
        # whole file never has to be held in memory at once.
        for block in iter(lambda: f.read(block_size), b""):
            digest.update(block)

    return digest.hexdigest()
class Crawler:
    # Placeholder: the class has no attributes or methods yet.
    pass
def crawl(root_dir):
    """
    Walk a directory tree and write information about every file to crawler.json

    For each file the md5 checksum, containing directory, file name, size and
    modification time (truncated to whole seconds) are recorded. When a mime
    type can be guessed from the file name it is recorded too, and for image
    mime types the width and height are added. If the image cannot be opened
    the mime type is dropped from the record again.
    :param root_dir: directory to start walking from
    :return: None, the result is written to crawler.json in the working directory
    """
    docs = []

    for root, subdirs, files in os.walk(root_dir):

        print(root)

        for filename in files:
            full_path = os.path.join(root, filename)

            doc = dict()

            doc["md5"] = md5sum(full_path)
            doc["path"] = root
            doc["name"] = filename
            doc["size"] = os.path.getsize(full_path)
            doc["mtime"] = int(os.path.getmtime(full_path))

            mime_type = mimetypes.guess_type(full_path)[0]

            if mime_type is not None:

                doc["mime"] = mime_type

                if mime_type.startswith("image"):
                    try:
                        # The context manager closes the image file even if
                        # reading its size fails.
                        with Image.open(full_path) as image:
                            width, height = image.size

                        doc["width"] = width
                        doc["height"] = height
                    except (OSError, ValueError):
                        # Not a readable image after all: the guessed mime
                        # type is not trustworthy, so it is removed.
                        doc.pop('mime', None)

            docs.append(doc)

    # `with` guarantees the output file is closed even if serialisation raises.
    with open("crawler.json", "w") as out:
        out.write(simplejson.dumps(docs))
class FileParser:
    # Empty base class; GenericFileParser derives from it.
    pass
# Runs a crawl of rootDir as soon as this module is executed or imported.
crawl(rootDir)
def _file_hex_digest(hash_factory, path: str, block_size: int = 65536) -> str:
    """
    Hash the content of a file block by block
    :param hash_factory: callable returning a new hashlib hash object (e.g. hashlib.md5)
    :param path: path of the file
    :param block_size: number of bytes read from the file per iteration
    :return: uppercase hexadecimal digest of the file's content
    """
    result = hash_factory()

    with open(path, "rb") as f:
        # iter() with a sentinel keeps reading until read() returns b"" (EOF)
        for block in iter(lambda: f.read(block_size), b""):
            result.update(block)

    return result.hexdigest().upper()


class CheckSumCalculator:
    """
    Interface of the checksum calculators. Subclasses override checksum().
    """

    def checksum(self, path: str) -> str:
        """
        Calculate the checksum of a file
        :param path: path of the file
        :return: checksum
        :raises NotImplementedError: always, this method must be overridden
        """
        raise NotImplementedError()


class Md5CheckSumCalculator(CheckSumCalculator):
    """
    Checksum calculator using the md5 algorithm
    """

    def __init__(self):
        # Key under which callers such as GenericFileParser store the result
        self.name = "md5"

    def checksum(self, path: str) -> str:
        """
        Calculate the md5 checksum of a file
        :param path: path of the file
        :return: md5 checksum as an uppercase hexadecimal string
        """
        return _file_hex_digest(hashlib.md5, path)


class Sha1CheckSumCalculator(CheckSumCalculator):
    """
    Checksum calculator using the sha1 algorithm
    """

    def __init__(self):
        self.name = "sha1"

    def checksum(self, path: str) -> str:
        """
        Calculate the sha1 checksum of a file
        :param path: path of the file
        :return: sha1 checksum as an uppercase hexadecimal string
        """
        return _file_hex_digest(hashlib.sha1, path)


class Sha256CheckSumCalculator(CheckSumCalculator):
    """
    Checksum calculator using the sha256 algorithm
    """

    def __init__(self):
        self.name = "sha256"

    def checksum(self, path: str) -> str:
        """
        Calculate the sha256 checksum of a file
        :param path: path of the file
        :return: sha256 checksum as an uppercase hexadecimal string
        """
        return _file_hex_digest(hashlib.sha256, path)
class GenericFileParser(FileParser):
    """
    Parser collecting the information available for any kind of file:
    size, name and one checksum per configured calculator
    """

    def __init__(self, checksum_calculators: list):
        """
        :param checksum_calculators: objects exposing a `name` attribute and a
        `checksum(path)` method; each adds one entry to the parse result
        """
        self.checksum_calculators = checksum_calculators

    def parse(self, path: str) -> dict:
        """
        Parse a generic file
        :param path: path of the file to parse
        :return: dict information about the file
        """
        info = dict()

        info["size"] = os.path.getsize(path)
        # Strip the directory before dropping the extension, otherwise the
        # name of "/some/dir/file.txt" would be "/some/dir/file".
        info["name"] = os.path.splitext(os.path.basename(path))[0]

        for calculator in self.checksum_calculators:
            # Stored under the calculator's own name, e.g. info["md5"]
            info[calculator.name] = calculator.checksum(path)

        return info
# def crawl(root_dir: str) -> None:
# docs = []
#
# for root, dirs, files in os.walk(root_dir):
#
# print(root)
#
# for filename in files:
# full_path = os.path.join(root, filename)
#
# doc = dict()
#
# doc["md5"] = md5sum(full_path)
# doc["path"] = root
# doc["name"] = filename
# doc["size"] = os.path.getsize(full_path)
# doc["mtime"] = int(os.path.getmtime(full_path))
#
# mime_type = mimetypes.guess_type(full_path)[0]
#
# if mime_type is not None:
#
# doc["mime"] = mime_type
#
# if mime_type.startswith("image"):
# try:
# width, height = Image.open(full_path).size
#
# doc["width"] = width
# doc["height"] = height
# except OSError:
# doc.pop('mime', None)
# pass
# except ValueError:
# doc.pop('mime', None)
# pass
#
# docs.append(doc)
#
# file = open("crawler.json", "w")
# file.write(simplejson.dumps(docs))
# file.close()
#
#

40
database.sql Normal file
View File

@ -0,0 +1,40 @@
PRAGMA FOREIGN_KEYS = ON;
-- Represents a directory and its sub-directories
CREATE TABLE Directory (
id INTEGER PRIMARY KEY AUTOINCREMENT,
path TEXT UNIQUE,
enabled BOOLEAN
);
-- Represents a queued task for crawling a Directory or generating thumbnails
CREATE TABLE Task (
id INTEGER PRIMARY KEY AUTOINCREMENT,
directory_id INTEGER,
task_type INTEGER,
completed BOOLEAN DEFAULT 0,
completed_time DATETIME,
FOREIGN KEY (directory_id) REFERENCES Directory(id)
);
-- You can set an option on a directory to change the crawler's behavior
CREATE TABLE Option (
  -- TEXT, not STRING: SQLite does not recognise STRING as a text type and
  -- gives such a column NUMERIC affinity, so a name made only of digits
  -- would be stored as a number.
  name TEXT,
  directory_id INTEGER,
  FOREIGN KEY (directory_id) REFERENCES Directory(id),
  PRIMARY KEY (name, directory_id)
);
-- User accounts
CREATE TABLE User (
username TEXT PRIMARY KEY,
password TEXT,
is_admin BOOLEAN
);
-- Pairs a username with a directory id
-- (presumably a read permission, going by the table name - confirm)
CREATE TABLE User_canRead_Directory (
  username TEXT,
  directory_id INTEGER,
  PRIMARY KEY (username, directory_id)
);

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
Pillow
simplejson

135
run.py Normal file
View File

@ -0,0 +1,135 @@
from flask import Flask, render_template, send_file, request
import pysolr
import mimetypes
import requests
import json
from PIL import Image
import os
SOLR_URL = "http://localhost:8983/solr/test/"
solr = pysolr.Solr(SOLR_URL, timeout=10)
app = Flask(__name__)
#
# class Document:
# def __init__(self, doc_id, name, path, size, md5):
# self.doc_id = doc_id
# self.name = name
# self.path = path
# self.size = size
# self.md5 = md5
#
#
# class ImageDocument(Document):
# def __init__(self, doc_id, name, path, size, md5):
# super().__init__(doc_id, name, path, size, md5)
# self.type = "image"
#
#
# class AudioClipDocument(Document):
# def __init__(self, doc_id, name, path, size, md5):
# super().__init__(doc_id, name, path, size, md5)
# self.type = "audio"
#
#
# def get_document(id):
#
# response = requests.get(SOLR_URL + "get?id=" + id)
#
# return json.loads(response.text)["doc"]
#
#
# def make_thumb(doc):
# size = (1024, 1024)
#
# thumb_path = "thumbnails/" + doc["id"]
#
# if not os.path.exists(thumb_path):
#
# file_path = doc["path"][0] + "/" + doc["name"][0]
#
# if doc["width"][0] > size[0]:
#
# image = Image.open(file_path)
# image.thumbnail(size, Image.ANTIALIAS)
#
# if image.mode == "RGB":
# image.save(thumb_path, "JPEG")
# elif image.mode == "RGBA":
# image.save(thumb_path, "PNG")
# else:
# image = image.convert("RGB")
# image.save(thumb_path, "JPEG")
# else:
# print("Skipping thumbnail")
# os.symlink(file_path, thumb_path)
#
# return "thumbnails/" + doc["id"]
#
#
# @app.route("/search/")
# def search():
#
# query = request.args.get("query")
# page = int(request.args.get("page"))
# per_page = int(request.args.get("per_page"))
#
# results = solr.search(query, None, rows=per_page, start=per_page * page)
#
# docs = []
# for r in results:
#
# if "mime" in r:
# mime_type = r["mime"][0]
# else:
# mime_type = ""
#
# if mime_type.startswith("image"):
# docs.append(ImageDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
#
# elif mime_type.startswith("audio"):
# docs.append(AudioClipDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
#
# return render_template("search.html", docs=docs)
#
#
# @app.route("/")
# def index():
# return render_template("index.html")
#
#
# @app.route("/files/<id>/")
# def files(id):
#
# doc = get_document(id)
#
# if doc is not None:
# file_path = doc["path"][0] + "/" + doc["name"][0]
# return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
# else:
# return "File not found"
#
#
# @app.route("/thumbs/<doc_id>/")
# def thumbs(doc_id):
#
# doc = get_document(doc_id)
#
# if doc is not None:
#
# thumb_path = make_thumb(doc)
#
# return send_file("thumbnails/" + doc_id, mimetype=mimetypes.guess_type(thumb_path)[0])
# else:
# return "File not found"
@app.route("/")
def tmp_route():
    """
    Temporary handler for the root URL
    :return: the literal string "test"
    """
    return "test"
# Start the Flask server only when this file is executed directly, not when
# it is imported. Host "0.0.0.0" listens on all network interfaces, port 8080.
if __name__ == "__main__":
    app.run("0.0.0.0", 8080)

4
setupDb.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/bash
# Recreate the SQLite database local_storage.db from the schema in database.sql.

# Delete the same file that is created below, so the schema is applied to an
# empty database. -f: do not fail when the file does not exist yet.
rm -f local_storage.db

sqlite3 local_storage.db -init "database.sql"

10
spec/Crawler_spec.py Normal file
View File

@ -0,0 +1,10 @@
from unittest import TestCase
from crawler import Crawler
class CrawlerTest(TestCase):
    """
    Tests for the Crawler class
    """

    def test_dir_walk(self):
        # Only instantiates a Crawler for now; nothing is asserted yet.
        c = Crawler()

131
spec/FileParser_spec.py Normal file
View File

@ -0,0 +1,131 @@
import os
from unittest import TestCase
from crawler import GenericFileParser, Md5CheckSumCalculator, Sha1CheckSumCalculator, Sha256CheckSumCalculator
class GenericFileParserTest(TestCase):
    """
    Checks the size, name and md5 entries GenericFileParser produces for a
    small file written to the working directory
    """

    def setUp(self):
        # Start from a clean slate in case a previous run left the file behind
        if os.path.exists("test_parse"):
            os.remove("test_parse")

        with open("test_parse", "w") as handle:
            handle.write("12345678")

        self.parser = GenericFileParser([Md5CheckSumCalculator()])

    def tearDown(self):
        os.remove("test_parse")

    def test_parse_size(self):
        parsed = self.parser.parse("test_parse")

        # The file holds the 8 characters written in setUp
        self.assertEqual(parsed["size"], 8)

    def test_parse_name(self):
        parsed = self.parser.parse("test_parse")

        self.assertEqual(parsed["name"], "test_parse")

    def test_parse_md5(self):
        parsed = self.parser.parse("test_parse")

        self.assertEqual(parsed["md5"], "25D55AD283AA400AF464C76D713C07AD")
class Md5CheckSumCalculatorTest(TestCase):
    """
    Checks Md5CheckSumCalculator against two files with known content
    """

    def setUp(self):
        # Each fixture is removed first if a previous run left it behind
        if os.path.exists("test_md5_1"):
            os.remove("test_md5_1")
        with open("test_md5_1", "w") as handle:
            handle.write("789456123")

        if os.path.exists("test_md5_2"):
            os.remove("test_md5_2")
        with open("test_md5_2", "w") as handle:
            handle.write("cj3w97n7RY378WRXEN68W7RExnw6nr8276b473824")

        self.calculator = Md5CheckSumCalculator()

    def tearDown(self):
        os.remove("test_md5_1")
        os.remove("test_md5_2")

    def test_md5_checksum(self):
        first = self.calculator.checksum("test_md5_1")
        self.assertEqual(first, "9FAB6755CD2E8817D3E73B0978CA54A6")

        second = self.calculator.checksum("test_md5_2")
        self.assertEqual(second, "39A1AADE23E33A7F37C11C7FF9CDC9EC")
class Sha1CheckSumCalculatorTest(TestCase):
def setUp(self):
if os.path.exists("test_sha1_1"):
os.remove("test_sha1_1")
test_file = open("test_sha1_1", "w")
test_file.write("sxjkneycbu")
test_file.close()
if os.path.exists("test_sha1_2"):
os.remove("test_sha1_2")
test_file = open("test_sha1_2", "w")
test_file.write("xoimoqxy38e")
test_file.close()
self.calculator = Sha1CheckSumCalculator()
def tearDown(self):
os.remove("test_sha1_1")
os.remove("test_sha1_2")
def test_md5_checksum(self):
result = self.calculator.checksum("test_sha1_1")
self.assertEqual(result, "A80315387730DB5743061F397EB66DE0DDAE19E5")
result = self.calculator.checksum("test_sha1_2")
self.assertEqual(result, "E7B5A2B6F6838E766A0BC7E558F640726D70A8D6")
class Sha256CheckSumCalculatorTest(TestCase):
def setUp(self):
if os.path.exists("test_sha256_1"):
os.remove("test_sha256_1")
test_file = open("test_sha256_1", "w")
test_file.write("eaur5t84nc7i")
test_file.close()
if os.path.exists("test_sha256_2"):
os.remove("test_sha256_2")
test_file = open("test_sha256_2", "w")
test_file.write("xkwerci47ixryw7r6wxadwd")
test_file.close()
self.calculator = Sha256CheckSumCalculator()
def tearDown(self):
os.remove("test_sha256_1")
os.remove("test_sha256_2")
def test_md5_checksum(self):
result = self.calculator.checksum("test_sha256_1")
self.assertEqual(result, "DA7606DC763306B700685A71E2E72A2D95F1291209E5DA344B82DA2508FC27C5")
result = self.calculator.checksum("test_sha256_2")
self.assertEqual(result, "C39C7E0E7D84C9692F3C9C22E1EA0327DEBF1BF531B5738EEA8E79FE27EBC570")

47
spec/LocalStorage_spec.py Normal file
View File

@ -0,0 +1,47 @@
from unittest import TestCase
from storage import LocalStorage, Directory, DuplicateDirectoryException
class LocalStorageTest(TestCase):
def setUp(self):
s = LocalStorage()
s.init_db("../database.sql")
def test_save_and_retrieve_dir(self):
storage = LocalStorage()
d = Directory("/some/directory", True, ["opt1", "opt2", "opt3"])
storage.save_directory(d)
self.assertEqual(storage.dirs()["/some/directory"].enabled, True)
self.assertEqual(storage.dirs()["/some/directory"].options[0], "opt1")
def test_save_and_retrieve_dir_persistent(self):
s1 = LocalStorage()
d = Directory("/some/directory", True, ["opt1", "opt2", "opt3"])
s1.save_directory(d)
s2 = LocalStorage()
self.assertEqual(s2.dirs()["/some/directory"].enabled, True)
self.assertEqual(s2.dirs()["/some/directory"].options[0], "opt1")
def test_reject_duplicate_path(self):
s = LocalStorage()
d1 = Directory("/some/directory", True, ["opt1", "opt2"])
d2 = Directory("/some/directory", True, ["opt1", "opt2"])
s.save_directory(d1)
with self.assertRaises(DuplicateDirectoryException) as e:
s.save_directory(d2)

11
spec/test_folder/books.csv vendored Normal file
View File

@ -0,0 +1,11 @@
id,cat,name,price,inStock,author,series_t,sequence_i,genre_s
0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
1 id cat name price inStock author series_t sequence_i genre_s
2 0553573403 book A Game of Thrones 7.99 true George R.R. Martin A Song of Ice and Fire 1 fantasy
3 0553579908 book A Clash of Kings 7.99 true George R.R. Martin A Song of Ice and Fire 2 fantasy
4 055357342X book A Storm of Swords 7.99 true George R.R. Martin A Song of Ice and Fire 3 fantasy
5 0553293354 book Foundation 7.99 true Isaac Asimov Foundation Novels 1 scifi
6 0812521390 book The Black Company 6.99 false Glen Cook The Chronicles of The Black Company 1 fantasy
7 0812550706 book Ender's Game 6.99 true Orson Scott Card Ender 1 scifi
8 0441385532 book Jhereg 7.95 false Steven Brust Vlad Taltos 1 fantasy
9 0380014300 book Nine Princes In Amber 6.99 true Roger Zelazny the Chronicles of Amber 1 fantasy
10 0805080481 book The Book of Three 5.99 true Lloyd Alexander The Chronicles of Prydain 1 fantasy
11 080508049X book The Black Cauldron 5.99 true Lloyd Alexander The Chronicles of Prydain 2 fantasy

51
spec/test_folder/books.json vendored Normal file
View File

@ -0,0 +1,51 @@
[
{
"id" : "978-0641723445",
"cat" : ["book","hardcover"],
"name" : "The Lightning Thief",
"author" : "Rick Riordan",
"series_t" : "Percy Jackson and the Olympians",
"sequence_i" : 1,
"genre_s" : "fantasy",
"inStock" : true,
"price" : 12.50,
"pages_i" : 384
}
,
{
"id" : "978-1423103349",
"cat" : ["book","paperback"],
"name" : "The Sea of Monsters",
"author" : "Rick Riordan",
"series_t" : "Percy Jackson and the Olympians",
"sequence_i" : 2,
"genre_s" : "fantasy",
"inStock" : true,
"price" : 6.49,
"pages_i" : 304
}
,
{
"id" : "978-1857995879",
"cat" : ["book","paperback"],
"name" : "Sophie's World : The Greek Philosophers",
"author" : "Jostein Gaarder",
"sequence_i" : 1,
"genre_s" : "fantasy",
"inStock" : true,
"price" : 3.07,
"pages_i" : 64
}
,
{
"id" : "978-1933988177",
"cat" : ["book","paperback"],
"name" : "Lucene in Action, Second Edition",
"author" : "Michael McCandless",
"sequence_i" : 1,
"genre_s" : "IT",
"inStock" : true,
"price" : 30.50,
"pages_i" : 475
}
]

32
spec/test_folder/gb18030-example.xml vendored Normal file
View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="GB18030"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add>
<doc>
<field name="id">GB18030TEST</field>
<field name="name">Test with some GB18030 encoded characters</field>
<field name="features">No accents here</field>
<field name="features">这是一个功能</field>
<field name="features">This is a feature (translated)</field>
<field name="features">这份文件是很有光泽</field>
<field name="features">This document is very shiny (translated)</field>
<field name="price">0.0</field>
<field name="inStock">true</field>
</doc>
</add>

56
spec/test_folder/hd.xml vendored Normal file
View File

@ -0,0 +1,56 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add>
<doc>
<field name="id">SP2514N</field>
<field name="name">Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133</field>
<field name="manu">Samsung Electronics Co. Ltd.</field>
<!-- Join -->
<field name="manu_id_s">samsung</field>
<field name="cat">electronics</field>
<field name="cat">hard drive</field>
<field name="features">7200RPM, 8MB cache, IDE Ultra ATA-133</field>
<field name="features">NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor</field>
<field name="price">92.0</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
<!-- Near Oklahoma city -->
<field name="store">35.0752,-97.032</field>
</doc>
<doc>
<field name="id">6H500F0</field>
<field name="name">Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300</field>
<field name="manu">Maxtor Corp.</field>
<!-- Join -->
<field name="manu_id_s">maxtor</field>
<field name="cat">electronics</field>
<field name="cat">hard drive</field>
<field name="features">SATA 3.0Gb/s, NCQ</field>
<field name="features">8.5ms seek</field>
<field name="features">16MB cache</field>
<field name="price">350.0</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.17614,-93.87341</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
</doc>
</add>

60
spec/test_folder/ipod_other.xml vendored Normal file
View File

@ -0,0 +1,60 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add>
<doc>
<field name="id">F8V7067-APL-KIT</field>
<field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
<field name="manu">Belkin</field>
<!-- Join -->
<field name="manu_id_s">belkin</field>
<field name="cat">electronics</field>
<field name="cat">connector</field>
<field name="features">car power adapter, white</field>
<field name="weight">4.0</field>
<field name="price">19.95</field>
<field name="popularity">1</field>
<field name="inStock">false</field>
<!-- Buffalo store -->
<field name="store">45.18014,-93.87741</field>
<field name="manufacturedate_dt">2005-08-01T16:30:25Z</field>
</doc>
<doc>
<field name="id">IW-02</field>
<field name="name">iPod &amp; iPod Mini USB 2.0 Cable</field>
<field name="manu">Belkin</field>
<!-- Join -->
<field name="manu_id_s">belkin</field>
<field name="cat">electronics</field>
<field name="cat">connector</field>
<field name="features">car power adapter for iPod, white</field>
<field name="weight">2.0</field>
<field name="price">11.50</field>
<field name="popularity">1</field>
<field name="inStock">false</field>
<!-- San Francisco store -->
<field name="store">37.7752,-122.4232</field>
<field name="manufacturedate_dt">2006-02-14T23:55:59Z</field>
</doc>
</add>

40
spec/test_folder/ipod_video.xml vendored Normal file
View File

@ -0,0 +1,40 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add><doc>
<field name="id">MA147LL/A</field>
<field name="name">Apple 60 GB iPod with Video Playback Black</field>
<field name="manu">Apple Computer Inc.</field>
<!-- Join -->
<field name="manu_id_s">apple</field>
<field name="cat">electronics</field>
<field name="cat">music</field>
<field name="features">iTunes, Podcasts, Audiobooks</field>
<field name="features">Stores up to 15,000 songs, 25,000 photos, or 150 hours of video</field>
<field name="features">2.5-inch, 320x240 color TFT LCD display with LED backlight</field>
<field name="features">Up to 20 hours of battery life</field>
<field name="features">Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video</field>
<field name="features">Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication</field>
<field name="includes">earbud headphones, USB cable</field>
<field name="weight">5.5</field>
<field name="price">399.00</field>
<field name="popularity">10</field>
<field name="inStock">true</field>
<!-- Dodge City store -->
<field name="store">37.7752,-100.0232</field>
<field name="manufacturedate_dt">2005-10-12T08:00:00Z</field>
</doc></add>

33
spec/test_folder/monitor2.xml vendored Normal file
View File

@ -0,0 +1,33 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add><doc>
<field name="id">VA902B</field>
<field name="name">ViewSonic VA902B - flat panel display - TFT - 19"</field>
<field name="manu">ViewSonic Corp.</field>
<!-- Join -->
<field name="manu_id_s">viewsonic</field>
<field name="cat">electronics and stuff2</field>
<field name="features">19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution</field>
<field name="weight">190.4</field>
<field name="price">279.95</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.18814,-93.88541</field>
</doc></add>

3
spec/test_folder/more_books.jsonl vendored Normal file
View File

@ -0,0 +1,3 @@
{"id":"0060248025","name":"Falling Up","inStock": true,"author": "Shel Silverstein"}
{"id":"0679805273","name":"Oh, The Places You'll Go","inStock": true,"author": "Dr. Seuss"}

43
spec/test_folder/mp500.xml vendored Normal file
View File

@ -0,0 +1,43 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add><doc>
<field name="id">0579B002</field>
<field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
<field name="manu">Canon Inc.</field>
<!-- Join -->
<field name="manu_id_s">canon</field>
<field name="cat">electronics</field>
<field name="cat">multifunction printer</field>
<field name="cat">printer</field>
<field name="cat">scanner</field>
<field name="cat">copier</field>
<field name="features">Multifunction ink-jet color photo printer</field>
<field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
<field name="features">2.5" color LCD preview screen</field>
<field name="features">Duplex Copying</field>
<field name="features">Printing speed up to 29ppm black, 19ppm color</field>
<field name="features">Hi-Speed USB</field>
<field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
<field name="weight">352.0</field>
<field name="price">179.99</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.19214,-93.89941</field>
</doc></add>

BIN
spec/test_folder/post.jar vendored Normal file

Binary file not shown.

13
spec/test_folder/sample.html vendored Normal file
View File

@ -0,0 +1,13 @@
<html>
<head>
<title>Welcome to Solr</title>
</head>
<body>
<p>
Here is some text
</p>
<p>distinct<br/>words</p>
<div>Here is some text in a div</div>
<div>This has a <a href="http://www.apache.org">link</a>.</div>
</body>
</html>

BIN
spec/test_folder/sample_1.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
spec/test_folder/sample_2.jpeg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

BIN
spec/test_folder/sample_3.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 MiB

BIN
spec/test_folder/sample_4.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 348 KiB

BIN
spec/test_folder/solr-word.pdf vendored Normal file

Binary file not shown.

38
spec/test_folder/solr.xml vendored Normal file
View File

@ -0,0 +1,38 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add>
<doc>
<field name="id">SOLR1000</field>
<field name="name">Solr, the Enterprise Search Server</field>
<field name="manu">Apache Software Foundation</field>
<field name="cat">software</field>
<field name="cat">search</field>
<field name="features">Advanced Full-Text Search Capabilities using Lucene</field>
<field name="features">Optimized for High Volume Web Traffic</field>
<field name="features">Standards Based Open Interfaces - XML and HTTP</field>
<field name="features">Comprehensive HTML Administration Interfaces</field>
<field name="features">Scalability - Efficient Replication to other Solr Search Servers</field>
<field name="features">Flexible and Adaptable with XML configuration and Schema</field>
<field name="features">Good unicode support: h&#xE9;llo (hello with an accent over the e)</field>
<field name="price">0.0</field>
<field name="popularity">10</field>
<field name="inStock">true</field>
<field name="incubationdate_dt">2006-01-17T00:00:00.000Z</field>
</doc>
</add>

View File

@ -0,0 +1,75 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add>
<doc>
<field name="id">adata</field>
<field name="compName_s">A-Data Technology</field>
<field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
</doc>
<doc>
<field name="id">apple</field>
<field name="compName_s">Apple</field>
<field name="address_s">1 Infinite Way, Cupertino CA</field>
</doc>
<doc>
<field name="id">asus</field>
<field name="compName_s">ASUS Computer</field>
<field name="address_s">800 Corporate Way Fremont, CA 94539</field>
</doc>
<doc>
<field name="id">ati</field>
<field name="compName_s">ATI Technologies</field>
<field name="address_s">33 Commerce Valley Drive East Thornhill, ON L3T 7N6 Canada</field>
</doc>
<doc>
<field name="id">belkin</field>
<field name="compName_s">Belkin</field>
<field name="address_s">12045 E. Waterfront Drive Playa Vista, CA 90094</field>
</doc>
<doc>
<field name="id">canon</field>
<field name="compName_s">Canon, Inc.</field>
<field name="address_s">One Canon Plaza Lake Success, NY 11042</field>
</doc>
<doc>
<field name="id">corsair</field>
<field name="compName_s">Corsair Microsystems</field>
<field name="address_s">46221 Landing Parkway Fremont, CA 94538</field>
</doc>
<doc>
<field name="id">dell</field>
<field name="compName_s">Dell, Inc.</field>
<field name="address_s">One Dell Way Round Rock, Texas 78682</field>
</doc>
<doc>
<field name="id">maxtor</field>
<field name="compName_s">Maxtor Corporation</field>
<field name="address_s">920 Disc Drive Scotts Valley, CA 95066</field>
</doc>
<doc>
<field name="id">samsung</field>
<field name="compName_s">Samsung Electronics Co. Ltd.</field>
<field name="address_s">105 Challenger Rd. Ridgefield Park, NJ 07660-0511</field>
</doc>
<doc>
<field name="id">viewsonic</field>
<field name="compName_s">ViewSonic Corp</field>
<field name="address_s">381 Brea Canyon Road Walnut, CA 91789-0708</field>
</doc>
</add>

View File

@ -0,0 +1,77 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add>
<doc>
<field name="id">TWINX2048-3200PRO</field>
<field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
<field name="manu">Corsair Microsystems Inc.</field>
<!-- Join -->
<field name="manu_id_s">corsair</field>
<field name="cat">electronics</field>
<field name="cat">memory</field>
<field name="features">CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader</field>
<field name="price">185.00</field>
<field name="popularity">5</field>
<field name="inStock">true</field>
<!-- San Francisco store -->
<field name="store">37.7752,-122.4232</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
<!-- a field for testing payload tagged text via DelimitedPayloadTokenFilter -->
<field name="payloads">electronics|6.0 memory|3.0</field>
</doc>
<doc>
<field name="id">VS1GB400C3</field>
<field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail</field>
<field name="manu">Corsair Microsystems Inc.</field>
<!-- Join -->
<field name="manu_id_s">corsair</field>
<field name="cat">electronics</field>
<field name="cat">memory</field>
<field name="price">74.99</field>
<field name="popularity">7</field>
<field name="inStock">true</field>
<!-- Dodge City store -->
<field name="store">37.7752,-100.0232</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
<field name="payloads">electronics|4.0 memory|2.0</field>
</doc>
<doc>
<field name="id">VDBDB1A16</field>
<field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM</field>
<field name="manu">A-DATA Technology Inc.</field>
<!-- Join -->
<field name="manu_id_s">corsair</field>
<field name="cat">electronics</field>
<field name="cat">memory</field>
<field name="features">CAS latency 3, 2.7v</field>
<!-- note: price & popularity is missing on this one -->
<field name="popularity">0</field>
<field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.18414,-93.88141</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
<field name="payloads">electronics|0.9 memory|0.1</field>
</doc>
</add>

View File

@ -0,0 +1,65 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Example documents utilizing the CurrencyField type -->
<add>
<doc>
<field name="id">USD</field>
<field name="name">One Dollar</field>
<field name="manu">Bank of America</field>
<field name="manu_id_s">boa</field>
<field name="cat">currency</field>
<field name="features">Coins and notes</field>
<field name="price_c">1,USD</field>
<field name="inStock">true</field>
</doc>
<doc>
<field name="id">EUR</field>
<field name="name">One Euro</field>
<field name="manu">European Union</field>
<field name="manu_id_s">eu</field>
<field name="cat">currency</field>
<field name="features">Coins and notes</field>
<field name="price_c">1,EUR</field>
<field name="inStock">true</field>
</doc>
<doc>
<field name="id">GBP</field>
<field name="name">One British Pound</field>
<field name="manu">U.K.</field>
<field name="manu_id_s">uk</field>
<field name="cat">currency</field>
<field name="features">Coins and notes</field>
<field name="price_c">1,GBP</field>
<field name="inStock">true</field>
</doc>
<doc>
<field name="id">NOK</field>
<field name="name">One Krone</field>
<field name="manu">Bank of Norway</field>
<field name="manu_id_s">nor</field>
<field name="cat">currency</field>
<field name="features">Coins and notes</field>
<field name="price_c">1,NOK</field>
<field name="inStock">true</field>
</doc>
</add>

View File

@ -0,0 +1,34 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add><doc>
<field name="id">3007WFP</field>
<field name="name">Dell Widescreen UltraSharp 3007WFP</field>
<field name="manu">Dell, Inc.</field>
<!-- Join -->
<field name="manu_id_s">dell</field>
<field name="cat">electronics and computer1</field>
<field name="features">30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast</field>
<field name="includes">USB cable</field>
<field name="weight">401.6</field>
<field name="price">2199.0</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">43.17614,-90.57341</field>
</doc></add>

View File

@ -0,0 +1,38 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add><doc>
<field name="id">9885A004</field>
<field name="name">Canon PowerShot SD500</field>
<field name="manu">Canon Inc.</field>
<!-- Join -->
<field name="manu_id_s">canon</field>
<field name="cat">electronics</field>
<field name="cat">camera</field>
<field name="features">3x zoop, 7.1 megapixel Digital ELPH</field>
<field name="features">movie clips up to 640x480 @30 fps</field>
<field name="features">2.0" TFT LCD, 118,000 pixels</field>
<field name="features">built in flash, red-eye reduction</field>
<field name="includes">32MB SD card, USB cable, AV cable, battery</field>
<field name="weight">6.4</field>
<field name="price">329.95</field>
<field name="popularity">7</field>
<field name="inStock">true</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z</field>
<!-- Buffalo store -->
<field name="store">45.19614,-93.90341</field>
</doc></add>

View File

@ -0,0 +1,3 @@
{"id":"0060248025","name":"Falling Up","inStock": true,"author": "Shel Silverstein"}
{"id":"0679805273","name":"Oh, The Places You'll Go","inStock": true,"author": "Dr. Seuss"}

View File

@ -0,0 +1,43 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add><doc>
<field name="id">0579B002</field>
<field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
<field name="manu">Canon Inc.</field>
<!-- Join -->
<field name="manu_id_s">canon</field>
<field name="cat">electronics</field>
<field name="cat">multifunction printer</field>
<field name="cat">printer</field>
<field name="cat">scanner</field>
<field name="cat">copier</field>
<field name="features">Multifunction ink-jet color photo printer</field>
<field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
<field name="features">2.5" color LCD preview screen</field>
<field name="features">Duplex Copying</field>
<field name="features">Printing speed up to 29ppm black, 19ppm color</field>
<field name="features">Hi-Speed USB</field>
<field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
<field name="weight">352.0</field>
<field name="price">179.99</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
<!-- Buffalo store -->
<field name="store">45.19214,-93.89941</field>
</doc></add>

Binary file not shown.

93
spec/test_folder/test_utf8.sh vendored Executable file
View File

@ -0,0 +1,93 @@
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test script to tell if the server is accepting UTF-8.
# The python writer currently escapes non-ascii chars, so it's good for testing.
#
# Usage: test_utf8.sh [SOLR_URL]
# SOLR_URL defaults to http://localhost:8983/solr when no argument is given.

SOLR_URL=http://localhost:8983/solr
# Quote "$1" so an empty or whitespace-containing argument cannot break the test.
if [ -n "$1" ]; then
  SOLR_URL=$1
fi

# Sanity check: the query must be echoed back, hence echoParams=explicit
# (the same parameter every other request in this script uses).
if curl "$SOLR_URL/select?q=hello&echoParams=explicit&wt=python" 2> /dev/null | grep 'hello' > /dev/null 2>&1; then
  echo "Solr server is up."
else
  echo "ERROR: Could not curl to Solr - is curl installed? Is Solr not running?"
  exit 1
fi

# "h%C3%A9llo" is the URL-encoded UTF-8 form of "héllo"; the python writer
# is expected to echo it back as the escape sequence h\u00e9llo.
if curl "$SOLR_URL/select?q=h%C3%A9llo&echoParams=explicit&wt=python" 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1; then
  echo "HTTP GET is accepting UTF-8"
else
  echo "ERROR: HTTP GET is not accepting UTF-8"
fi

if curl "$SOLR_URL/select" --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1; then
  echo "HTTP POST is accepting UTF-8"
else
  echo "ERROR: HTTP POST is not accepting UTF-8"
fi

# Same POST without an explicit charset: not an error, only reported.
if curl "$SOLR_URL/select" --data-binary 'q=h%C3%A9llo&echoParams=explicit&wt=python' 2> /dev/null | grep 'h\\u00e9llo' > /dev/null 2>&1; then
  echo "HTTP POST defaults to UTF-8"
else
  echo "HTTP POST does not default to UTF-8"
fi

#A unicode character outside of the BMP (a circle with an x inside)
CHAR="𐌈"
CODEPOINT='0x10308'
#URL encoded UTF8 of the codepoint
UTF8_Q='%F0%90%8C%88'
#expected return of the python writer (currently uses UTF-16 surrogates)
EXPECTED='\\ud800\\udf08'

if curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit&wt=python" 2> /dev/null | grep "$EXPECTED" > /dev/null 2>&1; then
  echo "HTTP GET is accepting UTF-8 beyond the basic multilingual plane"
else
  echo "ERROR: HTTP GET is not accepting UTF-8 beyond the basic multilingual plane"
fi

if curl "$SOLR_URL/select" --data-binary "q=$UTF8_Q&echoParams=explicit&wt=python" -H 'Content-type:application/x-www-form-urlencoded; charset=UTF-8' 2> /dev/null | grep "$EXPECTED" > /dev/null 2>&1; then
  echo "HTTP POST is accepting UTF-8 beyond the basic multilingual plane"
else
  echo "ERROR: HTTP POST is not accepting UTF-8 beyond the basic multilingual plane"
fi

# POST with an empty body and the parameters in the URL.
if curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit&wt=python" --data-binary '' 2> /dev/null | grep "$EXPECTED" > /dev/null 2>&1; then
  echo "HTTP POST + URL params is accepting UTF-8 beyond the basic multilingual plane"
else
  echo "ERROR: HTTP POST + URL params is not accepting UTF-8 beyond the basic multilingual plane"
fi

# Default response writer (no wt=python): the raw character itself must come back.
#curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit" 2> /dev/null | od -tx1 -w1000 | sed 's/ //g' | grep 'f4808198' > /dev/null 2>&1
if curl "$SOLR_URL/select?q=$UTF8_Q&echoParams=explicit" 2> /dev/null | grep "$CHAR" > /dev/null 2>&1; then
  echo "Response correctly returns UTF-8 beyond the basic multilingual plane"
else
  echo "ERROR: Response can't return UTF-8 beyond the basic multilingual plane"
fi

42
spec/test_folder/utf8-example.xml vendored Normal file
View File

@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
After posting this to Solr with bin/post, searching for "êâîôû" from
the solr/admin/ search page must return this document.
-->
<add>
<doc>
<field name="id">UTF8TEST</field>
<field name="name">Test with some UTF-8 encoded characters</field>
<field name="manu">Apache Software Foundation</field>
<field name="cat">software</field>
<field name="cat">search</field>
<field name="features">No accents here</field>
<field name="features">This is an e acute: é</field>
<field name="features">eaiou with circumflexes: êâîôû</field>
<field name="features">eaiou with umlauts: ëäïöü</field>
<field name="features">tag with escaped chars: &lt;nicetag/&gt;</field>
<field name="features">escaped ampersand: Bonnie &amp; Clyde</field>
<field name="features">Outside the BMP:𐌈 codepoint=10308, a circle with an x inside. UTF8=f0908c88 UTF16=d800 df08</field>
<field name="price">0.0</field>
<field name="inStock">true</field>
</doc>
</add>

62
spec/test_folder/vidcard.xml vendored Normal file
View File

@ -0,0 +1,62 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add>
<doc>
<field name="id">EN7800GTX/2DHTV/256M</field>
<field name="name">ASUS Extreme N7800GTX/2DHTV (256 MB)</field>
<!-- Denormalized -->
<field name="manu">ASUS Computer Inc.</field>
<!-- Join -->
<field name="manu_id_s">asus</field>
<field name="cat">electronics</field>
<field name="cat">graphics card</field>
<field name="features">NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz</field>
<field name="features">256MB GDDR3 Memory clocked at 1.35GHz</field>
<field name="features">PCI Express x16</field>
<field name="features">Dual DVI connectors, HDTV out, video input</field>
<field name="features">OpenGL 2.0, DirectX 9.0</field>
<field name="weight">16.0</field>
<field name="price">479.95</field>
<field name="popularity">7</field>
<field name="store">40.7143,-74.006</field>
<field name="inStock">false</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
</doc>
<!-- yes, you can add more than one document at a time -->
<doc>
<field name="id">100-435805</field>
<field name="name">ATI Radeon X1900 XTX 512 MB PCIE Video Card</field>
<field name="manu">ATI Technologies</field>
<!-- Join -->
<field name="manu_id_s">ati</field>
<field name="cat">electronics</field>
<field name="cat">graphics card</field>
<field name="features">ATI RADEON X1900 GPU/VPU clocked at 650MHz</field>
<field name="features">512MB GDDR3 SDRAM clocked at 1.55GHz</field>
<field name="features">PCI Express x16</field>
<field name="features">dual DVI, HDTV, svideo, composite out</field>
<field name="features">OpenGL 2.0, DirectX 9.0</field>
<field name="weight">48.0</field>
<field name="price">649.99</field>
<field name="popularity">7</field>
<field name="inStock">false</field>
<field name="manufacturedate_dt">2006-02-13T15:26:37Z/DAY</field>
<!-- NYC store -->
<field name="store">40.7143,-74.006</field>
</doc>
</add>

107
storage.py Normal file
View File

@ -0,0 +1,107 @@
import sqlite3
import os
class DuplicateDirectoryException(Exception):
    """Error reported when a directory path is already stored."""
class Directory:
    """
    Plain record describing one directory: its path, whether it is enabled
    and the list of options attached to it.
    """

    def __init__(self, path: str, enabled: bool, options: list):
        self.path, self.enabled, self.options = path, enabled, options

    def __str__(self):
        # Rendered as "<path> | enabled: <enabled> | opts: <options>"
        parts = (self.path, f"enabled: {self.enabled!s}", f"opts: {self.options!s}")
        return " | ".join(parts)
class LocalStorage:
"""
Manages storage of application data to disk.
Could be refactored into a abstract class to switch from SQLite3 to something else
"""
cache_outdated = True
"""Static variable that indicates that the database was changed since the last time it was cached in memory"""
db_path = "../local_storage.db"
def __init__(self):
self.cached_dirs = {}
pass
@staticmethod
def init_db(script_path):
"""Creates a blank database. Overwrites the old one"""
if os.path.isfile(LocalStorage.db_path):
os.remove(LocalStorage.db_path)
conn = sqlite3.connect(LocalStorage.db_path)
c = conn.cursor()
with open(script_path, "r") as f:
c.executescript(f.read())
conn.commit()
c.close()
conn.close()
def save_directory(self, directory: Directory):
"""
Save directory to storage
:param directory: Directory to save
:return: None
"""
LocalStorage.cache_outdated = True
conn = sqlite3.connect(LocalStorage.db_path)
c = conn.cursor()
c.execute("PRAGMA FOREIGN_KEYS = ON;")
try:
c.execute("INSERT INTO Directory (path, enabled) VALUES (?, ?)", (directory.path, directory.enabled))
c.execute("SELECT last_insert_rowid()")
dir_id = c.fetchone()[0]
for opt in directory.options:
conn.execute("INSERT INTO Option (name, directory_id) VALUES (?, ?)", (opt, dir_id))
conn.commit()
except sqlite3.IntegrityError:
raise DuplicateDirectoryException("Duplicate directory path: " + directory.path)
finally:
conn.close()
def dirs(self):
if LocalStorage.cache_outdated:
self.cached_dirs = {}
conn = sqlite3.connect(LocalStorage.db_path)
c = conn.cursor()
c.execute("SELECT id, path, enabled FROM Directory")
db_directories = c.fetchall()
c.execute("SELECT name, directory_id FROM Option")
db_options = c.fetchall()
for db_dir in db_directories:
options = []
directory = Directory(db_dir[1], db_dir[2], options)
for db_opt in db_options:
if db_opt[1] == db_dir[0]:
options.append(db_opt[0])
self.cached_dirs[directory.path] = directory
LocalStorage.cache_outdated = False
return self.cached_dirs
else:
return self.cached_dirs

10
templates/layout.html Normal file
View File

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Layout Title</title>
</head>
<body>
</body>
</html>

View File

@ -1,129 +0,0 @@
from flask import Flask, render_template, send_file, request
import pysolr
import mimetypes
import requests
import json
from PIL import Image
import os
# Base URL of the Solr core; request paths such as "get?id=..." are appended to it.
SOLR_URL = "http://localhost:8983/solr/test/"
# pysolr client used by the search route, created with timeout=10.
solr = pysolr.Solr(SOLR_URL, timeout=10)
# Flask application on which the routes below are registered.
app = Flask(__name__)
class Document:
    """Plain record holding a document's id, name, path, size and md5."""

    def __init__(self, doc_id, name, path, size, md5):
        self.doc_id, self.name, self.path = doc_id, name, path
        self.size, self.md5 = size, md5
class ImageDocument(Document):
    """Document whose ``type`` attribute is "image"."""

    def __init__(self, doc_id, name, path, size, md5):
        self.type = "image"
        super().__init__(doc_id=doc_id, name=name, path=path, size=size, md5=md5)
class AudioClipDocument(Document):
    """Document whose ``type`` attribute is "audio"."""

    def __init__(self, doc_id, name, path, size, md5):
        self.type = "audio"
        super().__init__(doc_id=doc_id, name=name, path=path, size=size, md5=md5)
def get_document(id):
    """
    Fetch one document from the Solr ``get`` endpoint.

    :param id: id of the document to fetch
    :return: value of the "doc" key of the JSON response
    """
    # Let requests build the query string so the id is URL-encoded; concatenating
    # it by hand breaks on ids containing characters such as '&', '#' or spaces.
    # The timeout mirrors the one given to the pysolr client so a stalled Solr
    # cannot block the request handler forever.
    response = requests.get(SOLR_URL + "get", params={"id": id}, timeout=10)
    return json.loads(response.text)["doc"]
def make_thumb(doc):
    """
    Make sure a thumbnail exists for a document and return its path.

    Nothing is done when ``thumbnails/<id>`` already exists. Otherwise an image
    wider than 1024 pixels is shrunk to fit in 1024x1024 and saved (PNG for RGBA
    images, JPEG for everything else); any other image is symlinked as-is.

    :param doc: document mapping; reads doc["id"], doc["path"][0], doc["name"][0]
        and doc["width"][0]
    :return: path of the thumbnail
    """
    size = (1024, 1024)
    thumb_path = "thumbnails/" + doc["id"]

    if os.path.exists(thumb_path):
        return thumb_path

    file_path = doc["path"][0] + "/" + doc["name"][0]

    # NOTE(review): only the width is compared; a narrow but very tall image is
    # symlinked at full size. Confirm whether the height should be checked too.
    if doc["width"][0] > size[0]:
        # The context manager closes the source file, which Image.open otherwise
        # keeps open for the lifetime of the image object.
        with Image.open(file_path) as image:
            image.thumbnail(size, Image.ANTIALIAS)

            if image.mode == "RGBA":
                image.save(thumb_path, "PNG")
            elif image.mode == "RGB":
                image.save(thumb_path, "JPEG")
            else:
                # Other modes are converted to RGB before being saved as JPEG
                image.convert("RGB").save(thumb_path, "JPEG")
    else:
        print("Skipping thumbnail")
        os.symlink(file_path, thumb_path)

    return thumb_path
@app.route("/search/")
def search():
    """
    Run a Solr search and render the image and audio results.

    Query string parameters:
        query: passed to Solr as the search query
        page: zero-based page number, defaults to 0
        per_page: number of results per page, defaults to 10

    :return: rendered "search.html" template
    """
    query = request.args.get("query")
    # type=int falls back to the default when the parameter is missing or not a
    # number, instead of raising inside int() and answering with an HTTP 500.
    page = request.args.get("page", 0, type=int)
    per_page = request.args.get("per_page", 10, type=int)

    results = solr.search(query, None, rows=per_page, start=per_page * page)

    docs = []
    for r in results:
        mime_type = r["mime"][0] if "mime" in r else ""

        # Results that are neither image nor audio are left out of the page
        if mime_type.startswith("image"):
            docs.append(ImageDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
        elif mime_type.startswith("audio"):
            docs.append(AudioClipDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))

    return render_template("search.html", docs=docs)
@app.route("/")
def index():
    """Serve the rendered "index.html" template for the root URL."""
    page = render_template("index.html")
    return page
@app.route("/files/<id>/")
def files(id):
    """
    Send the file that belongs to a document.

    :param id: id of the document, taken from the URL
    :return: the file with a mime type guessed from its path, or a 404 response
        when no document is returned for the id
    """
    doc = get_document(id)

    if doc is None:
        # Report the failure with a 404 status rather than the default 200
        return "File not found", 404

    file_path = doc["path"][0] + "/" + doc["name"][0]
    return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
@app.route("/thumbs/<doc_id>/")
def thumbs(doc_id):
    """
    Send the thumbnail of a document, creating it first when needed.

    :param doc_id: id of the document, taken from the URL
    :return: the thumbnail file, or a 404 response when no document is returned
        for the id
    """
    doc = get_document(doc_id)

    if doc is None:
        # Report the failure with a 404 status rather than the default 200
        return "File not found", 404

    # Send the path make_thumb reports instead of rebuilding it by hand
    thumb_path = make_thumb(doc)
    # NOTE(review): thumbnail paths are "thumbnails/<id>"; unless the id itself ends
    # with a known extension, guess_type() returns None here - confirm that the
    # resulting default mime type is what clients expect.
    return send_file(thumb_path, mimetype=mimetypes.guess_type(thumb_path)[0])
# Start the Flask server on host "0.0.0.0", port 8080.
# NOTE(review): this call is at module level, so it also runs when the module is
# imported - consider guarding it with `if __name__ == "__main__":`.
app.run("0.0.0.0", 8080)