Compare commits

..

24 Commits

Author SHA1 Message Date
75f99025d9 add exif dateTime, allow some special characters in text meta 2020-02-09 08:47:13 -05:00
ebe852bd5a Fix rewrite-url arg 2020-02-09 08:23:17 -05:00
402b103c49 Fix total count for ES 7.5 2020-02-08 09:25:00 -05:00
e9b6e1cdc2 Turn off auto optimisation in libtesseract build 2020-02-08 08:32:04 -05:00
ed1ce8ab5e Handle XML errors #18 2020-02-07 10:08:01 -05:00
d1fa4febc4 Improve scroll feature, UI fix 2020-02-07 10:08:01 -05:00
048c55df7b Update README.md 2020-02-06 19:56:29 -05:00
f77bc6a025 Update README.md 2020-02-06 19:55:32 -05:00
efdde2734e version bump 2020-02-06 19:28:05 -05:00
66658fa8f7 Remove trailing/leading white space in text meta fields 2020-02-06 19:27:30 -05:00
df41c251e4 (Breaking!) Add some exif tags 2020-02-06 19:21:50 -05:00
3282ab56ba Version bump 2020-02-02 09:26:54 -05:00
8300838d30 Suppress XML parsing errors (#18) 2020-02-02 09:26:03 -05:00
c9870a6d3d Remove -march=native for release build... 2020-02-02 09:03:06 -05:00
a143cc4fcf bundle openssl... 2020-02-02 08:39:20 -05:00
9ef1f3781d fix attempt for #11 2020-02-01 20:04:26 -05:00
bbee8aa721 tesseract ocr path fix 2020-02-01 20:03:59 -05:00
d22f83c797 curl fix 2020-02-01 15:22:43 -05:00
50615486a4 curl fix attempt 2020-02-01 14:42:42 -05:00
ca79e4f797 add /status endpoint 2020-01-28 10:18:37 -05:00
6a9fd08a80 Merge pull request #21 from simon987/wip-20
Fixes #20
2020-01-27 09:16:00 -05:00
cab890dc9b #20 wip 2020-01-27 09:09:42 -05:00
b3c4faf2df Update README.md 2020-01-26 12:37:13 -05:00
353937171a Update README.md 2020-01-20 15:54:53 -05:00
30 changed files with 346 additions and 145 deletions

View File

@@ -1,8 +1,6 @@
cmake_minimum_required(VERSION 3.7)
set(CMAKE_C_STANDARD 11)
option(CURL_STATIC "Link to curl statically" on)
project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
@@ -85,7 +83,7 @@ target_link_directories(
target_compile_options(sist2
PRIVATE
-Ofast
-march=native
# -march=native
-fPIC
-fno-stack-protector
-fomit-frame-pointer
@@ -114,10 +112,6 @@ TARGET_LINK_LIBRARIES(
pthread
# curl
${PROJECT_SOURCE_DIR}/lib/libcurl.a
ssl crypto
m
bz2
${PROJECT_SOURCE_DIR}/lib/libmagic.a
@@ -136,6 +130,12 @@ TARGET_LINK_LIBRARIES(
${PROJECT_SOURCE_DIR}/lib/libtiff.a
${PROJECT_SOURCE_DIR}/lib/libpng16.a
stdc++
# curl
${PROJECT_SOURCE_DIR}/lib/libcurl.a
${PROJECT_SOURCE_DIR}/lib/libcrypto.a
${PROJECT_SOURCE_DIR}/lib/libssl.a
dl
)
add_custom_target(

View File

@@ -16,4 +16,7 @@ RUN mkdir -p /usr/share/tessdata && \
ADD sist2 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]

View File

@@ -5,6 +5,11 @@ strip sist2
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest \
-t docker.pkg.github.com/simon987/sist2/sist2:latest -t docker.pkg.github.com/simon987/sist2/sist2:${version}
docker push simon987/sist2:${version}
docker push simon987/sist2:latest
docker push docker.pkg.github.com/simon987/sist2/sist2:latest
docker push docker.pkg.github.com/simon987/sist2/sist2:${version}
docker run --rm -it simon987/sist2 -v

View File

@@ -29,7 +29,7 @@ sist2 (Simple incremental search tool)
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1.
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download an [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
@@ -39,6 +39,9 @@ sist2 (Simple incremental search tool)
## Example usage
![demo](demo.gif)
See help page `sist2 --help` for more details.
**Scan a directory**
@@ -92,7 +95,7 @@ File type | Library | Content | Thumbnail | Metadata
pdf,xps,cbz,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |

Binary file not shown.

View File

@@ -1,5 +1,9 @@
{
"properties": {
"_tie": {
"type": "keyword",
"doc_values": true
},
"path": {
"type": "text",
"analyzer": "path_analyzer",
@@ -7,7 +11,7 @@
},
"suggest-path": {
"type": "completion",
"analyzer": "keyword"
"analyzer": "case_insensitive_kw_analyzer"
},
"mime": {
"type": "keyword"
@@ -105,6 +109,30 @@
},
"tag": {
"type": "keyword"
},
"exif_make": {
"type": "text"
},
"exif_model": {
"type": "text"
},
"exif:software": {
"type": "text"
},
"exif_exposure_time": {
"type": "keyword"
},
"exif_fnumber": {
"type": "keyword"
},
"exif_iso_speed_ratings": {
"type": "keyword"
},
"exif_focal_length": {
"type": "keyword"
},
"exif_user_comment": {
"type": "text"
}
}
}

10
schema/pipeline.json Normal file
View File

@@ -0,0 +1,10 @@
{
"description": "Copy _id to _tie",
"processors": [
{
"script": {
"source": "ctx._tie = ctx._id;"
}
}
]
}

View File

@@ -21,6 +21,12 @@
"lowercase"
]
},
"case_insensitive_kw_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": [

View File

@@ -75,7 +75,7 @@ cd tesseract
mkdir build
cd build
cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off -DBUILD_TESTS=off -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_FLAGS="-fPIC" ..
-DCMAKE_CXX_FLAGS="-fPIC" -DAUTO_OPTIMIZE=off ..
make -j $THREADS
cd ../..
mv tesseract/build/libtesseract.a .
@@ -106,12 +106,23 @@ make -j $THREADS
cd ..
mv libpng/.libs/libpng16.a .
# openssl...
git clone --depth 1 -b OpenSSL_1_1_0-stable https://github.com/openssl/openssl
cd openssl
./config --prefix=$(pwd)/../ssl
make depend
make -j $THREADS
make install
cd ..
mv ./openssl/libcrypto.a ./openssl/libssl.a .
# curl
wget -nc https://curl.haxx.se/download/curl-7.68.0.tar.gz
tar -xzf curl-7.68.0.tar.gz
cd curl-7.68.0
./configure --disable-ldap --disable-ldaps --without-librtmp --disable-rtsp --disable-crypto-auth \
--disable-smtp --enable-static --disable-shared
--disable-smtp --without-libidn2 --without-nghttp2 --without-brotli --enable-static --disable-shared \
--without-libpsl --with-ssl=$(pwd)/../ssl
make -j $THREADS
cd ..
mv curl-7.68.0/lib/.libs/libcurl.a .

View File

@@ -1,6 +1,9 @@
import json
files = [
"schema/mappings.json",
"schema/settings.json",
"schema/pipeline.json",
]
@@ -9,6 +12,6 @@ def clean(filepath):
for file in files:
with open(file, "rb") as f:
data = f.read()
with open(file, "r") as f:
data = json.dumps(json.load(f), separators=(",", ":")).encode()
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -15,6 +15,13 @@
#define DEFAULT_BIND_ADDR "localhost"
#define DEFAULT_PORT "4090"
const char* TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"./",
NULL
};
scan_args_t *scan_args_create() {
scan_args_t *args = calloc(sizeof(scan_args_t), 1);
@@ -136,13 +143,23 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->tesseract_lang != NULL) {
TessBaseAPI *api = TessBaseAPICreate();
ret = TessBaseAPIInit3(api, TESS_DATAPATH, args->tesseract_lang);
char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!");
}
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
if (ret != 0) {
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
return 1;
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
args->tesseract_path = path;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
@@ -156,7 +173,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg ocr=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
return 0;
}

View File

@@ -17,6 +17,7 @@ typedef struct scan_args {
char *archive;
archive_mode_t archive_mode;
char *tesseract_lang;
const char *tesseract_path;
} scan_args_t;
scan_args_t *scan_args_create();

View File

@@ -28,6 +28,7 @@ struct {
pthread_mutex_t mupdf_mu;
char * tesseract_lang;
char * tesseract_path;
} ScanCtx;
struct {

View File

@@ -129,7 +129,7 @@ void elastic_flush() {
Indexer->queued = 0;
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
if (r->status_code == 0) {
@@ -245,6 +245,11 @@ void elastic_init(int force_reset) {
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json, "Content-Type: application/json");
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json, "Content-Type: application/json");
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
@@ -274,3 +279,28 @@ cJSON *elastic_get_document(const char *uuid_str) {
free_response(r);
return json;
}
char *elastic_get_status() {
char url[4096];
snprintf(url, 4096,
"%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
response_t *r = web_get(url);
cJSON *json = NULL;
char *status = malloc(128 * sizeof(char));
status[0] = '\0';
if (r->status_code == 200) {
json = cJSON_Parse(r->body);
const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
if (metadata != NULL) {
const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
const cJSON *sist2 = cJSON_GetObjectItem(indices, "sist2");
const cJSON *state = cJSON_GetObjectItem(sist2, "state");
strcpy(status, state->valuestring);
}
}
free_response(r);
cJSON_Delete(json);
return status;
}

View File

@@ -30,4 +30,6 @@ void elastic_init(int force_reset);
cJSON *elastic_get_document(const char *uuid_str);
char *elastic_get_status();
#endif

File diff suppressed because one or more lines are too long

View File

@@ -118,6 +118,24 @@ char *get_meta_key_text(enum metakey meta_key) {
return "font_name";
case MetaParent:
return "parent";
case MetaExifMake:
return "exif_make";
case MetaExifSoftware:
return "exif_software";
case MetaExifExposureTime:
return "exif_exposure_time";
case MetaExifFNumber:
return "exif_fnumber";
case MetaExifFocalLength:
return "exif_focal_length";
case MetaExifUserComment:
return "exif_user_comment";
case MetaExifIsoSpeedRatings:
return "exif_iso_speed_ratings";
case MetaExifModel:
return "exif_model";
case MetaExifDateTime:
return "exif_datetime";
default:
return NULL;
}
@@ -255,6 +273,15 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaGenre:
case MetaFontName:
case MetaParent:
case MetaExifMake:
case MetaExifSoftware:
case MetaExifExposureTime:
case MetaExifFNumber:
case MetaExifFocalLength:
case MetaExifUserComment:
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {

View File

@@ -6,7 +6,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.2.3";
static const char *const Version = "1.2.9";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -49,8 +49,10 @@ void sist2_scan(scan_args_t *args) {
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.tesseract_lang = args->tesseract_lang;
ScanCtx.tesseract_path = args->tesseract_path;
init_dir(ScanCtx.index.path);

View File

@@ -1,10 +1,20 @@
#include "doc.h"
#include "src/ctx.h"
void dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {
int dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {
mce_skip_attributes(reader);
xmlErrorPtr err = xmlGetLastError();
if (err != NULL) {
if (err->level == XML_ERR_FATAL) {
LOG_ERRORF("doc.c", "Got fatal XML error while parsing document: %s", err->message)
return -1;
} else {
LOG_ERRORF("doc.c", "Got recoverable XML error while parsing document: %s", err->message)
}
}
mce_start_children(reader) {
mce_start_element(reader, NULL, _X("t")) {
mce_skip_attributes(reader);
@@ -18,10 +28,14 @@ void dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {
} mce_end_element(reader);
mce_start_element(reader, NULL, NULL) {
dump_text(reader, buf);
int ret = dump_text(reader, buf);
if (ret != 0) {
return ret;
}
} mce_end_element(reader);
} mce_end_children(reader)
return 0;
}
__always_inline
@@ -52,23 +66,28 @@ int should_read_part(opcPart part) {
}
__always_inline
void read_part(opcContainer *c, dyn_buffer_t *buf, opcPart part, document_t *doc) {
int read_part(opcContainer *c, dyn_buffer_t *buf, opcPart part, document_t *doc) {
mceTextReader_t reader;
int ret = opcXmlReaderOpen(c, &reader, part, NULL, "UTF-8", 0);
int ret = opcXmlReaderOpen(c, &reader, part, NULL, "UTF-8", XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
if (ret != OPC_ERROR_NONE) {
LOG_ERRORF(doc->filepath, "(doc.c) opcXmlReaderOpen() returned error code %d", ret);
return;
return -1;
}
mce_start_document(&reader) {
mce_start_element(&reader, NULL, NULL) {
dump_text(&reader, buf);
ret = dump_text(&reader, buf);
if (ret != 0) {
mceTextReaderCleanup(&reader);
return -1;
}
} mce_end_element(&reader);
} mce_end_document(&reader);
mceTextReaderCleanup(&reader);
return 0;
}
void parse_doc(void *mem, size_t mem_len, document_t *doc) {
@@ -88,7 +107,10 @@ void parse_doc(void *mem, size_t mem_len, document_t *doc) {
opcPart part = opcPartGetFirst(c);
do {
if (should_read_part(part)) {
read_part(c, &buf, part, doc);
int ret = read_part(c, &buf, part, doc);
if (ret != 0) {
break;
}
}
} while ((part = opcPartGetNext(c, part)));

View File

@@ -193,6 +193,24 @@ append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc,
APPEND_TAG_META(doc, tag, MetaArtist)
} else if (strcmp(tag->key, "ImageDescription") == 0) {
APPEND_TAG_META(doc, tag, MetaContent)
} else if (strcmp(tag->key, "Make") == 0) {
APPEND_TAG_META(doc, tag, MetaExifMake)
} else if (strcmp(tag->key, "Model") == 0) {
APPEND_TAG_META(doc, tag, MetaExifModel)
} else if (strcmp(tag->key, "Software") == 0) {
APPEND_TAG_META(doc, tag, MetaExifSoftware)
} else if (strcmp(tag->key, "FNumber") == 0) {
APPEND_TAG_META(doc, tag, MetaExifFNumber)
} else if (strcmp(tag->key, "FocalLength") == 0) {
APPEND_TAG_META(doc, tag, MetaExifFocalLength)
} else if (strcmp(tag->key, "UserComment") == 0) {
APPEND_TAG_META(doc, tag, MetaExifUserComment)
} else if (strcmp(tag->key, "ISOSpeedRatings") == 0) {
APPEND_TAG_META(doc, tag, MetaExifIsoSpeedRatings)
} else if (strcmp(tag->key, "ExposureTime") == 0) {
APPEND_TAG_META(doc, tag, MetaExifExposureTime)
} else if (strcmp(tag->key, "DateTime") == 0) {
APPEND_TAG_META(doc, tag, MetaExifDateTime)
}
}
}

View File

@@ -141,7 +141,7 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
TessBaseAPI *api = TessBaseAPICreate();
TessBaseAPIInit3(api, TESS_DATAPATH, ScanCtx.tesseract_lang);
TessBaseAPIInit3(api, ScanCtx.tesseract_path, ScanCtx.tesseract_lang);
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
TessBaseAPISetSourceResolution(api, pix->xres);
@@ -157,8 +157,8 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
fz_drop_pixmap(ctx, pix);
}
fz_drop_pixmap(ctx, pix);
}
}

View File

@@ -3,7 +3,6 @@
#define UUID_STR_LEN 37
#define UNUSED(x) __attribute__((__unused__)) x
#define TESS_DATAPATH "/usr/share/tessdata/"
#include <glib-2.0/glib.h>
#include <unistd.h>

View File

@@ -2,9 +2,9 @@
#define SIST2_TYPES_H
#define META_INT_MASK 0xF0
#define META_STR_MASK 0xE0
#define META_LONG_MASK 0xD0
#define META_INT_MASK 0x80
#define META_STR_MASK 0x40
#define META_LONG_MASK 0x20
#define IS_META_INT(key) (key & META_INT_MASK) == META_INT_MASK
#define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK
#define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK
@@ -31,6 +31,16 @@ enum metakey {
MetaTitle = 12 | META_STR_MASK,
MetaFontName = 13 | META_STR_MASK,
MetaParent = 14 | META_STR_MASK,
MetaExifMake = 15 | META_STR_MASK,
MetaExifSoftware = 16 | META_STR_MASK,
MetaExifExposureTime = 17 | META_STR_MASK,
MetaExifFNumber = 18 | META_STR_MASK,
MetaExifFocalLength = 19 | META_STR_MASK,
MetaExifUserComment = 20 | META_STR_MASK,
MetaExifModel = 21 | META_STR_MASK,
MetaExifIsoSpeedRatings = 22 | META_STR_MASK,
MetaExifDateTime = 23 | META_STR_MASK,
//Note to self: this will break after 31 entries
};
#define INDEX_TYPE_BIN "binary"

View File

@@ -1,4 +1,5 @@
#include "util.h"
#include "src/ctx.h"
dyn_buffer_t dyn_buffer_create() {
dyn_buffer_t buf;
@@ -90,7 +91,11 @@ text_buffer_t text_buffer_create(int max_size) {
}
void text_buffer_terminate_string(text_buffer_t *buf) {
dyn_buffer_write_char(&buf->dyn_buffer, '\0');
if (*(buf->dyn_buffer.buf + buf->dyn_buffer.cur - 1) == ' ') {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur - 1) = '\0';
} else {
dyn_buffer_write_char(&buf->dyn_buffer, '\0');
}
}
__always_inline
@@ -171,8 +176,8 @@ int text_buffer_append_string0(text_buffer_t *buf, char *str) {
int text_buffer_append_char(text_buffer_t *buf, int c) {
if (SHOULD_IGNORE_CHAR(c)) {
if (!buf->last_char_was_whitespace) {
if (SHOULD_IGNORE_CHAR(c) || c == ' ') {
if (!buf->last_char_was_whitespace && buf->dyn_buffer.cur != 0) {
dyn_buffer_write_char(&buf->dyn_buffer, ' ');
buf->last_char_was_whitespace = TRUE;
@@ -317,4 +322,29 @@ GHashTable *incremental_get_table() {
return file_table;
}
const char *find_file_in_paths(const char *paths[], const char *filename) {
for (int i = 0; paths[i] != NULL; i++) {
char *apath = abspath(paths[i]);
if (apath == NULL) {
continue;
}
char path[PATH_MAX];
snprintf(path, sizeof(path), "%s%s", apath, filename);
LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath)
free(apath);
struct stat info;
int ret = stat(path, &info);
if (ret != -1) {
return paths[i];
}
}
return NULL;
}

View File

@@ -7,7 +7,7 @@
#define INITIAL_BUF_SIZE 1024 * 16
#define SHOULD_IGNORE_CHAR(c) !(SHOULD_KEEP_CHAR(c))
#define SHOULD_KEEP_CHAR(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'z') || (c > 127))
#define SHOULD_KEEP_CHAR(c) ((c >= '\'' && c <= ';') || (c >= 'A' && c <= 'z') || (c > 127))
typedef struct dyn_buffer {
@@ -74,5 +74,6 @@ int incremental_get(GHashTable *table, unsigned long inode_no);
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no);
const char *find_file_in_paths(const char **paths, const char *filename);
#endif

View File

@@ -221,14 +221,6 @@ int search(UNUSED(void *p), onion_request *req, onion_response *res) {
return OCS_NOT_PROCESSED;
}
char *scroll_param;
const char *scroll = onion_request_get_query(req, "scroll");
if (scroll != NULL) {
scroll_param = "?scroll=3m";
} else {
scroll_param = "";
}
const struct onion_block_t *block = onion_request_get_data(req);
if (block == NULL) {
@@ -236,7 +228,7 @@ int search(UNUSED(void *p), onion_request *req, onion_response *res) {
}
char url[4096];
snprintf(url, 4096, "%s/sist2/_search%s", WebCtx.es_url, scroll_param);
snprintf(url, 4096, "%s/sist2/_search", WebCtx.es_url);
response_t *r = web_post(url, onion_block_data(block), "Content-Type: application/json");
set_default_headers(res);
@@ -254,43 +246,6 @@ int search(UNUSED(void *p), onion_request *req, onion_response *res) {
return OCS_PROCESSED;
}
int scroll(UNUSED(void *p), onion_request *req, onion_response *res) {
int flags = onion_request_get_flags(req);
if ((flags & OR_METHODS) != OR_GET) {
return OCS_NOT_PROCESSED;
}
char url[4096];
snprintf(url, 4096, "%s/_search/scroll", WebCtx.es_url);
const char *scroll_id = onion_request_get_query(req, "scroll_id");
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "scroll_id", scroll_id);
cJSON_AddStringToObject(json, "scroll", "3m");
char *json_str = cJSON_PrintUnformatted(json);
response_t *r = web_post(url, json_str, "Content-Type: application/json");
cJSON_Delete(json);
cJSON_free(json_str);
if (r->status_code != 200) {
free_response(r);
return OCS_NOT_PROCESSED;
}
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/json");
onion_response_set_header(res, "Content-Disposition", "application/json");
onion_response_set_length(res, r->size);
onion_response_write(res, r->body, r->size);
free_response(r);
return OCS_PROCESSED;
}
int serve_file_from_url(cJSON *json, index_t *idx, onion_request *req, onion_response *res) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
@@ -431,6 +386,23 @@ int file(UNUSED(void *p), onion_request *req, onion_response *res) {
return ret;
}
int status(UNUSED(void *p), UNUSED(onion_request *req), onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/x-empty");
char *status = elastic_get_status();
if (strcmp(status, "open") == 0) {
onion_response_set_code(res, 204);
} else {
onion_response_set_code(res, 500);
}
free(status);
return OCS_PROCESSED;
}
void serve(const char *hostname, const char *port) {
onion *o = onion_new(O_POOL);
onion_set_timeout(o, 3500);
@@ -449,7 +421,7 @@ void serve(const char *hostname, const char *port) {
onion_url_add(urls, "img/sprite-skin-flat.png", img_sprite_skin_flag);
onion_url_add(urls, "es", search);
onion_url_add(urls, "scroll", scroll);
onion_url_add(urls, "status", status);
onion_url_add(
urls,
"^t/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})/"

File diff suppressed because one or more lines are too long

View File

@@ -113,7 +113,7 @@ function getTags(hit, mimeCategory) {
switch (mimeCategory) {
case "video":
case "image":
if (hit["_source"].hasOwnProperty("videoc")) {
if (hit["_source"].hasOwnProperty("videoc") && hit["_source"]["videoc"]) {
const formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-video");
formatTag.appendChild(document.createTextNode(hit["_source"]["videoc"].replace(" ", "")));
@@ -121,7 +121,7 @@ function getTags(hit, mimeCategory) {
}
break;
case "audio": {
if (hit["_source"].hasOwnProperty("audioc")) {
if (hit["_source"].hasOwnProperty("audioc") && hit["_source"]["audioc"]) {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-audio");
formatTag.appendChild(document.createTextNode(hit["_source"]["audioc"]));
@@ -499,8 +499,7 @@ function makePreloader() {
function makePageIndicator(searchResult) {
let pageIndicator = document.createElement("div");
pageIndicator.setAttribute("class", "page-indicator font-weight-light");
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
const totalHits = searchResult["aggregations"]["total_count"]["value"];
pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));
return pageIndicator;
}
@@ -547,8 +546,7 @@ function makeStatsCard(searchResult) {
});
let stat = document.createElement("span");
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
const totalHits = searchResult["aggregations"]["total_count"]["value"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
statsCardBody.appendChild(stat);

View File

@@ -6,7 +6,8 @@ let tagTree;
let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar");
let scroll_id = null;
let lastDoc = null;
let reachedEnd = false;
let docCount = 0;
let coolingDown = false;
let searchBusy = true;
@@ -259,41 +260,18 @@ function insertHits(resultContainer, hits) {
}
window.addEventListener("scroll", function () {
if (!coolingDown && !searchBusy) {
if (!searchBusy) {
let threshold = 400;
if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight - threshold) {
coolingDown = true;
doScroll();
if (!reachedEnd) {
coolingDown = true;
search(lastDoc);
}
}
}
});
function doScroll() {
$.get("scroll", {scroll_id: scroll_id})
.then(searchResult => {
let searchResults = document.getElementById("searchResults");
let hits = searchResult["hits"]["hits"];
//Page indicator
let pageIndicator = makePageIndicator(searchResult);
searchResults.appendChild(pageIndicator);
//Result container
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);
insertHits(resultContainer, hits);
if (hits.length === SIZE) {
coolingDown = false;
}
})
.fail(() => {
window.location.reload();
})
}
function getSelectedNodes(tree) {
let selectedNodes = [];
@@ -314,21 +292,25 @@ function getSelectedNodes(tree) {
return selectedNodes
}
function search() {
function search(after = null) {
lastDoc = null;
if (searchBusy) {
return;
}
searchBusy = true;
//Clear old search results
let searchResults = document.getElementById("searchResults");
while (searchResults.firstChild) {
searchResults.removeChild(searchResults.firstChild);
//Clear old search results
let preload;
if (!after) {
while (searchResults.firstChild) {
searchResults.removeChild(searchResults.firstChild);
}
preload = makePreloader();
searchResults.appendChild(preload);
}
const preload = makePreloader();
searchResults.appendChild(preload);
let query = searchBar.value;
let empty = query === "";
let condition = empty ? "should" : "must";
@@ -362,9 +344,9 @@ function search() {
filters.push([{terms: {"tag": tags}}]);
}
$.jsonPost("es?scroll=1", {
let q = {
"_source": {
excludes: ["content"]
excludes: ["content", "_tie"]
},
query: {
bool: {
@@ -379,8 +361,9 @@ function search() {
filter: filters
}
},
sort: [
"_score"
"sort": [
{"_score": {"order": "desc"}},
{"_tie": {"order": "asc"}}
],
highlight: {
pre_tags: ["<mark>"],
@@ -393,24 +376,41 @@ function search() {
font_name: {},
}
},
aggs: {
total_size: {"sum": {"field": "size"}}
},
aggs:
{
total_size: {"sum": {"field": "size"}},
total_count: {"value_count": {"field": "size"}}
},
size: SIZE,
}).then(searchResult => {
scroll_id = searchResult["_scroll_id"];
};
preload.remove();
//Search stats
searchResults.appendChild(makeStatsCard(searchResult));
if (after) {
q.search_after = [after["_score"], after["_id"]];
}
$.jsonPost("es", q).then(searchResult => {
let hits = searchResult["hits"]["hits"];
if (hits) {
lastDoc = hits[hits.length - 1];
}
if (!after) {
preload.remove();
searchResults.appendChild(makeStatsCard(searchResult));
} else {
let pageIndicator = makePageIndicator(searchResult);
searchResults.appendChild(pageIndicator);
}
//Setup page
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);
docCount = 0;
insertHits(resultContainer, searchResult["hits"]["hits"]);
if (!after) {
docCount = 0;
}
reachedEnd = hits.length !== SIZE;
insertHits(resultContainer, hits);
searchBusy = false;
});
}

View File

@@ -11,7 +11,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">v1.2.3</span>
<span class="badge badge-pill version">v1.2.9</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
</nav>