Compare commits

..

5 Commits

Author SHA1 Message Date
e72fa1587b EXIF metadata for images 2019-11-09 15:18:44 -05:00
ea4fb7fa0d Bug fixes 2019-11-09 12:00:07 -05:00
b0a868bb73 remove 'must match' 2019-11-08 21:46:54 -05:00
d761a3b595 update readme 2019-11-08 19:42:36 -05:00
2d7a8a2fdc fuzzy toggle 2019-11-08 16:15:10 -05:00
10 changed files with 81 additions and 52 deletions

View File

@@ -56,10 +56,10 @@ sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
File type | Library | Content | Thumbnail | Metadata File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:--- :---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | title | pdf,xps,cbz,fb2,epub | MuPDF | yes | yes, `png` | title |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags | `audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment | `video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
`image/*` | ffmpeg | - | yes, `jpeg` | *planned* | `image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - | `text/plain` | *(none)* | yes | no | - |
docx, xlsx, pptx | | *planned* | no | *planned* | docx, xlsx, pptx | | *planned* | no | *planned* |
@@ -79,7 +79,7 @@ binaries.
apt install git cmake pkg-config libglib2.0-dev\ apt install git cmake pkg-config libglib2.0-dev\
libssl-dev uuid-dev libavformat-dev libswscale-dev \ libssl-dev uuid-dev libavformat-dev libswscale-dev \
python3 libmagic-dev libfreetype6-dev libcurl-dev \ python3 libmagic-dev libfreetype6-dev libcurl-dev \
libbz2-dev yasm libharfbuzz-dev libbz2-dev yasm libharfbuzz-dev ragel
``` ```
*(FreeBSD)* *(FreeBSD)*
```bash ```bash

View File

@@ -10,7 +10,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.1.1"; static const char *const Version = "1.1.3";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",

View File

@@ -142,6 +142,9 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (library == NULL) { if (library == NULL) {
FT_Init_FreeType(&library); FT_Init_FreeType(&library);
} }
if (buf == NULL) {
return;
}
FT_Face face; FT_Face face;
FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face); FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);

View File

@@ -116,9 +116,9 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
return frame; return frame;
} }
#define APPEND_TAG_META(doc, tag, keyname) \ #define APPEND_TAG_META(doc, tag_, keyname) \
text_buffer_t tex = text_buffer_create(-1); \ text_buffer_t tex = text_buffer_create(-1); \
text_buffer_append_string0(&tex, tag->value); \ text_buffer_append_string0(&tex, tag_->value); \
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \ meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
meta_tag->key = keyname; \ meta_tag->key = keyname; \
strcpy(meta_tag->strval, tex.dyn_buffer.buf); \ strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
@@ -151,30 +151,39 @@ void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
} }
__always_inline __always_inline
void append_video_meta(AVFormatContext *pFormatCtx, document_t *doc, int include_audio_tags) { void append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t)); if (is_video) {
meta_duration->key = MetaMediaDuration; meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE; meta_duration->key = MetaMediaDuration;
APPEND_META(doc, meta_duration) meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t)); meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate; meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->longval = pFormatCtx->bit_rate; meta_bitrate->longval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate) APPEND_META(doc, meta_bitrate)
}
AVDictionaryEntry *tag = NULL; AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { if (is_video) {
char key[32]; while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
strncpy(key, tag->key, sizeof(key)); if (include_audio_tags && strcmp(tag->key, "title") == 0) {
APPEND_TAG_META(doc, tag, MetaTitle)
char *ptr = key; } else if (strcmp(tag->key, "comment") == 0) {
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr); APPEND_TAG_META(doc, tag, MetaContent)
} else if (include_audio_tags && strcmp(tag->key, "artist") == 0) {
if (strcmp(key, "title") == 0 && include_audio_tags) { APPEND_TAG_META(doc, tag, MetaArtist)
APPEND_TAG_META(doc, tag, MetaTitle) }
} else if (strcmp(key, "comment") == 0) { }
APPEND_TAG_META(doc, tag, MetaContent) } else {
// EXIF metadata
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
if (include_audio_tags && strcmp(tag->key, "Artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist)
} else if (strcmp(tag->key, "ImageDescription") == 0) {
APPEND_TAG_META(doc, tag, MetaContent)
}
} }
} }
} }
@@ -236,11 +245,6 @@ void parse_media(const char *filepath, document_t *doc) {
if (video_stream != -1) { if (video_stream != -1) {
AVStream *stream = pFormatCtx->streams[video_stream]; AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->nb_frames > 1) {
//This is a video (not a still image)
append_video_meta(pFormatCtx, doc, audio_stream == -1);
}
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) { if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
avformat_close_input(&pFormatCtx); avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx); avformat_free_context(pFormatCtx);
@@ -273,6 +277,8 @@ void parse_media(const char *filepath, document_t *doc) {
return; return;
} }
append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);
// Scale frame // Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size); AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);

View File

@@ -16,7 +16,6 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
if (*fd == -1) { if (*fd == -1) {
perror("open"); perror("open");
printf("%s\n", job->filepath); printf("%s\n", job->filepath);
free(job);
return NULL; return NULL;
} }
} }
@@ -25,6 +24,7 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read); int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) { if (ret == -1) {
perror("read"); perror("read");
return NULL;
} }
} }
@@ -108,7 +108,7 @@ void parse(void *arg) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd); void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_pdf(pdf_buf, doc.size, &doc); parse_pdf(pdf_buf, doc.size, &doc);
if (pdf_buf != buf) { if (pdf_buf != buf && pdf_buf != NULL) {
free(pdf_buf); free(pdf_buf);
} }
@@ -119,7 +119,7 @@ void parse(void *arg) {
void *font_buf = read_all(job, (char *) buf, bytes_read, &fd); void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_font(font_buf, doc.size, &doc); parse_font(font_buf, doc.size, &doc);
if (font_buf != buf) { if (font_buf != buf && font_buf != NULL) {
free(font_buf); free(font_buf);
} }
} }

View File

@@ -114,6 +114,10 @@ int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
void parse_pdf(void *buf, size_t buf_len, document_t *doc) { void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
if (buf == NULL) {
return;
}
static int mu_is_initialized = 0; static int mu_is_initialized = 0;
if (!mu_is_initialized) { if (!mu_is_initialized) {
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL); pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);

View File

@@ -90,7 +90,7 @@ void text_buffer_terminate_string(text_buffer_t *buf) {
} }
__always_inline __always_inline
int utf8_validchr(const char* s) { int utf8_validchr(const char *s) {
if (0x00 == (0x80 & *s)) { if (0x00 == (0x80 & *s)) {
return TRUE; return TRUE;
} else if (0xf0 == (0xf8 & *s)) { } else if (0xf0 == (0xf8 & *s)) {
@@ -130,7 +130,7 @@ int utf8_validchr(const char* s) {
if (0 == (0x1e & s[0])) { if (0 == (0x1e & s[0])) {
return FALSE; return FALSE;
} }
} else { } else {
return FALSE; return FALSE;
} }
@@ -140,12 +140,22 @@ int utf8_validchr(const char* s) {
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) { int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) {
utf8_int32_t c; utf8_int32_t c;
for (void *v = utf8codepoint(str, &c); c != '\0' && ((char*)v - str + 4) < len; v = utf8codepoint(v, &c)) { if (str == NULL || len < 1 ||
(0xf0 == (0xf8 & str[0]) && len < 4) ||
(0xe0 == (0xf0 & str[0]) && len < 3) ||
(0xc0 == (0xe0 & str[0]) && len == 1) ||
*(str) == 0) {
text_buffer_terminate_string(buf);
return 0;
}
for (void *v = utf8codepoint(str, &c); c != '\0' && ((char *) v - str + 4) < len; v = utf8codepoint(v, &c)) {
if (utf8_validchr(v)) { if (utf8_validchr(v)) {
text_buffer_append_char(buf, c); text_buffer_append_char(buf, c);
} }
} }
text_buffer_terminate_string(buf); text_buffer_terminate_string(buf);
return 0;
} }
int text_buffer_append_string0(text_buffer_t *buf, char *str) { int text_buffer_append_string0(text_buffer_t *buf, char *str) {

File diff suppressed because one or more lines are too long

View File

@@ -32,7 +32,7 @@ window.onload = () => {
}) })
}; };
function toggleSearchBar() { function toggleFuzzy() {
searchDebounced(); searchDebounced();
} }
@@ -218,11 +218,22 @@ function search() {
let query = searchBar.value; let query = searchBar.value;
let empty = query === ""; let empty = query === "";
let condition = $("#barToggle").prop("checked") && !empty ? "must" : "should"; let condition = empty ? "should" : "must";
let filters = [ let filters = [
{range: {size: {gte: size_min, lte: size_max}}}, {range: {size: {gte: size_min, lte: size_max}}},
{terms: {index: selectedIndices}} {terms: {index: selectedIndices}}
]; ];
let fields = [
"name^8",
"content^3",
"album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
"font_name^6"
];
if ($("#fuzzyToggle").prop("checked")) {
fields.push("content.nGram");
fields.push("name.nGram^3");
}
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") { if (path !== "") {
@@ -243,12 +254,7 @@ function search() {
multi_match: { multi_match: {
query: query, query: query,
type: "most_fields", type: "most_fields",
fields: [ fields: fields,
"name^8", "name.nGram^3", "content^3",
"content.nGram",
"album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
"font_name^6"
],
operator: "and" operator: "and"
} }
}, },
@@ -265,7 +271,7 @@ function search() {
content: {}, content: {},
name: {}, name: {},
"name.nGram": {}, "name.nGram": {},
// font_name: {}, font_name: {},
} }
}, },
aggs: { aggs: {

View File

@@ -24,9 +24,9 @@
<div class="input-group"> <div class="input-group">
<div class="input-group-prepend"> <div class="input-group-prepend">
<div class="input-group-text"> <div class="input-group-text">
<span onclick="document.getElementById('barToggle').click()">Must match&nbsp</span> <span onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp</span>
<input title="Toggle between 'Should' and 'Must' match mode" type="checkbox" id="barToggle" <input title="Toggle fuzzy searching" type="checkbox" id="fuzzyToggle"
onclick="toggleSearchBar()" checked> onclick="toggleFuzzy()" checked>
</div> </div>
</div> </div>
<input id="searchBar" type="search" class="form-control" placeholder="Search"> <input id="searchBar" type="search" class="form-control" placeholder="Search">