Compare commits

..

5 Commits

Author SHA1 Message Date
95bbe39afc Update libmupdf 2020-10-25 09:44:30 -04:00
72ce217f9c Optionally ES schema from file #117 2020-10-25 09:44:30 -04:00
641a8ec90c sidecar files #114, version bump 2020-10-25 09:44:30 -04:00
7a505c2287 Fix typo 2020-10-25 09:44:30 -04:00
12f162d760 Fix #110 2020-10-25 09:44:30 -04:00
24 changed files with 554 additions and 378 deletions

View File

@@ -9,8 +9,11 @@ add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(
sist2
add_executable(sist2
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
@@ -25,12 +28,9 @@ add_executable(
src/util.c src/util.h
src/ctx.h src/types.h
src/log.c src/log.h
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c)
src/stats.c src/stats.h src/ctx.c
src/parsing/sidecar.c src/parsing/sidecar.h)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
@@ -73,7 +73,6 @@ if (SIST_DEBUG)
sist2
PRIVATE
-fsanitize=address
# -static
)
set_target_properties(
sist2
@@ -106,7 +105,6 @@ target_link_libraries(
argparse
unofficial::glib::glib
unofficial::mongoose::mongoose
# OpenSSL::SSL OpenSSL::Crypto
CURL::libcurl
${UUID_LIB}

View File

@@ -16,6 +16,7 @@
* [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
* [tagging](#tagging)
* [sidecar files](#sidecar-files)
```
Usage: sist2 scan [OPTION]... PATH
@@ -52,6 +53,8 @@ Index options
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--mappings-file=<str> Path to Elasticsearch mappings.
--settings-file=<str> Path to Elasticsearch settings.
--async-script Execute user script asynchronously.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
@@ -153,10 +156,13 @@ documents.idx/
├── agg_mime.csv
├── agg_date.csv
├── add_size.csv
├── thumbs
├── thumbs/
| ├── data.mdb
| └── lock.mdb
── tags
── tags/
| ├── data.mdb
| └── lock.mdb
└── meta/
├── data.mdb
└── lock.mdb
```
@@ -183,9 +189,11 @@ by a third party application. The 'external' index must have the following forma
my_index/
├── descriptor.json
├── _index_0
└── thumbs
├── data.mdb
└── lock.mdb
└── thumbs/
| ├── data.mdb
| └── lock.mdb
└── meta/
└── <empty>
```
*descriptor.json*:
@@ -251,6 +259,10 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
Print index in JSON format to stdout.
* `--script-file`
Path to user script. See [Scripting](scripting.md).
* `--mappings-file`
Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
* `--settings-file`
Path to custom Elasticsearch settings. *(See above)*
* `--async-script`
Use `wait_for_completion=false` elasticsearch option while executing user script.
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
@@ -260,7 +272,6 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
down the process.
* `-f, --force-reset`
Reset Elasticsearch mappings and settings.
**(You must use this option the first time you use the index command)**.
### Index examples
@@ -343,9 +354,48 @@ See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries, and the page needs
to be reloaded for the tag tab to update*
to be reloaded for the tags tab to update*
### Automatic tagging
See [scripting](scripting.md) documentation.
# Sidecar files
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
original document's metadata. Sidecar metadata files will also work inside archives.
Sidecar files themselves are not saved in the index.
This feature is useful to leverage third-party applications such as speech-to-text or
OCR to add additional metadata to a file.
**Example**
```
~/Documents/
├── Video.mp4
└── Video.mp4.s2meta
```
The sidecar file must have exactly the same file path and the `.s2meta` suffix.
`Video.mp4.s2meta`:
```json
{
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
"author": "Some author",
"duration": 12345,
"bitrate": 67890,
"some_arbitrary_field": [1,2,3]
}
```
```
sist2 scan ~/Documents -o ./docs.idx
sist2 index ./docs.idx
```
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
it is not currently possible to restore both manual tags and sidecar tags without user scripts
while reindexing.

View File

@@ -449,3 +449,4 @@ image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
sist2/sidecar, s2meta
1 application/arj arj
449 image/x-sony-sr2 sr2
450 image/x-sony-srf srf
451 image/x-epson-erf erf
452 sist2/sidecar s2meta

View File

@@ -3,6 +3,7 @@ noparse = set()
ext_in_hash = set()
major_mime = {
"sist2": 0,
"model": 1,
"example": 2,
"message": 3,
@@ -122,7 +123,11 @@ def mime_id(mime):
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty":
cnt -= 1
return "1"
elif mime == "sist2/sidecar":
cnt -= 1
return "2"
return mime_id

View File

@@ -56,6 +56,12 @@ void scan_args_destroy(scan_args_t *args) {
void index_args_destroy(index_args_t *args) {
//todo
if (args->es_mappings_path) {
free(args->es_mappings);
}
if (args->es_settings_path) {
free(args->es_settings);
}
free(args);
}
@@ -231,25 +237,25 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 0;
}
int load_script(const char *script_path, char **dst) {
int load_external_file(const char *file_path, char **dst) {
struct stat info;
int res = stat(script_path, &info);
int res = stat(file_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
return 1;
}
int fd = open(script_path, O_RDONLY);
int fd = open(file_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
return 1;
}
*dst = malloc(info.st_size + 1);
res = read(fd, *dst, info.st_size);
if (res < 0) {
fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
return 1;
}
@@ -293,7 +299,19 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
}
if (args->script_path != NULL) {
if (load_script(args->script_path, &args->script) != 0) {
if (load_external_file(args->script_path, &args->script) != 0) {
return 1;
}
}
if (args->es_settings_path != NULL) {
if (load_external_file(args->es_settings_path, &args->es_settings) != 0) {
return 1;
}
}
if (args->es_mappings_path != NULL) {
if (load_external_file(args->es_mappings_path, &args->es_mappings) != 0) {
return 1;
}
}
@@ -309,6 +327,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path)
LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings)
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
@@ -445,7 +467,7 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
LOG_FATAL("cli.c", "--script-file argument is required");
}
if (load_script(args->script_path, &args->script) != 0) {
if (load_external_file(args->script_path, &args->script) != 0) {
return 1;
}

View File

@@ -39,6 +39,10 @@ typedef struct index_args {
const char *index_path;
const char *script_path;
char *script;
const char *es_settings_path;
char *es_settings;
const char *es_mappings_path;
char *es_mappings;
int print;
int batch_size;
int async_script;

View File

@@ -63,6 +63,8 @@ typedef struct {
tpool_t *pool;
store_t *tag_store;
GHashTable *tags;
store_t *meta_store;
GHashTable *meta;
} IndexCtx_t;
typedef struct {

View File

@@ -356,7 +356,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
free_response(r);
}
void elastic_init(int force_reset) {
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {
// Check if index exists
char url[4096];
@@ -392,13 +392,13 @@ void elastic_init(int force_reset) {
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, settings_json);
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
r = web_put(url, user_settings ? user_settings : settings_json);
LOG_INFOF("elastic.c", "Update user_settings <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, mappings_json);
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
r = web_put(url, user_mappings ? user_mappings : mappings_json);
LOG_INFOF("elastic.c", "Update user_mappings <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);

View File

@@ -25,7 +25,7 @@ es_indexer_t *create_indexer(const char *url, const char *index);
void elastic_cleanup();
void finish_indexer(char *script, int async_script, char *index_id);
void elastic_init(int force_reset);
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
cJSON *elastic_get_document(const char *uuid_str);

View File

@@ -247,13 +247,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
}
dyn_buffer_write_char(&buf, '\0');
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, buf.buf);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
char full_filename[PATH_MAX];
strcpy(full_filename, buf.buf);
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') {
@@ -334,8 +329,36 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
key = getc(file);
}
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, full_filename);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
cJSON *child;
for (child = meta_obj->child; child != NULL; child = child->next) {
char meta_key[4096];
strcpy(meta_key, child->string);
cJSON_DeleteItemFromObject(document, meta_key);
cJSON_AddItemReferenceToObject(document, meta_key, child);
}
}
}
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, full_filename);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
func(document, uuid_str);
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
}
}
dyn_buffer_destroy(&buf);
fclose(file);

View File

@@ -43,9 +43,13 @@ void store_destroy(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
if (key_len == 16) {
char uuid_str[UUID_STR_LEN] = {0, };
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
}
MDB_val mdb_key;
@@ -136,7 +140,9 @@ GHashTable *store_read_all(store_t *store) {
count += 1;
}
LOG_DEBUGF("store.c", "Read tags for %d documents", count);
const char *path;
mdb_env_get_path(store->env, &path);
LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);
mdb_cursor_close(cur);
mdb_txn_abort(txn);

View File

@@ -8,6 +8,7 @@
#define STORE_SIZE_TN 1024 * 1024 * 5
#define STORE_SIZE_TAG 1024 * 16
#define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t {
MDB_dbi dbi;

View File

@@ -21,7 +21,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.8.4";
static const char *const Version = "2.8.5";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -182,6 +182,10 @@ void sist2_scan(scan_args_t *args) {
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
scan_print_header();
if (args->incremental != NULL) {
@@ -263,7 +267,7 @@ void sist2_index(index_args_t *args) {
IndexCtx.batch_size = args->batch_size;
if (!args->print) {
elastic_init(args->force_reset);
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
}
char descriptor_path[PATH_MAX];
@@ -289,6 +293,10 @@ void sist2_index(index_args_t *args) {
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
index_func f;
if (args->print) {
f = print_json;
@@ -438,6 +446,8 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "

View File

@@ -6,6 +6,7 @@
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
#define MIME_EMPTY 1
#define MIME_SIST2_SIDECAR 2
#define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)

View File

@@ -128,333 +128,334 @@ enum mime {
application_x_dvi=655480,
application_x_elc=655481,
application_x_empty=1,
application_x_envoy=655483,
application_x_esrehber=655484,
application_x_excel=655485,
application_x_executable=655486,
application_x_font_gdos=655487,
application_x_font_pf2=655488,
application_x_font_pfm=655489,
application_x_font_sfn=655490,
application_x_font_ttf=655491 | 0x20000000,
application_x_fptapplication_x_dbt=655492,
application_x_freelance=655493,
application_x_gamecube_rom=655494,
application_x_gdbm=655495,
application_x_gettext_translation=655496,
application_x_git=655497,
application_x_gsp=655498,
application_x_gss=655499,
application_x_gtar=655500,
application_x_gzip=655501,
application_x_hdf=655502,
application_x_helpfile=655503,
application_x_httpd_imap=655504,
application_x_ima=655505,
application_x_innosetup=655506,
application_x_internett_signup=655507,
application_x_inventor=655508,
application_x_ip2=655509,
application_x_java_applet=655510,
application_x_java_commerce=655511,
application_x_java_image=655512,
application_x_java_jmod=655513,
application_x_java_keystore=655514,
application_x_kdelnk=655515,
application_x_koan=655516,
application_x_latex=655517,
application_x_livescreen=655518,
application_x_lotus=655519,
application_x_lz4=655520 | 0x08000000,
application_x_lz4_json=655521,
application_x_lzh=655522,
application_x_lzh_compressed=655523,
application_x_lzip=655524 | 0x08000000,
application_x_lzma=655525 | 0x08000000,
application_x_lzop=655526 | 0x08000000,
application_x_lzx=655527,
application_x_mach_binary=655528,
application_x_mach_executable=655529,
application_x_magic_cap_package_1_0=655530,
application_x_mathcad=655531,
application_x_maxis_dbpf=655532,
application_x_meme=655533,
application_x_midi=655534,
application_x_mif=655535,
application_x_mix_transfer=655536,
application_x_mobipocket_ebook=655537 | 0x02000000,
application_x_ms_compress_szdd=655538,
application_x_ms_pdb=655539,
application_x_ms_reader=655540,
application_x_msaccess=655541,
application_x_n64_rom=655542,
application_x_navi_animation=655543,
application_x_navidoc=655544,
application_x_navimap=655545,
application_x_navistyle=655546,
application_x_nes_rom=655547,
application_x_netcdf=655548,
application_x_newton_compatible_pkg=655549,
application_x_nintendo_ds_rom=655550,
application_x_object=655551,
application_x_omc=655552,
application_x_omcdatamaker=655553,
application_x_omcregerator=655554,
application_x_pagemaker=655555,
application_x_pcl=655556,
application_x_pgp_keyring=655557,
application_x_pixclscript=655558,
application_x_pkcs7_certreqresp=655559,
application_x_pkcs7_signature=655560,
application_x_project=655561,
application_x_qpro=655562,
application_x_rar=655563 | 0x10000000,
application_x_rpm=655564,
application_x_sdp=655565,
application_x_sea=655566,
application_x_seelogo=655567,
application_x_setupscript=655568,
application_x_shar=655569,
application_x_sharedlib=655570,
application_x_shockwave_flash=655571,
application_x_snappy_framed=655572,
application_x_sprite=655573,
application_x_sqlite3=655574,
application_x_stargallery_thm=655575,
application_x_stuffit=655576,
application_x_sv4cpio=655577,
application_x_sv4crc=655578,
application_x_tar=655579 | 0x10000000,
application_x_tbook=655580,
application_x_terminfo=655581,
application_x_terminfo2=655582,
application_x_tex_tfm=655583,
application_x_texinfo=655584,
application_x_ustar=655585,
application_x_visio=655586,
application_x_vnd_audioexplosion_mzz=655587,
application_x_vnd_ls_xpix=655588,
application_x_vrml=655589,
application_x_wais_source=655590,
application_x_wine_extension_ini=655591,
application_x_wintalk=655592,
application_x_world=655593,
application_x_wri=655594,
application_x_x509_ca_cert=655595,
application_x_xz=655596 | 0x08000000,
application_x_zip=655597,
application_x_zstd=655598 | 0x08000000,
application_x_zstd_dictionary=655599,
application_xml=655600,
application_zip=655601 | 0x10000000,
application_zlib=655602,
audio_basic=458995 | 0x80000000,
audio_it=458996,
audio_make=458997,
audio_mid=458998,
audio_midi=458999,
audio_mp4=459000,
audio_mpeg=459001,
audio_ogg=459002,
audio_s3m=459003,
audio_tsp_audio=459004,
audio_tsplayer=459005,
audio_vnd_qcelp=459006,
audio_voxware=459007,
audio_x_aiff=459008,
audio_x_flac=459009,
audio_x_gsm=459010,
audio_x_hx_aac_adts=459011,
audio_x_jam=459012,
audio_x_liveaudio=459013,
audio_x_m4a=459014,
audio_x_midi=459015,
audio_x_mod=459016,
audio_x_mp4a_latm=459017,
audio_x_mpeg_3=459018,
audio_x_mpequrl=459019,
audio_x_nspaudio=459020,
audio_x_pn_realaudio=459021,
audio_x_psid=459022,
audio_x_realaudio=459023,
audio_x_s3m=459024,
audio_x_twinvq=459025,
audio_x_twinvq_plugin=459026,
audio_x_voc=459027,
audio_x_wav=459028,
audio_x_xbox_executable=459029 | 0x80000000,
audio_x_xbox360_executable=459030 | 0x80000000,
audio_xm=459031,
font_otf=327960 | 0x20000000,
font_sfnt=327961 | 0x20000000,
font_woff=327962 | 0x20000000,
font_woff2=327963 | 0x20000000,
image_bmp=524572,
image_cmu_raster=524573,
image_fif=524574,
image_florian=524575,
image_g3fax=524576,
image_gif=524577,
image_heic=524578,
image_ief=524579,
image_jpeg=524580,
image_jutvision=524581,
image_naplps=524582,
image_pict=524583,
image_png=524584,
image_svg=524585 | 0x80000000,
image_svg_xml=524586 | 0x80000000,
image_tiff=524587,
image_vnd_adobe_photoshop=524588 | 0x80000000,
image_vnd_djvu=524589 | 0x80000000,
image_vnd_fpx=524590,
image_vnd_microsoft_icon=524591,
image_vnd_rn_realflash=524592,
image_vnd_rn_realpix=524593,
image_vnd_wap_wbmp=524594,
image_vnd_xiff=524595,
image_webp=524596,
image_wmf=524597,
image_x_3ds=524598,
image_x_adobe_dng=524599 | 0x00800000,
image_x_award_bioslogo=524600,
image_x_canon_cr2=524601 | 0x00800000,
image_x_canon_crw=524602 | 0x00800000,
image_x_cmu_raster=524603,
image_x_cur=524604,
image_x_dcraw=524605 | 0x00800000,
image_x_dwg=524606,
image_x_eps=524607,
image_x_epson_erf=524608 | 0x00800000,
image_x_exr=524609,
image_x_fuji_raf=524610 | 0x00800000,
image_x_gem=524611,
image_x_icns=524612,
image_x_icon=524613 | 0x80000000,
image_x_jg=524614,
image_x_jps=524615,
image_x_kodak_dcr=524616 | 0x00800000,
image_x_kodak_k25=524617 | 0x00800000,
image_x_kodak_kdc=524618 | 0x00800000,
image_x_minolta_mrw=524619 | 0x00800000,
image_x_ms_bmp=524620,
image_x_niff=524621,
image_x_nikon_nef=524622 | 0x00800000,
image_x_olympus_orf=524623 | 0x00800000,
image_x_panasonic_raw=524624 | 0x00800000,
image_x_pcx=524625,
image_x_pentax_pef=524626 | 0x00800000,
image_x_pict=524627,
image_x_portable_bitmap=524628,
image_x_portable_graymap=524629,
image_x_portable_pixmap=524630,
image_x_quicktime=524631,
image_x_rgb=524632,
image_x_sigma_x3f=524633 | 0x00800000,
image_x_sony_arw=524634 | 0x00800000,
image_x_sony_sr2=524635 | 0x00800000,
image_x_sony_srf=524636 | 0x00800000,
image_x_tga=524637,
image_x_tiff=524638,
image_x_win_bitmap=524639,
image_x_xcf=524640 | 0x80000000,
image_x_xpixmap=524641 | 0x80000000,
image_x_xwindowdump=524642,
message_news=196963,
message_rfc822=196964,
model_vnd_dwf=65893,
model_vnd_gdl=65894,
model_vnd_gs_gdl=65895,
model_vrml=65896,
model_x_pov=65897,
text_PGP=590186,
text_asp=590187,
text_css=590188,
text_html=590189 | 0x01000000,
text_javascript=590190,
text_mcf=590191,
text_pascal=590192,
text_plain=590193,
text_richtext=590194,
text_rtf=590195,
text_scriplet=590196,
text_tab_separated_values=590197,
text_troff=590198,
text_uri_list=590199,
text_vnd_abc=590200,
text_vnd_fmi_flexstor=590201,
text_vnd_wap_wml=590202,
text_vnd_wap_wmlscript=590203,
text_webviewhtml=590204,
text_x_Algol68=590205,
text_x_asm=590206,
text_x_audiosoft_intra=590207,
text_x_awk=590208,
text_x_bcpl=590209,
text_x_c=590210,
text_x_c__=590211,
text_x_component=590212,
text_x_diff=590213,
text_x_fortran=590214,
text_x_java=590215,
text_x_la_asf=590216,
text_x_lisp=590217,
text_x_m=590218,
text_x_m4=590219,
text_x_makefile=590220,
text_x_ms_regedit=590221,
text_x_msdos_batch=590222,
text_x_objective_c=590223,
text_x_pascal=590224,
text_x_perl=590225,
text_x_php=590226,
text_x_po=590227,
text_x_python=590228,
text_x_ruby=590229,
text_x_sass=590230,
text_x_scss=590231,
text_x_server_parsed_html=590232,
text_x_setext=590233,
text_x_sgml=590234 | 0x01000000,
text_x_shellscript=590235,
text_x_speech=590236,
text_x_tcl=590237,
text_x_tex=590238,
text_x_uil=590239,
text_x_uuencode=590240,
text_x_vcalendar=590241,
text_x_vcard=590242,
text_xml=590243 | 0x01000000,
video_MP2T=393636,
video_animaflex=393637,
video_avi=393638,
video_avs_video=393639,
video_mp4=393640,
video_mpeg=393641,
video_quicktime=393642,
video_vdo=393643,
video_vivo=393644,
video_vnd_rn_realvideo=393645,
video_vosaic=393646,
video_webm=393647,
video_x_amt_demorun=393648,
video_x_amt_showrun=393649,
video_x_atomic3d_feature=393650,
video_x_dl=393651,
video_x_dv=393652,
video_x_fli=393653,
video_x_flv=393654,
video_x_isvideo=393655,
video_x_jng=393656 | 0x80000000,
video_x_m4v=393657,
video_x_matroska=393658,
video_x_mng=393659,
video_x_motion_jpeg=393660,
video_x_ms_asf=393661,
video_x_msvideo=393662,
video_x_qtc=393663,
video_x_sgi_movie=393664,
x_epoc_x_sisx_app=721345,
application_x_envoy=655482,
application_x_esrehber=655483,
application_x_excel=655484,
application_x_executable=655485,
application_x_font_gdos=655486,
application_x_font_pf2=655487,
application_x_font_pfm=655488,
application_x_font_sfn=655489,
application_x_font_ttf=655490 | 0x20000000,
application_x_fptapplication_x_dbt=655491,
application_x_freelance=655492,
application_x_gamecube_rom=655493,
application_x_gdbm=655494,
application_x_gettext_translation=655495,
application_x_git=655496,
application_x_gsp=655497,
application_x_gss=655498,
application_x_gtar=655499,
application_x_gzip=655500,
application_x_hdf=655501,
application_x_helpfile=655502,
application_x_httpd_imap=655503,
application_x_ima=655504,
application_x_innosetup=655505,
application_x_internett_signup=655506,
application_x_inventor=655507,
application_x_ip2=655508,
application_x_java_applet=655509,
application_x_java_commerce=655510,
application_x_java_image=655511,
application_x_java_jmod=655512,
application_x_java_keystore=655513,
application_x_kdelnk=655514,
application_x_koan=655515,
application_x_latex=655516,
application_x_livescreen=655517,
application_x_lotus=655518,
application_x_lz4=655519 | 0x08000000,
application_x_lz4_json=655520,
application_x_lzh=655521,
application_x_lzh_compressed=655522,
application_x_lzip=655523 | 0x08000000,
application_x_lzma=655524 | 0x08000000,
application_x_lzop=655525 | 0x08000000,
application_x_lzx=655526,
application_x_mach_binary=655527,
application_x_mach_executable=655528,
application_x_magic_cap_package_1_0=655529,
application_x_mathcad=655530,
application_x_maxis_dbpf=655531,
application_x_meme=655532,
application_x_midi=655533,
application_x_mif=655534,
application_x_mix_transfer=655535,
application_x_mobipocket_ebook=655536 | 0x02000000,
application_x_ms_compress_szdd=655537,
application_x_ms_pdb=655538,
application_x_ms_reader=655539,
application_x_msaccess=655540,
application_x_n64_rom=655541,
application_x_navi_animation=655542,
application_x_navidoc=655543,
application_x_navimap=655544,
application_x_navistyle=655545,
application_x_nes_rom=655546,
application_x_netcdf=655547,
application_x_newton_compatible_pkg=655548,
application_x_nintendo_ds_rom=655549,
application_x_object=655550,
application_x_omc=655551,
application_x_omcdatamaker=655552,
application_x_omcregerator=655553,
application_x_pagemaker=655554,
application_x_pcl=655555,
application_x_pgp_keyring=655556,
application_x_pixclscript=655557,
application_x_pkcs7_certreqresp=655558,
application_x_pkcs7_signature=655559,
application_x_project=655560,
application_x_qpro=655561,
application_x_rar=655562 | 0x10000000,
application_x_rpm=655563,
application_x_sdp=655564,
application_x_sea=655565,
application_x_seelogo=655566,
application_x_setupscript=655567,
application_x_shar=655568,
application_x_sharedlib=655569,
application_x_shockwave_flash=655570,
application_x_snappy_framed=655571,
application_x_sprite=655572,
application_x_sqlite3=655573,
application_x_stargallery_thm=655574,
application_x_stuffit=655575,
application_x_sv4cpio=655576,
application_x_sv4crc=655577,
application_x_tar=655578 | 0x10000000,
application_x_tbook=655579,
application_x_terminfo=655580,
application_x_terminfo2=655581,
application_x_tex_tfm=655582,
application_x_texinfo=655583,
application_x_ustar=655584,
application_x_visio=655585,
application_x_vnd_audioexplosion_mzz=655586,
application_x_vnd_ls_xpix=655587,
application_x_vrml=655588,
application_x_wais_source=655589,
application_x_wine_extension_ini=655590,
application_x_wintalk=655591,
application_x_world=655592,
application_x_wri=655593,
application_x_x509_ca_cert=655594,
application_x_xz=655595 | 0x08000000,
application_x_zip=655596,
application_x_zstd=655597 | 0x08000000,
application_x_zstd_dictionary=655598,
application_xml=655599,
application_zip=655600 | 0x10000000,
application_zlib=655601,
audio_basic=458994 | 0x80000000,
audio_it=458995,
audio_make=458996,
audio_mid=458997,
audio_midi=458998,
audio_mp4=458999,
audio_mpeg=459000,
audio_ogg=459001,
audio_s3m=459002,
audio_tsp_audio=459003,
audio_tsplayer=459004,
audio_vnd_qcelp=459005,
audio_voxware=459006,
audio_x_aiff=459007,
audio_x_flac=459008,
audio_x_gsm=459009,
audio_x_hx_aac_adts=459010,
audio_x_jam=459011,
audio_x_liveaudio=459012,
audio_x_m4a=459013,
audio_x_midi=459014,
audio_x_mod=459015,
audio_x_mp4a_latm=459016,
audio_x_mpeg_3=459017,
audio_x_mpequrl=459018,
audio_x_nspaudio=459019,
audio_x_pn_realaudio=459020,
audio_x_psid=459021,
audio_x_realaudio=459022,
audio_x_s3m=459023,
audio_x_twinvq=459024,
audio_x_twinvq_plugin=459025,
audio_x_voc=459026,
audio_x_wav=459027,
audio_x_xbox_executable=459028 | 0x80000000,
audio_x_xbox360_executable=459029 | 0x80000000,
audio_xm=459030,
font_otf=327959 | 0x20000000,
font_sfnt=327960 | 0x20000000,
font_woff=327961 | 0x20000000,
font_woff2=327962 | 0x20000000,
image_bmp=524571,
image_cmu_raster=524572,
image_fif=524573,
image_florian=524574,
image_g3fax=524575,
image_gif=524576,
image_heic=524577,
image_ief=524578,
image_jpeg=524579,
image_jutvision=524580,
image_naplps=524581,
image_pict=524582,
image_png=524583,
image_svg=524584 | 0x80000000,
image_svg_xml=524585 | 0x80000000,
image_tiff=524586,
image_vnd_adobe_photoshop=524587 | 0x80000000,
image_vnd_djvu=524588 | 0x80000000,
image_vnd_fpx=524589,
image_vnd_microsoft_icon=524590,
image_vnd_rn_realflash=524591,
image_vnd_rn_realpix=524592,
image_vnd_wap_wbmp=524593,
image_vnd_xiff=524594,
image_webp=524595,
image_wmf=524596,
image_x_3ds=524597,
image_x_adobe_dng=524598 | 0x00800000,
image_x_award_bioslogo=524599,
image_x_canon_cr2=524600 | 0x00800000,
image_x_canon_crw=524601 | 0x00800000,
image_x_cmu_raster=524602,
image_x_cur=524603,
image_x_dcraw=524604 | 0x00800000,
image_x_dwg=524605,
image_x_eps=524606,
image_x_epson_erf=524607 | 0x00800000,
image_x_exr=524608,
image_x_fuji_raf=524609 | 0x00800000,
image_x_gem=524610,
image_x_icns=524611,
image_x_icon=524612 | 0x80000000,
image_x_jg=524613,
image_x_jps=524614,
image_x_kodak_dcr=524615 | 0x00800000,
image_x_kodak_k25=524616 | 0x00800000,
image_x_kodak_kdc=524617 | 0x00800000,
image_x_minolta_mrw=524618 | 0x00800000,
image_x_ms_bmp=524619,
image_x_niff=524620,
image_x_nikon_nef=524621 | 0x00800000,
image_x_olympus_orf=524622 | 0x00800000,
image_x_panasonic_raw=524623 | 0x00800000,
image_x_pcx=524624,
image_x_pentax_pef=524625 | 0x00800000,
image_x_pict=524626,
image_x_portable_bitmap=524627,
image_x_portable_graymap=524628,
image_x_portable_pixmap=524629,
image_x_quicktime=524630,
image_x_rgb=524631,
image_x_sigma_x3f=524632 | 0x00800000,
image_x_sony_arw=524633 | 0x00800000,
image_x_sony_sr2=524634 | 0x00800000,
image_x_sony_srf=524635 | 0x00800000,
image_x_tga=524636,
image_x_tiff=524637,
image_x_win_bitmap=524638,
image_x_xcf=524639 | 0x80000000,
image_x_xpixmap=524640 | 0x80000000,
image_x_xwindowdump=524641,
message_news=196962,
message_rfc822=196963,
model_vnd_dwf=65892,
model_vnd_gdl=65893,
model_vnd_gs_gdl=65894,
model_vrml=65895,
model_x_pov=65896,
sist2_sidecar=2,
text_PGP=590185,
text_asp=590186,
text_css=590187,
text_html=590188 | 0x01000000,
text_javascript=590189,
text_mcf=590190,
text_pascal=590191,
text_plain=590192,
text_richtext=590193,
text_rtf=590194,
text_scriplet=590195,
text_tab_separated_values=590196,
text_troff=590197,
text_uri_list=590198,
text_vnd_abc=590199,
text_vnd_fmi_flexstor=590200,
text_vnd_wap_wml=590201,
text_vnd_wap_wmlscript=590202,
text_webviewhtml=590203,
text_x_Algol68=590204,
text_x_asm=590205,
text_x_audiosoft_intra=590206,
text_x_awk=590207,
text_x_bcpl=590208,
text_x_c=590209,
text_x_c__=590210,
text_x_component=590211,
text_x_diff=590212,
text_x_fortran=590213,
text_x_java=590214,
text_x_la_asf=590215,
text_x_lisp=590216,
text_x_m=590217,
text_x_m4=590218,
text_x_makefile=590219,
text_x_ms_regedit=590220,
text_x_msdos_batch=590221,
text_x_objective_c=590222,
text_x_pascal=590223,
text_x_perl=590224,
text_x_php=590225,
text_x_po=590226,
text_x_python=590227,
text_x_ruby=590228,
text_x_sass=590229,
text_x_scss=590230,
text_x_server_parsed_html=590231,
text_x_setext=590232,
text_x_sgml=590233 | 0x01000000,
text_x_shellscript=590234,
text_x_speech=590235,
text_x_tcl=590236,
text_x_tex=590237,
text_x_uil=590238,
text_x_uuencode=590239,
text_x_vcalendar=590240,
text_x_vcard=590241,
text_xml=590242 | 0x01000000,
video_MP2T=393635,
video_animaflex=393636,
video_avi=393637,
video_avs_video=393638,
video_mp4=393639,
video_mpeg=393640,
video_quicktime=393641,
video_vdo=393642,
video_vivo=393643,
video_vnd_rn_realvideo=393644,
video_vosaic=393645,
video_webm=393646,
video_x_amt_demorun=393647,
video_x_amt_showrun=393648,
video_x_atomic3d_feature=393649,
video_x_dl=393650,
video_x_dv=393651,
video_x_fli=393652,
video_x_flv=393653,
video_x_isvideo=393654,
video_x_jng=393655 | 0x80000000,
video_x_m4v=393656,
video_x_matroska=393657,
video_x_mng=393658,
video_x_motion_jpeg=393659,
video_x_ms_asf=393660,
video_x_msvideo=393661,
video_x_qtc=393662,
video_x_sgi_movie=393663,
x_epoc_x_sisx_app=721344,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@@ -906,6 +907,7 @@ case image_x_sony_arw: return "image/x-sony-arw";
case image_x_sony_sr2: return "image/x-sony-sr2";
case image_x_sony_srf: return "image/x-sony-srf";
case image_x_epson_erf: return "image/x-epson-erf";
case sist2_sidecar: return "sist2/sidecar";
default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -1449,6 +1451,7 @@ g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf);
g_hash_table_insert(ext_table, "s2meta", (gpointer)sist2_sidecar);
return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1900,5 +1903,6 @@ g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf);
g_hash_table_insert(mime_table, "sist2/sidecar", (gpointer)sist2_sidecar);
return mime_table;}
#endif

View File

@@ -4,6 +4,7 @@
#include "src/ctx.h"
#include "mime.h"
#include "src/io/serialize.h"
#include "src/parsing/sidecar.h"
#include <magic.h>
@@ -157,6 +158,10 @@ void parse(void *arg) {
parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
} else if (IS_MOBI(doc.mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
} else if (doc.mime == MIME_SIST2_SIDECAR) {
parse_sidecar(&job->vfile, &doc);
CLOSE_FILE(job->vfile)
return;
}
abort:

35
src/parsing/sidecar.c Normal file
View File

@@ -0,0 +1,35 @@
#include "sidecar.h"
#include "src/ctx.h"
void parse_sidecar(vfile_t *vfile, document_t *doc) {
LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath)
size_t size;
char* buf = read_all(vfile, &size);
if (buf == NULL) {
LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath)
return;
}
buf = realloc(buf, size + 1);
*(buf + size) = '\0';
cJSON *json = cJSON_Parse(buf);
if (json == NULL) {
LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath)
return;
}
char *json_str = cJSON_PrintUnformatted(json);
char filepath[PATH_MAX];
memcpy(filepath, vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len);
*(filepath + doc->ext - 1) = '\0';
store_write(ScanCtx.index.meta_store, filepath, doc->ext, json_str, strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);
free(buf);
}

8
src/parsing/sidecar.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef SIST2_SIDECAR_H
#define SIST2_SIDECAR_H
#include "src/sist.h"
void parse_sidecar(vfile_t *vfile, document_t *doc);
#endif

View File

@@ -168,7 +168,7 @@ window.onload = () => {
};
function saveTag(tag, hit) {
const relPath = hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit);
const relPath = hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit);
return $.jsonPost("/tag/" + hit["_source"]["index"], {
delete: false,

View File

@@ -12,7 +12,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.8.4</span>
<span class="badge badge-pill version">2.8.5</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.8.4</span>
<span class="badge badge-pill version">2.8.5</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"

View File

@@ -20,6 +20,7 @@ typedef struct index_t {
struct index_descriptor desc;
struct store_t *store;
struct store_t *tag_store;
struct store_t *meta_store;
char path[PATH_MAX];
} index_t;

File diff suppressed because one or more lines are too long