mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 23:18:51 +00:00
refactor index schema, remove sidecar parsing, remove TS
This commit is contained in:
@@ -61,4 +61,6 @@ unsigned int mime_get_mime_by_ext(const char *ext);
|
||||
|
||||
unsigned int mime_get_mime_by_string(const char *str);
|
||||
|
||||
unsigned int* get_mime_ids();
|
||||
|
||||
#endif
|
||||
|
||||
@@ -365,7 +365,6 @@ model_vnd_gdl=65893,
|
||||
model_vnd_gs_gdl=65894,
|
||||
model_vrml=65895,
|
||||
model_x_pov=65896,
|
||||
sist2_sidecar=2,
|
||||
text_PGP=590185,
|
||||
text_asp=590186,
|
||||
text_css=590187,
|
||||
@@ -909,7 +908,6 @@ case image_x_sony_arw: return "image/x-sony-arw";
|
||||
case image_x_sony_sr2: return "image/x-sony-sr2";
|
||||
case image_x_sony_srf: return "image/x-sony-srf";
|
||||
case image_x_epson_erf: return "image/x-epson-erf";
|
||||
case sist2_sidecar: return "sist2/sidecar";
|
||||
default: return NULL;}}
|
||||
unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) {
|
||||
case 2495639202:return application_x_matlab_data;
|
||||
@@ -1293,7 +1291,6 @@ case 1698465774:return image_x_sony_arw;
|
||||
case 2083014127:return image_x_sony_sr2;
|
||||
case 271503362:return image_x_sony_srf;
|
||||
case 142938048:return image_x_epson_erf;
|
||||
case 287571459:return sist2_sidecar;
|
||||
default: return 0;}}
|
||||
unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) {
|
||||
case 3272851765: return application_x_matlab_data;
|
||||
@@ -1747,6 +1744,7 @@ case 3060720351: return image_x_sony_arw;
|
||||
case 2944016606: return image_x_sony_sr2;
|
||||
case 3279729971: return image_x_sony_srf;
|
||||
case 1665206815: return image_x_epson_erf;
|
||||
case 521139448: return sist2_sidecar;
|
||||
default: return 0;}}
|
||||
unsigned int mime_ids[] = {655530,655363,655364,655365,655366,655362,655361,655367,655368,655369,655370,655371,655372 | 0x40000000,655373,655374,655375,655376 | 0x08000000,655377,655378,655379,655380,655382,655381,655383,655384,655390,655385,655386,655387,655388,655389,655391,655392,655393,655394,655395 | 0x40000000,655396,655397,655398,655399,655400,655401,655402,655403,655404,655405,655406,655407,655408,655411,655412,655413,655414,655415,655416,655417,655418,655419 | 0x20000000,655421,655422,655423,655424,655425,655426,655427,655428,655429,655430,655431,655432 | 0x04000000,655433 | 0x04000000,655434 | 0x04000000,655435,655436,655437,655438,655439,655440,655441,655442,655443,655444,655445,655446 | 0x10000000,655447,655448,655449 | 0x10000000,655450,655451,655452,655453,655454,655455,655456,655457,655458,655459,655461 | 0x08000000,655460,655462,655463,655464,655465,655466,655467,655468,655469,655470,655471,655472,655473,655474,655475,655476,655477,655478,655479,655480,1,655481,655482,655483,655484,655485,655486,655487,655488,655489 | 0x20000000,655490,655491,655492,655493,655494,655495,655496,655497,655498,655499,655500,655501,655502,655503,655504,655505,655506,655507,655508,655509,655510,655511,655512,655513,655514,655515,655516,655517,655519,655518 | 0x08000000,655521,655520,655522 | 0x08000000,655523 | 0x08000000,655524 | 0x08000000,655525,655526,655527,655528,655529,655531,655532,655533,655534,655535,655599,655536 | 0x02000000,655409 | 0x02000000,655540,655537,655538,655539,655541,655542,655543,655544,655545,655546,655547,655548,655549,655550,655552,655551,655553,655554,655555,655556,655557,655558,655559,655560,655561,655562 | 0x10000000,655563,655564,655565,655566,655567,655569,655568,655570,655571,655572,655573,655574,655575,655576,655577,655578 | 0x10000000,655579,655580,655581,655583,655582,655584,655585,655586,655587,655588,655589,655590,655591,655592,655593,655594,655595 | 0x08000000,655596,655597 | 0x08000000,655600 | 0x10000000,655601,458994 | 
0x80000000,458995,458996,458998,458997,458999,459000,459001,459002,459003,459004,459005,459006,459007,459008,459009,459010,459011,459012,459013,459014,459015,459016,459017,459018,459030,459019,459020,459021,459022,459023,459025,459024,459026,459027,459029 | 0x80000000,459028 | 0x80000000,327959 | 0x20000000,327960 | 0x20000000,327962 | 0x20000000,327961 | 0x20000000,524571,524572,524573,524574,524575,524576,524577,524578,524579,524580,524581,524582,524583,524584 | 0x80000000,524585 | 0x80000000,524586,524587 | 0x80000000,524588 | 0x80000000,524589,524590,524591,524592,524593,524594,524595,524596,524597,524599,524602,524603,524605,524606,524608,524610,524611,524612 | 0x80000000,524613,524614,524619,524620,524624,524626,524627,524628,524629,524630,524631,524636,524637,524638,524639 | 0x80000000,524640 | 0x80000000,524641,196962,196963,65892,65893,65894,65895,65896,590186,590187,590189 | 0x01000000,590190,590191,590192,590185,590193,590231,590188,655410,590194,590195,590196,590197,590198,590199,590200,590201,590203,590202,590204,590205,590206,590207,590208,590209,590210,590211,590212,590213,590214,590215,590216,590217,590219,590220,590244 | 0x01000000,590218,590222,590221,590223,590224,590225,590226,590227,590228,590229,590230,590232,590233,590234,590235 | 0x01000000,590236,590237,590238,590239,590240,590241,590242,590243,393638,393639,393640,393637,393641,393642,393643,393644,393645,393646,393647,393648,393649,393650,393651,393652,393653,393654,393655,393656,393657 | 0x80000000,393658,393659,393660,393661,393662,393663,393664,393665,721346,655598,655420,524622 | 0x00800000,524621 | 0x00800000,524609 | 0x00800000,524623 | 0x00800000,524598 | 0x00800000,524600 | 0x00800000,524601 | 0x00800000,524604 | 0x00800000,524615 | 0x00800000,524616 | 0x00800000,524617 | 0x00800000,524618 | 0x00800000,524625 | 0x00800000,524632 | 0x00800000,524633 | 0x00800000,524634 | 0x00800000,524635 | 0x00800000,524607 | 0x00800000,0};
|
||||
/* Returns a pointer to the first element of the mime_ids table. The table ends
 * with a 0 entry, and some entries carry extra high bits OR-ed into the id. */
unsigned int* get_mime_ids() {
    unsigned int *table = mime_ids;
    return table;
}
|
||||
#endif
|
||||
|
||||
@@ -4,10 +4,8 @@
|
||||
#include "src/ctx.h"
|
||||
#include "mime.h"
|
||||
#include "src/io/serialize.h"
|
||||
#include "src/parsing/sidecar.h"
|
||||
#include "src/parsing/fs_util.h"
|
||||
#include "src/parsing/magic_util.h"
|
||||
#include <pthread.h>
|
||||
|
||||
|
||||
#define MIN_VIDEO_SIZE (1024 * 64)
|
||||
@@ -27,7 +25,6 @@ typedef enum {
|
||||
FILETYPE_OOXML,
|
||||
FILETYPE_COMIC,
|
||||
FILETYPE_MOBI,
|
||||
FILETYPE_SIST2_SIDECAR,
|
||||
FILETYPE_MSDOC,
|
||||
FILETYPE_JSON,
|
||||
FILETYPE_NDJSON,
|
||||
@@ -63,8 +60,6 @@ file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath)
|
||||
return FILETYPE_COMIC;
|
||||
} else if (IS_MOBI(mime)) {
|
||||
return FILETYPE_MOBI;
|
||||
} else if (mime == MIME_SIST2_SIDECAR) {
|
||||
return FILETYPE_SIST2_SIDECAR;
|
||||
} else if (is_msdoc(&ScanCtx.msdoc_ctx, mime)) {
|
||||
return FILETYPE_MSDOC;
|
||||
} else if (is_json(&ScanCtx.json_ctx, mime)) {
|
||||
@@ -157,7 +152,8 @@ void parse(parse_job_t *job) {
|
||||
doc->size = job->vfile.st_size;
|
||||
doc->mtime = MAX(job->vfile.mtime, 0);
|
||||
doc->mime = get_mime(job);
|
||||
generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id);
|
||||
doc->thumbnail_count = 0;
|
||||
strcpy(doc->parent, job->parent);
|
||||
|
||||
if (doc->mime == GET_MIME_ERROR_FATAL) {
|
||||
CLOSE_FILE(job->vfile)
|
||||
@@ -165,16 +161,12 @@ void parse(parse_job_t *job) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) {
|
||||
if (database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime)) {
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id);
|
||||
}
|
||||
|
||||
switch (get_file_type(doc->mime, doc->size, doc->filepath)) {
|
||||
case FILETYPE_RAW:
|
||||
parse_raw(&ScanCtx.raw_ctx, &job->vfile, doc);
|
||||
@@ -195,6 +187,10 @@ void parse(parse_job_t *job) {
|
||||
parse_font(&ScanCtx.font_ctx, &job->vfile, doc);
|
||||
break;
|
||||
case FILETYPE_ARCHIVE:
|
||||
|
||||
// Insert the document now so that the children documents can link to an existing ID
|
||||
database_write_document(ProcData.index_db, doc, NULL);
|
||||
|
||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
|
||||
break;
|
||||
case FILETYPE_OOXML:
|
||||
@@ -206,11 +202,6 @@ void parse(parse_job_t *job) {
|
||||
case FILETYPE_MOBI:
|
||||
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, doc);
|
||||
break;
|
||||
case FILETYPE_SIST2_SIDECAR:
|
||||
parse_sidecar(&job->vfile, doc);
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc);
|
||||
return;
|
||||
case FILETYPE_MSDOC:
|
||||
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
|
||||
break;
|
||||
@@ -225,14 +216,6 @@ void parse(parse_job_t *job) {
|
||||
break;
|
||||
}
|
||||
|
||||
//Parent meta
|
||||
if (job->parent[0] != '\0') {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->str_val, job->parent);
|
||||
APPEND_META((doc), meta_parent);
|
||||
}
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
|
||||
if (job->vfile.has_checksum) {
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
#include "sidecar.h"
|
||||
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Parse a JSON sidecar file and store its contents for the associated document.
 *
 * The whole file is read into memory, parsed with cJSON and re-serialized
 * without formatting. The ID of the associated document is generated from the
 * sidecar's path relative to the index root, cut one character before offset
 * doc->ext (presumably this strips the ".<extension>" suffix of the sidecar --
 * confirm against where doc->ext is assigned).
 *
 * On any failure an error is logged, nothing is written to the database and
 * every buffer allocated here is released.
 *
 * @param vfile sidecar file to read; vfile->filepath is used for the relative path
 * @param doc   document descriptor of the sidecar file; only doc->ext is read
 */
void parse_sidecar(vfile_t *vfile, document_t *doc) {

    LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath);

    cJSON *json = NULL;
    char *json_str = NULL;
    char assoc_doc_id[SIST_DOC_ID_LEN];
    char rel_path[PATH_MAX];

    size_t size;
    char *buf = read_all(vfile, &size);
    if (buf == NULL) {
        LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath);
        return;
    }

    // Grow by one byte for the NUL terminator. Use a temporary so that the
    // original buffer can still be freed if realloc fails.
    char *terminated = realloc(buf, size + 1);
    if (terminated == NULL) {
        LOG_ERRORF("sidecar.c", "Out of memory while reading %s", vfile->filepath);
        goto cleanup;
    }
    buf = terminated;
    buf[size] = '\0';

    json = cJSON_Parse(buf);
    if (json == NULL) {
        LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath);
        goto cleanup;
    }

    json_str = cJSON_PrintUnformatted(json);
    if (json_str == NULL) {
        LOG_ERRORF("sidecar.c", "Could not serialize JSON sidecar %s", vfile->filepath);
        goto cleanup;
    }

    // A negative intermediate result wraps to a huge size_t, so this single
    // check rejects both underflow and paths that do not fit in rel_path.
    size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;
    if (rel_path_len >= sizeof(rel_path)) {
        LOG_ERRORF("sidecar.c", "Invalid relative path length for sidecar %s", vfile->filepath);
        goto cleanup;
    }

    memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
    rel_path[rel_path_len] = '\0';

    generate_doc_id(rel_path, assoc_doc_id);

    database_write_document_sidecar(ProcData.index_db, assoc_doc_id, json_str);

cleanup:
    // cJSON_Delete and free both accept NULL, so partially completed runs
    // can share this single exit path.
    cJSON_Delete(json);
    free(json_str);
    free(buf);
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef SIST2_SIDECAR_H
|
||||
#define SIST2_SIDECAR_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
void parse_sidecar(vfile_t *vfile, document_t *doc);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user