mirror of
https://github.com/simon987/sist2.git
synced 2025-04-19 10:16:42 +00:00
Archive file support
This commit is contained in:
parent
9778acda77
commit
be23201210
@ -41,7 +41,7 @@ if (WITH_SIST2)
|
||||
|
||||
# utf8.h
|
||||
utf8.h/utf8.h
|
||||
)
|
||||
src/parsing/arc.c src/parsing/arc.h)
|
||||
endif ()
|
||||
|
||||
if (WITH_SIST2_SCAN)
|
||||
@ -74,7 +74,7 @@ if (WITH_SIST2_SCAN)
|
||||
|
||||
# utf8.h
|
||||
utf8.h/utf8.h
|
||||
)
|
||||
src/parsing/arc.c src/parsing/arc.h)
|
||||
endif ()
|
||||
|
||||
find_package(PkgConfig REQUIRED)
|
||||
@ -160,6 +160,7 @@ if (WITH_SIST2)
|
||||
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
|
||||
freetype
|
||||
archive
|
||||
)
|
||||
|
||||
endif ()
|
||||
@ -228,6 +229,11 @@ if (WITH_SIST2_SCAN)
|
||||
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
|
||||
freetype
|
||||
|
||||
${PROJECT_SOURCE_DIR}/lib/libarchive.a
|
||||
${PROJECT_SOURCE_DIR}/lib/liblz4.a
|
||||
${PROJECT_SOURCE_DIR}/lib/liblzma.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libzstd.a
|
||||
)
|
||||
endif ()
|
||||
|
||||
|
25
README.md
25
README.md
@ -15,9 +15,11 @@ sist2 (Simple incremental search tool)
|
||||
* Generates thumbnails\*
|
||||
* Incremental scanning
|
||||
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
|
||||
* Recursive scan inside archive files \*\*
|
||||
|
||||
|
||||
\* See [format support](#format-support)
|
||||
\* See [format support](#format-support)
|
||||
\** See [Archive files](#archive-files)
|
||||
|
||||
## Getting Started
|
||||
|
||||
@ -33,8 +35,6 @@ sist2 (Simple incremental search tool)
|
||||
|
||||
## Example usage
|
||||
|
||||

|
||||
|
||||
See help page `sist2 --help` for more details.
|
||||
|
||||
**Scan a directory**
|
||||
@ -91,10 +91,25 @@ pdf,xps,cbz,fb2,epub | MuPDF | yes | yes, `png` | title |
|
||||
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
|
||||
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||
`text/plain` | *(none)* | yes | no | - |
|
||||
tar, zip, rar, 7z, ar ... | Libarchive | *planned* | - | no |
|
||||
docx, xlsx, pptx | | *planned* | no | *planned* |
|
||||
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||
docx, xlsx, pptx | | yes | no | *planned* |
|
||||
|
||||
\* *See [Archive files](#archive-files)*
|
||||
|
||||
### Archive files
|
||||
**sist2** will scan files stored in archive files (zip, tar, 7z...) as if
|
||||
they were directly in the file system. Recursive (archives inside archives)
|
||||
scan is also supported.
|
||||
|
||||
**Limitations**:
|
||||
* Parsing media files with formats that require
|
||||
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
|
||||
* Archive files are scanned sequentially, by a single thread. On systems where
|
||||
**sist2** is not I/O bound, scans might be faster when larger archives are split
|
||||
into smaller parts.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
|
||||
|
||||
|
||||
## Build from source
|
||||
|
@ -1 +0,0 @@
|
||||
Subproject commit 288acf97a15d558f96c24c89f578b724d6e06b0c
|
@ -1 +0,0 @@
|
||||
Subproject commit 53c21c2d6bebba887be9a30de204875fb41b1169
|
@ -1 +0,0 @@
|
||||
Subproject commit 878e3588a3349c2660b0f9aa6d94a994034d7c10
|
@ -1 +0,0 @@
|
||||
Subproject commit 1249b5cd02c3b6fb9b917d16c76bc76c862932b6
|
@ -1 +0,0 @@
|
||||
Subproject commit 355cedaefe68358ad533ffb6a59bbb4e6444267a
|
@ -1 +0,0 @@
|
||||
Subproject commit d8d4cc9290982e1fdd254377ff62d8175f9c6059
|
@ -1 +0,0 @@
|
||||
Subproject commit 5875a6b44618fb7dfd5cd6d742533eaee2014060
|
7
mime.csv
7
mime.csv
@ -410,4 +410,9 @@ text/PGP,
|
||||
audio/x-hx-aac-adts,
|
||||
application/x-chrome-extension,
|
||||
image/heic, heic
|
||||
image/x-gem,
|
||||
image/x-gem,
|
||||
application/x-lzma, lzma
|
||||
application/warc, warc
|
||||
application/x-lz4, lz4
|
||||
application/x-lzip, lz
|
||||
application/x-lzop, lzo
|
||||
|
|
@ -67,5 +67,39 @@ make -j 4
|
||||
cd ..
|
||||
mv libmagic/src/.libs/libmagic.a .
|
||||
|
||||
# libarchive
|
||||
git clone https://github.com/libarchive/libarchive
|
||||
cd libarchive/build
|
||||
./autogen.sh
|
||||
cd ..
|
||||
./configure --without-nettle --without-expat --without-xml2 --without-openssl
|
||||
make -j 4
|
||||
cd ..
|
||||
mv libarchive/.libs/libarchive.a .
|
||||
|
||||
# lz4
|
||||
git clone https://github.com/lz4/lz4
|
||||
cd lz4
|
||||
make -j 4
|
||||
cd ..
|
||||
mv lz4/lib/liblz4.a .
|
||||
|
||||
# lzma
|
||||
wget https://newcontinuum.dl.sourceforge.net/project/lzmautils/xz-5.2.3.tar.gz
|
||||
tar -xzf xz-5.2.3.tar.gz
|
||||
rm xz-5.2.3.tar.gz
|
||||
cd xz-5.2.3
|
||||
./autogen.sh
|
||||
./configure
|
||||
make -j 4
|
||||
cd ..
|
||||
mv xz-5.2.3/src/liblzma/.libs/liblzma.a .
|
||||
|
||||
# zstd
|
||||
git clone https://github.com/facebook/zstd
|
||||
cd zstd
|
||||
make -j 4
|
||||
cd ..
|
||||
mv zstd/lib/libzstd.a .
|
||||
|
||||
cd ..
|
||||
|
@ -34,6 +34,28 @@ font = (
|
||||
"font/woff2"
|
||||
)
|
||||
|
||||
# Archive "formats"
|
||||
# MIME types flagged as archive containers (0x10000000 in the generated ids).
# mime.csv registers WARC as "application/warc"; the "x-" spelling is kept
# so that any source still emitting it continues to be flagged.
archive = (
    "application/x-tar",
    "application/zip",
    "application/x-rar",
    "application/x-arc",
    "application/warc",
    "application/x-warc",
    "application/x-7z-compressed",
)
|
||||
|
||||
# Archive "filters"
|
||||
# MIME types of single-stream compression wrappers ("filters"); every
# entry lives under the "application/" top-level type.
arc_filter = tuple(
    "application/" + subtype
    for subtype in (
        "gzip",
        "x-bzip2",
        "x-xz",
        "x-zstd",
        "x-lzma",
        "x-lz4",
        "x-lzip",
        "x-lzop",
    )
)
|
||||
|
||||
cnt = 1
|
||||
|
||||
|
||||
@ -48,6 +70,10 @@ def mime_id(mime):
|
||||
mime_id += " | 0x40000000"
|
||||
elif mime in font:
|
||||
mime_id += " | 0x20000000"
|
||||
elif mime in archive:
|
||||
mime_id += " | 0x10000000"
|
||||
elif mime in arc_filter:
|
||||
mime_id += " | 0x08000000"
|
||||
elif mime == "application/x-empty":
|
||||
return "1"
|
||||
return mime_id
|
||||
|
21
src/cli.c
21
src/cli.c
@ -1,7 +1,7 @@
|
||||
#include "cli.h"
|
||||
|
||||
#define DEFAULT_OUTPUT "index.sist2/"
|
||||
#define DEFAULT_CONTENT_SIZE 4096
|
||||
#define DEFAULT_CONTENT_SIZE 32768
|
||||
#define DEFAULT_QUALITY 5
|
||||
#define DEFAULT_SIZE 500
|
||||
#define DEFAULT_REWRITE_URL ""
|
||||
@ -35,6 +35,7 @@ void scan_args_destroy(scan_args_t *args) {
|
||||
}
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
|
||||
void index_args_destroy(index_args_t *args) {
|
||||
//todo
|
||||
free(args);
|
||||
@ -44,6 +45,7 @@ void web_args_destroy(web_args_t *args) {
|
||||
//todo
|
||||
free(args);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
@ -119,10 +121,24 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
if (args->rewrite_url == NULL) {
|
||||
args->rewrite_url = DEFAULT_REWRITE_URL;
|
||||
}
|
||||
|
||||
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
|
||||
args->archive_mode = ARC_MODE_RECURSE;
|
||||
} else if (strcmp(args->archive, "list") == 0) {
|
||||
args->archive_mode = ARC_MODE_LIST;
|
||||
} else if (strcmp(args->archive, "shallow") == 0) {
|
||||
args->archive_mode = ARC_MODE_SHALLOW;
|
||||
} else if (strcmp(args->archive, "skip") == 0) {
|
||||
args->archive_mode = ARC_MODE_SKIP;
|
||||
} else {
|
||||
fprintf(stderr, "Archive mode must be one of (skip, list, shallow, recurse), got '%s'", args->archive);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
|
||||
int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
|
||||
if (argc < 2) {
|
||||
@ -196,7 +212,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
if (args->credentials != NULL) {
|
||||
args->b64credentials = onion_base64_encode(args->credentials, (int)strlen(args->credentials));
|
||||
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
|
||||
//Remove trailing newline
|
||||
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
|
||||
}
|
||||
@ -223,5 +239,6 @@ web_args_t *web_args_create() {
|
||||
web_args_t *args = calloc(sizeof(web_args_t), 1);
|
||||
return args;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -14,6 +14,8 @@ typedef struct scan_args {
|
||||
char *name;
|
||||
int depth;
|
||||
char *path;
|
||||
char *archive;
|
||||
archive_mode_t archive_mode;
|
||||
} scan_args_t;
|
||||
|
||||
scan_args_t *scan_args_create();
|
||||
|
@ -16,6 +16,7 @@ struct {
|
||||
int content_size;
|
||||
float tn_qscale;
|
||||
int depth;
|
||||
archive_mode_t archive_mode;
|
||||
|
||||
size_t stat_tn_size;
|
||||
size_t stat_index_size;
|
||||
|
@ -111,6 +111,8 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
return "title";
|
||||
case MetaFontName:
|
||||
return "font_name";
|
||||
case MetaParent:
|
||||
return "parent";
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@ -247,6 +249,7 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
|
||||
case MetaAlbumArtist:
|
||||
case MetaGenre:
|
||||
case MetaFontName:
|
||||
case MetaParent:
|
||||
case MetaTitle: {
|
||||
buf.cur = 0;
|
||||
while ((c = getc(file)) != 0) {
|
||||
|
@ -1,7 +1,8 @@
|
||||
#include "walk.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
|
||||
__always_inline
|
||||
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
|
||||
int len = (int) strlen(filepath);
|
||||
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
|
||||
|
||||
@ -14,14 +15,22 @@ parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int
|
||||
job->ext = len;
|
||||
}
|
||||
|
||||
memcpy(&(job->info), info, sizeof(struct stat));
|
||||
job->info = *info;
|
||||
|
||||
memset(job->parent, 0, 16);
|
||||
|
||||
job->vfile.filepath = job->filepath;
|
||||
job->vfile.read = fs_read;
|
||||
job->vfile.close = fs_close;
|
||||
job->vfile.fd = -1;
|
||||
job->vfile.is_fs_file = TRUE;
|
||||
|
||||
return job;
|
||||
}
|
||||
|
||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
||||
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
parse_job_t *job = create_parse_job(filepath, info, ftw->base);
|
||||
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
||||
tpool_add_work(ScanCtx.pool, parse, job);
|
||||
}
|
||||
|
||||
|
10
src/main.c
10
src/main.c
@ -10,7 +10,7 @@
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "1.1.9";
|
||||
static const char *const Version = "1.1.10";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
@ -51,7 +51,7 @@ void scan_print_header() {
|
||||
}
|
||||
|
||||
if (ScanCtx.content_size > 0) {
|
||||
printf("content_size\t%d B\n", ScanCtx.content_size);
|
||||
printf("content_size\t\t%d B\n", ScanCtx.content_size);
|
||||
} else {
|
||||
printf("content_size\t\t\tdisabled\n");
|
||||
}
|
||||
@ -66,6 +66,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
ScanCtx.content_size = args->content_size;
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
ScanCtx.archive_mode = args->archive_mode;
|
||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
||||
@ -242,7 +243,7 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_INTEGER(0, "size", &scan_args->size,
|
||||
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
|
||||
OPT_INTEGER(0, "content-size", &scan_args->content_size,
|
||||
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=4096"),
|
||||
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
|
||||
OPT_STRING(0, "incremental", &scan_args->incremental,
|
||||
"Reuse an existing index and only scan modified files."),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
|
||||
@ -250,6 +251,9 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
|
||||
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
|
||||
"Use 0 to only scan files in PATH. DEFAULT: -1"),
|
||||
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
|
||||
"skip: Don't parse, list: only get file names as text, "
|
||||
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
OPT_GROUP("Index options"),
|
||||
|
152
src/parsing/arc.c
Normal file
152
src/parsing/arc.c
Normal file
@ -0,0 +1,152 @@
|
||||
#include "arc.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#define ARC_BUF_SIZE 8192
|
||||
|
||||
/**
 * Tell whether a file whose final extension is a compression filter
 * (e.g. "foo.tar.gz") wraps a tar archive.
 *
 * @param filepath Path of the file.
 * @param ext      Offset in filepath of the first character after the last
 *                 '.', as stored in parse_job_t.ext; must not exceed
 *                 strlen(filepath).
 * @return non-zero when the path, with its final extension removed, ends in
 *         ".tar"; 0 otherwise (including ext <= 0 or a NULL path).
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char inner[] = ".tar";
    const int inner_len = (int) sizeof(inner) - 1;

    // The stem is filepath[0 .. ext - 1); it needs at least inner_len bytes.
    // This also rejects ext <= 0, which previously reached memcpy unchecked
    // when negative.
    if (filepath == NULL || ext <= inner_len) {
        return 0;
    }

    // "tar" contains no '.', so the stem's last '.' begins ".tar" exactly when
    // the stem ends with ".tar". Comparing in place avoids the temporary copy
    // into a fixed-size stack buffer that a long path could overrun.
    return memcmp(filepath + (ext - 1 - inner_len), inner, (size_t) inner_len) == 0;
}
|
||||
|
||||
int arc_read(struct vfile *f, void *buf, size_t size) {
|
||||
return archive_read_data(f->arc, buf, size);
|
||||
}
|
||||
|
||||
typedef struct arc_data {
|
||||
vfile_t *f;
|
||||
char buf[ARC_BUF_SIZE];
|
||||
} arc_data_f;
|
||||
|
||||
int vfile_open_callback(struct archive *a, void *user_data) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
if (data->f->is_fs_file && data->f->fd == -1) {
|
||||
data->f->fd = open(data->f->filepath, O_RDONLY);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
*buf = data->buf;
|
||||
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
|
||||
}
|
||||
|
||||
int vfile_close_callback(struct archive *a, void *user_data) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
if (data->f->close != NULL) {
|
||||
data->f->close(data->f);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
/*
 * ARC_MODE_LIST: store the names of the regular-file entries, one per line,
 * as the MetaContent of the archive's own document.
 */
static void arc_list_entries(struct archive *a, document_t *doc) {
    struct archive_entry *entry;
    dyn_buffer_t buf = dyn_buffer_create();

    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        const struct stat *info = archive_entry_stat(entry);
        const char *path = archive_entry_pathname(entry);

        // The pathname can be NULL (e.g. a name that cannot be converted);
        // such entries are skipped rather than dereferenced.
        if (info == NULL || path == NULL || !S_ISREG(info->st_mode)) {
            continue;
        }

        dyn_buffer_append_string(&buf, (char *) path);
        dyn_buffer_write_char(&buf, '\n');
    }
    dyn_buffer_write_char(&buf, '\0');

    meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
    if (meta_list != NULL) {
        meta_list->key = MetaContent;
        strcpy(meta_list->strval, buf.buf);
        APPEND_META(doc, meta_list);
    } else {
        fprintf(stderr, "Could not allocate archive listing! %s\n", doc->filepath);
    }

    dyn_buffer_destroy(&buf);
}

/*
 * Non-LIST modes: run parse() synchronously on every regular-file entry.
 * A single parse_job_t is reused for all entries; its virtual file reads
 * the entry data straight from the archive handle.
 */
static void arc_parse_entries(struct archive *a, vfile_t *f, document_t *doc) {
    const size_t path_cap = PATH_MAX * 2;
    struct archive_entry *entry;

    parse_job_t *sub_job = malloc(sizeof(parse_job_t) + path_cap);
    if (sub_job == NULL) {
        fprintf(stderr, "Could not allocate parse job! %s\n", doc->filepath);
        return;
    }

    sub_job->vfile.close = NULL;
    sub_job->vfile.read = arc_read;
    sub_job->vfile.arc = a;
    sub_job->vfile.filepath = sub_job->filepath;
    sub_job->vfile.is_fs_file = FALSE;
    // Same "not opened" marker as filesystem jobs; the job comes from malloc,
    // so the field would otherwise hold garbage.
    sub_job->vfile.fd = -1;
    memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        const struct stat *info = archive_entry_stat(entry);
        const char *entry_path = archive_entry_pathname(entry);

        if (info == NULL || entry_path == NULL || !S_ISREG(info->st_mode)) {
            continue;
        }
        sub_job->info = *info;

        // Entry names come from the archive and are untrusted: bound the write
        // and skip entries whose virtual path does not fit the job buffer.
        int len = snprintf(sub_job->filepath, path_cap, "%s#/%s", f->filepath, entry_path);
        if (len < 0 || (size_t) len >= path_cap) {
            fprintf(stderr, "Path too long, skipping entry in %s\n", f->filepath);
            continue;
        }

        // The "#/" separator guarantees at least one '/' in the path.
        sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

        // Look for the extension in the file name only, so that a '.' in the
        // archive's own name or in a directory is not taken for the entry's
        // extension (e.g. "a.zip#/README" has no extension).
        char *dot = strrchr(sub_job->filepath + sub_job->base, '.');
        if (dot != NULL) {
            sub_job->ext = (int) (dot - sub_job->filepath + 1);
        } else {
            sub_job->ext = len;
        }

        parse(sub_job);
    }

    free(sub_job);
}

/**
 * Parse an archive file.
 *
 * A filesystem file is opened by name (doc->filepath). A virtual file (an
 * archive found inside another archive) is streamed through the vfile
 * callbacks, and only when ScanCtx.archive_mode is ARC_MODE_RECURSE;
 * otherwise nothing is done. Once opened, ARC_MODE_LIST records the entry
 * names as text, and every other mode parses each regular-file entry as a
 * child document of doc.
 *
 * @param f   Virtual file the archive is read from.
 * @param doc Document of the archive itself; receives the listing in LIST
 *            mode and provides the parent uuid of the entries otherwise.
 */
void parse_archive(vfile_t *f, document_t *doc) {

    struct archive *a = archive_read_new();
    if (a == NULL) {
        fprintf(stderr, "Could not allocate archive reader! %s\n", doc->filepath);
        return;
    }

    archive_read_support_filter_all(a);
    archive_read_support_format_all(a);

    // Must outlive every read on `a`: libarchive keeps a pointer to it.
    arc_data_f data;
    data.f = f;

    int ret = 0;
    if (f->is_fs_file) {
        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    } else {
        archive_read_free(a);
        return;
    }

    if (ret != ARCHIVE_OK) {
        fprintf(stderr, "OPEN[%d]:%s %s\n", ret, archive_error_string(a), doc->filepath);
        archive_read_free(a);
        return;
    }

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {
        arc_list_entries(a, doc);
    } else {
        arc_parse_entries(a, f, doc);
    }

    archive_read_free(a);
}
|
12
src/parsing/arc.h
Normal file
12
src/parsing/arc.h
Normal file
@ -0,0 +1,12 @@
|
||||
#ifndef SIST2_ARC_H
|
||||
#define SIST2_ARC_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
int should_parse_filtered_file(const char *filepath, int ext);
|
||||
|
||||
void parse_archive(vfile_t *f, document_t *doc);
|
||||
|
||||
int arc_read(struct vfile * f, void *buf, size_t size);
|
||||
|
||||
#endif
|
@ -2,6 +2,7 @@
|
||||
#include "src/ctx.h"
|
||||
|
||||
#define MIN_SIZE 32
|
||||
#define AVIO_BUF_SIZE 8192
|
||||
|
||||
__always_inline
|
||||
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
|
||||
@ -89,9 +90,9 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
|
||||
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
|
||||
|
||||
if (read_frame_ret != 0) {
|
||||
if (read_frame_ret != AVERROR_EOF) {
|
||||
fprintf(stderr, "Error reading frame: %d\n", read_frame_ret);
|
||||
}
|
||||
// if (read_frame_ret != AVERROR_EOF) {
|
||||
// fprintf(stderr, "Error reading frame: %d\n", read_frame_ret);
|
||||
// }
|
||||
av_frame_free(&frame);
|
||||
av_packet_unref(&avPacket);
|
||||
return NULL;
|
||||
@ -188,22 +189,11 @@ void append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *
|
||||
}
|
||||
}
|
||||
|
||||
void parse_media(const char *filepath, document_t *doc) {
|
||||
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
|
||||
int video_stream = -1;
|
||||
int audio_stream = -1;
|
||||
|
||||
AVFormatContext *pFormatCtx = avformat_alloc_context();
|
||||
if (pFormatCtx == NULL) {
|
||||
fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
|
||||
return;
|
||||
}
|
||||
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
|
||||
if (res < 0) {
|
||||
fprintf(stderr, "media error: %s %s\n", filepath, av_err2str(res));
|
||||
return;
|
||||
}
|
||||
|
||||
avformat_find_stream_info(pFormatCtx, NULL);
|
||||
|
||||
for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
|
||||
@ -314,3 +304,58 @@ void parse_media(const char *filepath, document_t *doc) {
|
||||
avformat_free_context(pFormatCtx);
|
||||
}
|
||||
|
||||
/**
 * Open a media file by path with libavformat and hand the opened context to
 * parse_media(). Allocation and open failures are logged to stderr and
 * nothing is parsed.
 *
 * @param filepath Path of the media file.
 * @param doc      Document forwarded to parse_media().
 */
void parse_media_filename(const char *filepath, document_t *doc) {

    AVFormatContext *fmt_ctx = avformat_alloc_context();

    if (fmt_ctx != NULL) {
        int open_res = avformat_open_input(&fmt_ctx, filepath, NULL, NULL);

        if (open_res >= 0) {
            parse_media(fmt_ctx, doc);
        } else {
            fprintf(stderr, "media error: %s %s\n", filepath, av_err2str(open_res));
        }
    } else {
        fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
    }
}
|
||||
|
||||
|
||||
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
||||
struct vfile *f = ptr;
|
||||
|
||||
int ret = f->read(f, buf, buf_size);
|
||||
|
||||
if (ret == 0) {
|
||||
return AVERROR_EOF;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Parse a media file that is only reachable through a vfile (for instance an
 * archive entry), using a custom, non-seekable AVIO context whose data comes
 * from vfile_read().
 *
 * Inputs whose demuxer needs to seek fail to open with AVERROR(EIO); that
 * case is expected and skipped silently. Other open failures are logged.
 *
 * @param f   Virtual file to read from.
 * @param doc Document forwarded to parse_media().
 */
void parse_media_vfile(struct vfile *f, document_t *doc) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        fprintf(stderr, "Could not allocate AVFormatContext! %s \n", f->filepath);
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        fprintf(stderr, "Could not allocate AVIO buffer! %s \n", f->filepath);
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    if (io_ctx == NULL) {
        fprintf(stderr, "Could not allocate AVIOContext! %s \n", f->filepath);
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;
    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;

    // On failure avformat_open_input() frees pFormatCtx itself, but with
    // AVFMT_FLAG_CUSTOM_IO the AVIO context stays owned by this function.
    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
    if (res < 0) {
        // AVERROR(EIO): tried to parse media that requires seek
        if (res != AVERROR(EIO)) {
            fprintf(stderr, "media error: %s %s\n", f->filepath, av_err2str(res));
        }
    } else {
        // parse_media() releases pFormatCtx at its end (avformat_free_context).
        parse_media(pFormatCtx, doc);
    }

    // Free the I/O buffer through the context: libavformat may have replaced
    // the one allocated above. Done on every path to avoid leaking either.
    av_freep(&io_ctx->buffer);
    av_free(io_ctx);
}
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
#define MIN_VIDEO_SIZE 1024 * 64
|
||||
#define MIN_IMAGE_SIZE 1024 * 2
|
||||
|
||||
void parse_media(const char * filepath, document_t *doc);
|
||||
void parse_media_filename(const char * filepath, document_t *doc);
|
||||
|
||||
void parse_media_vfile(struct vfile *f, document_t *doc);
|
||||
|
||||
#endif
|
||||
|
@ -8,7 +8,7 @@
|
||||
#define MIME_EMPTY 1
|
||||
|
||||
#define DONT_PARSE 0x80000000
|
||||
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE
|
||||
/* True when mime_id is a known type (non-zero) without the DONT_PARSE flag.
 * Fully parenthesised so it can be negated or combined with other operators. */
#define SHOULD_PARSE(mime_id) (((mime_id) & DONT_PARSE) != DONT_PARSE && (mime_id) != 0)
|
||||
|
||||
#define PDF_MASK 0x40000000
|
||||
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
|
||||
@ -16,6 +16,12 @@
|
||||
#define FONT_MASK 0x20000000
|
||||
#define IS_FONT(mime_id) (mime_id & FONT_MASK) == FONT_MASK
|
||||
|
||||
/* Flag bit set on the mime ids of archive container formats. */
#define ARC_MASK 0x10000000
/* Parenthesised so that !IS_ARC(x) and IS_ARC(a | b) expand as intended. */
#define IS_ARC(mime_id) (((mime_id) & ARC_MASK) == ARC_MASK)

/* Flag bit set on the mime ids of archive "filters" (compression wrappers). */
#define ARC_FILTER_MASK 0x08000000
#define IS_ARC_FILTER(mime_id) (((mime_id) & ARC_FILTER_MASK) == ARC_FILTER_MASK)
|
||||
|
||||
enum major_mime {
|
||||
MimeInvalid = 0,
|
||||
MimeModel = 1,
|
||||
|
@ -20,7 +20,7 @@ enum mime {
|
||||
application_freeloader=655372,
|
||||
application_futuresplash=655373,
|
||||
application_groupwise=655374,
|
||||
application_gzip=655375,
|
||||
application_gzip=655375 | 0x08000000,
|
||||
application_hta=655376,
|
||||
application_i_deas=655377,
|
||||
application_iges=655378,
|
||||
@ -82,342 +82,346 @@ enum mime {
|
||||
application_vnd_xara=655434,
|
||||
application_vocaltec_media_desc=655435,
|
||||
application_vocaltec_media_file=655436,
|
||||
application_winhelp=655437,
|
||||
application_wordperfect=655438,
|
||||
application_wordperfect6_0=655439,
|
||||
application_wordperfect6_1=655440,
|
||||
application_x_123=655441,
|
||||
application_x_7z_compressed=655442,
|
||||
application_x_aim=655443,
|
||||
application_x_apple_diskimage=655444,
|
||||
application_x_arc=655445,
|
||||
application_x_archive=655446,
|
||||
application_x_atari_7800_rom=655447,
|
||||
application_x_authorware_bin=655448,
|
||||
application_x_authorware_map=655449,
|
||||
application_x_authorware_seg=655450,
|
||||
application_x_avira_qua=655451,
|
||||
application_x_bcpio=655452,
|
||||
application_x_bittorrent=655453,
|
||||
application_x_bsh=655454,
|
||||
application_x_bytecode_python=655455,
|
||||
application_x_bzip=655456,
|
||||
application_x_bzip2=655457,
|
||||
application_x_cbr=655458,
|
||||
application_x_cbz=655459 | 0x40000000,
|
||||
application_x_cdlink=655460,
|
||||
application_x_chat=655461,
|
||||
application_x_chrome_extension=655462,
|
||||
application_x_cocoa=655463,
|
||||
application_x_conference=655464,
|
||||
application_x_coredump=655465,
|
||||
application_x_cpio=655466,
|
||||
application_x_dbf=655467,
|
||||
application_x_dbt=655468,
|
||||
application_x_debian_package=655469,
|
||||
application_x_deepv=655470,
|
||||
application_x_director=655471,
|
||||
application_x_dmp=655472,
|
||||
application_x_dosdriver=655473,
|
||||
application_x_dosexec=655474,
|
||||
application_x_dvi=655475,
|
||||
application_x_elc=655476,
|
||||
application_warc=655437,
|
||||
application_winhelp=655438,
|
||||
application_wordperfect=655439,
|
||||
application_wordperfect6_0=655440,
|
||||
application_wordperfect6_1=655441,
|
||||
application_x_123=655442,
|
||||
application_x_7z_compressed=655443 | 0x10000000,
|
||||
application_x_aim=655444,
|
||||
application_x_apple_diskimage=655445,
|
||||
application_x_arc=655446 | 0x10000000,
|
||||
application_x_archive=655447,
|
||||
application_x_atari_7800_rom=655448,
|
||||
application_x_authorware_bin=655449,
|
||||
application_x_authorware_map=655450,
|
||||
application_x_authorware_seg=655451,
|
||||
application_x_avira_qua=655452,
|
||||
application_x_bcpio=655453,
|
||||
application_x_bittorrent=655454,
|
||||
application_x_bsh=655455,
|
||||
application_x_bytecode_python=655456,
|
||||
application_x_bzip=655457,
|
||||
application_x_bzip2=655458 | 0x08000000,
|
||||
application_x_cbr=655459,
|
||||
application_x_cbz=655460 | 0x40000000,
|
||||
application_x_cdlink=655461,
|
||||
application_x_chat=655462,
|
||||
application_x_chrome_extension=655463,
|
||||
application_x_cocoa=655464,
|
||||
application_x_conference=655465,
|
||||
application_x_coredump=655466,
|
||||
application_x_cpio=655467,
|
||||
application_x_dbf=655468,
|
||||
application_x_dbt=655469,
|
||||
application_x_debian_package=655470,
|
||||
application_x_deepv=655471,
|
||||
application_x_director=655472,
|
||||
application_x_dmp=655473,
|
||||
application_x_dosdriver=655474,
|
||||
application_x_dosexec=655475,
|
||||
application_x_dvi=655476,
|
||||
application_x_elc=655477,
|
||||
application_x_empty=1,
|
||||
application_x_envoy=655478,
|
||||
application_x_esrehber=655479,
|
||||
application_x_excel=655480,
|
||||
application_x_executable=655481,
|
||||
application_x_font_gdos=655482,
|
||||
application_x_font_pf2=655483,
|
||||
application_x_font_pfm=655484,
|
||||
application_x_font_sfn=655485,
|
||||
application_x_font_ttf=655486 | 0x20000000,
|
||||
application_x_freelance=655487,
|
||||
application_x_gamecube_rom=655488,
|
||||
application_x_gdbm=655489,
|
||||
application_x_gettext_translation=655490,
|
||||
application_x_git=655491,
|
||||
application_x_gsp=655492,
|
||||
application_x_gss=655493,
|
||||
application_x_gtar=655494,
|
||||
application_x_gzip=655495,
|
||||
application_x_hdf=655496,
|
||||
application_x_helpfile=655497,
|
||||
application_x_httpd_imap=655498,
|
||||
application_x_ima=655499,
|
||||
application_x_innosetup=655500,
|
||||
application_x_internett_signup=655501,
|
||||
application_x_inventor=655502,
|
||||
application_x_ip2=655503,
|
||||
application_x_java_applet=655504,
|
||||
application_x_java_commerce=655505,
|
||||
application_x_java_image=655506,
|
||||
application_x_java_jmod=655507,
|
||||
application_x_java_keystore=655508,
|
||||
application_x_kdelnk=655509,
|
||||
application_x_koan=655510,
|
||||
application_x_latex=655511,
|
||||
application_x_livescreen=655512,
|
||||
application_x_lotus=655513,
|
||||
application_x_lz4=655514,
|
||||
application_x_lz4_json=655515,
|
||||
application_x_lzh=655516,
|
||||
application_x_lzh_compressed=655517,
|
||||
application_x_lzx=655518,
|
||||
application_x_mach_binary=655519,
|
||||
application_x_mach_executable=655520,
|
||||
application_x_magic_cap_package_1_0=655521,
|
||||
application_x_mathcad=655522,
|
||||
application_x_maxis_dbpf=655523,
|
||||
application_x_meme=655524,
|
||||
application_x_midi=655525,
|
||||
application_x_mif=655526,
|
||||
application_x_mix_transfer=655527,
|
||||
application_x_mobipocket_ebook=655528,
|
||||
application_x_ms_compress_szdd=655529,
|
||||
application_x_ms_pdb=655530,
|
||||
application_x_ms_reader=655531,
|
||||
application_x_msaccess=655532,
|
||||
application_x_navi_animation=655533,
|
||||
application_x_navidoc=655534,
|
||||
application_x_navimap=655535,
|
||||
application_x_navistyle=655536,
|
||||
application_x_nes_rom=655537,
|
||||
application_x_netcdf=655538,
|
||||
application_x_newton_compatible_pkg=655539,
|
||||
application_x_nintendo_ds_rom=655540,
|
||||
application_x_object=655541,
|
||||
application_x_omc=655542,
|
||||
application_x_omcdatamaker=655543,
|
||||
application_x_omcregerator=655544,
|
||||
application_x_pagemaker=655545,
|
||||
application_x_pcl=655546,
|
||||
application_x_pgp_keyring=655547,
|
||||
application_x_pixclscript=655548,
|
||||
application_x_pkcs7_certreqresp=655549,
|
||||
application_x_pkcs7_signature=655550,
|
||||
application_x_project=655551,
|
||||
application_x_qpro=655552,
|
||||
application_x_rar=655553,
|
||||
application_x_rpm=655554,
|
||||
application_x_sdp=655555,
|
||||
application_x_sea=655556,
|
||||
application_x_seelogo=655557,
|
||||
application_x_setupscript=655558,
|
||||
application_x_shar=655559,
|
||||
application_x_sharedlib=655560,
|
||||
application_x_shockwave_flash=655561,
|
||||
application_x_snappy_framed=655562,
|
||||
application_x_sprite=655563,
|
||||
application_x_sqlite3=655564,
|
||||
application_x_sv4cpio=655565,
|
||||
application_x_sv4crc=655566,
|
||||
application_x_tar=655567,
|
||||
application_x_tbook=655568,
|
||||
application_x_terminfo=655569,
|
||||
application_x_terminfo2=655570,
|
||||
application_x_tex_tfm=655571,
|
||||
application_x_texinfo=655572,
|
||||
application_x_ustar=655573,
|
||||
application_x_visio=655574,
|
||||
application_x_vnd_audioexplosion_mzz=655575,
|
||||
application_x_vnd_ls_xpix=655576,
|
||||
application_x_vrml=655577,
|
||||
application_x_wais_source=655578,
|
||||
application_x_wine_extension_ini=655579,
|
||||
application_x_wintalk=655580,
|
||||
application_x_world=655581,
|
||||
application_x_wri=655582,
|
||||
application_x_x509_ca_cert=655583,
|
||||
application_x_xz=655584,
|
||||
application_x_zip=655585,
|
||||
application_x_zstd=655586,
|
||||
application_xml=655587,
|
||||
application_zip=655588,
|
||||
application_zlib=655589,
|
||||
audio_it=458982,
|
||||
audio_make=458983,
|
||||
audio_mid=458984,
|
||||
audio_midi=458985,
|
||||
audio_mp4=458986,
|
||||
audio_mpeg=458987,
|
||||
audio_ogg=458988,
|
||||
audio_s3m=458989,
|
||||
audio_tsp_audio=458990,
|
||||
audio_tsplayer=458991,
|
||||
audio_vnd_qcelp=458992,
|
||||
audio_voxware=458993,
|
||||
audio_x_aiff=458994,
|
||||
audio_x_flac=458995,
|
||||
audio_x_gsm=458996,
|
||||
audio_x_hx_aac_adts=458997,
|
||||
audio_x_jam=458998,
|
||||
audio_x_liveaudio=458999,
|
||||
audio_x_m4a=459000,
|
||||
audio_x_midi=459001,
|
||||
audio_x_mod=459002,
|
||||
audio_x_mp4a_latm=459003,
|
||||
audio_x_mpeg_3=459004,
|
||||
audio_x_mpequrl=459005,
|
||||
audio_x_nspaudio=459006,
|
||||
audio_x_pn_realaudio=459007,
|
||||
audio_x_psid=459008,
|
||||
audio_x_realaudio=459009,
|
||||
audio_x_twinvq=459010,
|
||||
audio_x_twinvq_plugin=459011,
|
||||
audio_x_voc=459012,
|
||||
audio_x_wav=459013,
|
||||
audio_xm=459014,
|
||||
font_otf=327943 | 0x20000000,
|
||||
font_sfnt=327944 | 0x20000000,
|
||||
font_woff=327945 | 0x20000000,
|
||||
font_woff2=327946 | 0x20000000,
|
||||
image_cmu_raster=524555,
|
||||
image_fif=524556,
|
||||
image_florian=524557,
|
||||
image_g3fax=524558,
|
||||
image_gif=524559,
|
||||
image_heic=524560,
|
||||
image_ief=524561,
|
||||
image_jpeg=524562,
|
||||
image_jutvision=524563,
|
||||
image_naplps=524564,
|
||||
image_pict=524565,
|
||||
image_png=524566,
|
||||
image_svg=524567 | 0x80000000,
|
||||
image_svg_xml=524568 | 0x80000000,
|
||||
image_tiff=524569,
|
||||
image_vnd_adobe_photoshop=524570 | 0x80000000,
|
||||
image_vnd_djvu=524571 | 0x80000000,
|
||||
image_vnd_fpx=524572,
|
||||
image_vnd_microsoft_icon=524573,
|
||||
image_vnd_rn_realflash=524574,
|
||||
image_vnd_rn_realpix=524575,
|
||||
image_vnd_wap_wbmp=524576,
|
||||
image_vnd_xiff=524577,
|
||||
image_webp=524578,
|
||||
image_wmf=524579,
|
||||
image_x_3ds=524580,
|
||||
image_x_cmu_raster=524581,
|
||||
image_x_cur=524582,
|
||||
image_x_dwg=524583,
|
||||
image_x_eps=524584,
|
||||
image_x_exr=524585,
|
||||
image_x_gem=524586,
|
||||
image_x_icns=524587,
|
||||
image_x_icon=524588 | 0x80000000,
|
||||
image_x_jg=524589,
|
||||
image_x_jps=524590,
|
||||
image_x_ms_bmp=524591,
|
||||
image_x_niff=524592,
|
||||
image_x_pcx=524593,
|
||||
image_x_pict=524594,
|
||||
image_x_portable_bitmap=524595,
|
||||
image_x_portable_graymap=524596,
|
||||
image_x_portable_pixmap=524597,
|
||||
image_x_quicktime=524598,
|
||||
image_x_rgb=524599,
|
||||
image_x_tga=524600,
|
||||
image_x_tiff=524601,
|
||||
image_x_win_bitmap=524602,
|
||||
image_x_xcf=524603 | 0x80000000,
|
||||
image_x_xpixmap=524604 | 0x80000000,
|
||||
image_x_xwindowdump=524605,
|
||||
message_news=196926,
|
||||
message_rfc822=196927,
|
||||
model_vnd_dwf=65856,
|
||||
model_vnd_gdl=65857,
|
||||
model_vnd_gs_gdl=65858,
|
||||
model_vrml=65859,
|
||||
model_x_pov=65860,
|
||||
text_PGP=590149,
|
||||
text_asp=590150,
|
||||
text_css=590151,
|
||||
text_html=590152,
|
||||
text_javascript=590153,
|
||||
text_mcf=590154,
|
||||
text_pascal=590155,
|
||||
text_plain=590156,
|
||||
text_richtext=590157,
|
||||
text_rtf=590158,
|
||||
text_scriplet=590159,
|
||||
text_tab_separated_values=590160,
|
||||
text_troff=590161,
|
||||
text_uri_list=590162,
|
||||
text_vnd_abc=590163,
|
||||
text_vnd_fmi_flexstor=590164,
|
||||
text_vnd_wap_wml=590165,
|
||||
text_vnd_wap_wmlscript=590166,
|
||||
text_webviewhtml=590167,
|
||||
text_x_Algol68=590168,
|
||||
text_x_asm=590169,
|
||||
text_x_audiosoft_intra=590170,
|
||||
text_x_awk=590171,
|
||||
text_x_bcpl=590172,
|
||||
text_x_c=590173,
|
||||
text_x_c__=590174,
|
||||
text_x_component=590175,
|
||||
text_x_diff=590176,
|
||||
text_x_fortran=590177,
|
||||
text_x_java=590178,
|
||||
text_x_la_asf=590179,
|
||||
text_x_lisp=590180,
|
||||
text_x_m=590181,
|
||||
text_x_m4=590182,
|
||||
text_x_makefile=590183,
|
||||
text_x_ms_regedit=590184,
|
||||
text_x_msdos_batch=590185,
|
||||
text_x_objective_c=590186,
|
||||
text_x_pascal=590187,
|
||||
text_x_perl=590188,
|
||||
text_x_php=590189,
|
||||
text_x_po=590190,
|
||||
text_x_python=590191,
|
||||
text_x_ruby=590192,
|
||||
text_x_sass=590193,
|
||||
text_x_scss=590194,
|
||||
text_x_server_parsed_html=590195,
|
||||
text_x_setext=590196,
|
||||
text_x_sgml=590197,
|
||||
text_x_shellscript=590198,
|
||||
text_x_speech=590199,
|
||||
text_x_tcl=590200,
|
||||
text_x_tex=590201,
|
||||
text_x_uil=590202,
|
||||
text_x_uuencode=590203,
|
||||
text_x_vcalendar=590204,
|
||||
text_x_vcard=590205,
|
||||
text_xml=590206,
|
||||
video_MP2T=393599,
|
||||
video_animaflex=393600,
|
||||
video_avi=393601,
|
||||
video_avs_video=393602,
|
||||
video_mp4=393603,
|
||||
video_mpeg=393604,
|
||||
video_quicktime=393605,
|
||||
video_vdo=393606,
|
||||
video_vivo=393607,
|
||||
video_vnd_rn_realvideo=393608,
|
||||
video_vosaic=393609,
|
||||
video_webm=393610,
|
||||
video_x_amt_demorun=393611,
|
||||
video_x_amt_showrun=393612,
|
||||
video_x_atomic3d_feature=393613,
|
||||
video_x_dl=393614,
|
||||
video_x_dv=393615,
|
||||
video_x_fli=393616,
|
||||
video_x_flv=393617,
|
||||
video_x_isvideo=393618,
|
||||
video_x_jng=393619 | 0x80000000,
|
||||
video_x_m4v=393620,
|
||||
video_x_matroska=393621,
|
||||
video_x_mng=393622,
|
||||
video_x_motion_jpeg=393623,
|
||||
video_x_ms_asf=393624,
|
||||
video_x_msvideo=393625,
|
||||
video_x_qtc=393626,
|
||||
video_x_sgi_movie=393627,
|
||||
x_epoc_x_sisx_app=721308,
|
||||
application_x_envoy=655479,
|
||||
application_x_esrehber=655480,
|
||||
application_x_excel=655481,
|
||||
application_x_executable=655482,
|
||||
application_x_font_gdos=655483,
|
||||
application_x_font_pf2=655484,
|
||||
application_x_font_pfm=655485,
|
||||
application_x_font_sfn=655486,
|
||||
application_x_font_ttf=655487 | 0x20000000,
|
||||
application_x_freelance=655488,
|
||||
application_x_gamecube_rom=655489,
|
||||
application_x_gdbm=655490,
|
||||
application_x_gettext_translation=655491,
|
||||
application_x_git=655492,
|
||||
application_x_gsp=655493,
|
||||
application_x_gss=655494,
|
||||
application_x_gtar=655495,
|
||||
application_x_gzip=655496,
|
||||
application_x_hdf=655497,
|
||||
application_x_helpfile=655498,
|
||||
application_x_httpd_imap=655499,
|
||||
application_x_ima=655500,
|
||||
application_x_innosetup=655501,
|
||||
application_x_internett_signup=655502,
|
||||
application_x_inventor=655503,
|
||||
application_x_ip2=655504,
|
||||
application_x_java_applet=655505,
|
||||
application_x_java_commerce=655506,
|
||||
application_x_java_image=655507,
|
||||
application_x_java_jmod=655508,
|
||||
application_x_java_keystore=655509,
|
||||
application_x_kdelnk=655510,
|
||||
application_x_koan=655511,
|
||||
application_x_latex=655512,
|
||||
application_x_livescreen=655513,
|
||||
application_x_lotus=655514,
|
||||
application_x_lz4=655515 | 0x08000000,
|
||||
application_x_lz4_json=655516,
|
||||
application_x_lzh=655517,
|
||||
application_x_lzh_compressed=655518,
|
||||
application_x_lzip=655519 | 0x08000000,
|
||||
application_x_lzma=655520 | 0x08000000,
|
||||
application_x_lzop=655521 | 0x08000000,
|
||||
application_x_lzx=655522,
|
||||
application_x_mach_binary=655523,
|
||||
application_x_mach_executable=655524,
|
||||
application_x_magic_cap_package_1_0=655525,
|
||||
application_x_mathcad=655526,
|
||||
application_x_maxis_dbpf=655527,
|
||||
application_x_meme=655528,
|
||||
application_x_midi=655529,
|
||||
application_x_mif=655530,
|
||||
application_x_mix_transfer=655531,
|
||||
application_x_mobipocket_ebook=655532,
|
||||
application_x_ms_compress_szdd=655533,
|
||||
application_x_ms_pdb=655534,
|
||||
application_x_ms_reader=655535,
|
||||
application_x_msaccess=655536,
|
||||
application_x_navi_animation=655537,
|
||||
application_x_navidoc=655538,
|
||||
application_x_navimap=655539,
|
||||
application_x_navistyle=655540,
|
||||
application_x_nes_rom=655541,
|
||||
application_x_netcdf=655542,
|
||||
application_x_newton_compatible_pkg=655543,
|
||||
application_x_nintendo_ds_rom=655544,
|
||||
application_x_object=655545,
|
||||
application_x_omc=655546,
|
||||
application_x_omcdatamaker=655547,
|
||||
application_x_omcregerator=655548,
|
||||
application_x_pagemaker=655549,
|
||||
application_x_pcl=655550,
|
||||
application_x_pgp_keyring=655551,
|
||||
application_x_pixclscript=655552,
|
||||
application_x_pkcs7_certreqresp=655553,
|
||||
application_x_pkcs7_signature=655554,
|
||||
application_x_project=655555,
|
||||
application_x_qpro=655556,
|
||||
application_x_rar=655557 | 0x10000000,
|
||||
application_x_rpm=655558,
|
||||
application_x_sdp=655559,
|
||||
application_x_sea=655560,
|
||||
application_x_seelogo=655561,
|
||||
application_x_setupscript=655562,
|
||||
application_x_shar=655563,
|
||||
application_x_sharedlib=655564,
|
||||
application_x_shockwave_flash=655565,
|
||||
application_x_snappy_framed=655566,
|
||||
application_x_sprite=655567,
|
||||
application_x_sqlite3=655568,
|
||||
application_x_sv4cpio=655569,
|
||||
application_x_sv4crc=655570,
|
||||
application_x_tar=655571 | 0x10000000,
|
||||
application_x_tbook=655572,
|
||||
application_x_terminfo=655573,
|
||||
application_x_terminfo2=655574,
|
||||
application_x_tex_tfm=655575,
|
||||
application_x_texinfo=655576,
|
||||
application_x_ustar=655577,
|
||||
application_x_visio=655578,
|
||||
application_x_vnd_audioexplosion_mzz=655579,
|
||||
application_x_vnd_ls_xpix=655580,
|
||||
application_x_vrml=655581,
|
||||
application_x_wais_source=655582,
|
||||
application_x_wine_extension_ini=655583,
|
||||
application_x_wintalk=655584,
|
||||
application_x_world=655585,
|
||||
application_x_wri=655586,
|
||||
application_x_x509_ca_cert=655587,
|
||||
application_x_xz=655588 | 0x08000000,
|
||||
application_x_zip=655589,
|
||||
application_x_zstd=655590 | 0x08000000,
|
||||
application_xml=655591,
|
||||
application_zip=655592 | 0x10000000,
|
||||
application_zlib=655593,
|
||||
audio_it=458986,
|
||||
audio_make=458987,
|
||||
audio_mid=458988,
|
||||
audio_midi=458989,
|
||||
audio_mp4=458990,
|
||||
audio_mpeg=458991,
|
||||
audio_ogg=458992,
|
||||
audio_s3m=458993,
|
||||
audio_tsp_audio=458994,
|
||||
audio_tsplayer=458995,
|
||||
audio_vnd_qcelp=458996,
|
||||
audio_voxware=458997,
|
||||
audio_x_aiff=458998,
|
||||
audio_x_flac=458999,
|
||||
audio_x_gsm=459000,
|
||||
audio_x_hx_aac_adts=459001,
|
||||
audio_x_jam=459002,
|
||||
audio_x_liveaudio=459003,
|
||||
audio_x_m4a=459004,
|
||||
audio_x_midi=459005,
|
||||
audio_x_mod=459006,
|
||||
audio_x_mp4a_latm=459007,
|
||||
audio_x_mpeg_3=459008,
|
||||
audio_x_mpequrl=459009,
|
||||
audio_x_nspaudio=459010,
|
||||
audio_x_pn_realaudio=459011,
|
||||
audio_x_psid=459012,
|
||||
audio_x_realaudio=459013,
|
||||
audio_x_twinvq=459014,
|
||||
audio_x_twinvq_plugin=459015,
|
||||
audio_x_voc=459016,
|
||||
audio_x_wav=459017,
|
||||
audio_xm=459018,
|
||||
font_otf=327947 | 0x20000000,
|
||||
font_sfnt=327948 | 0x20000000,
|
||||
font_woff=327949 | 0x20000000,
|
||||
font_woff2=327950 | 0x20000000,
|
||||
image_cmu_raster=524559,
|
||||
image_fif=524560,
|
||||
image_florian=524561,
|
||||
image_g3fax=524562,
|
||||
image_gif=524563,
|
||||
image_heic=524564,
|
||||
image_ief=524565,
|
||||
image_jpeg=524566,
|
||||
image_jutvision=524567,
|
||||
image_naplps=524568,
|
||||
image_pict=524569,
|
||||
image_png=524570,
|
||||
image_svg=524571 | 0x80000000,
|
||||
image_svg_xml=524572 | 0x80000000,
|
||||
image_tiff=524573,
|
||||
image_vnd_adobe_photoshop=524574 | 0x80000000,
|
||||
image_vnd_djvu=524575 | 0x80000000,
|
||||
image_vnd_fpx=524576,
|
||||
image_vnd_microsoft_icon=524577,
|
||||
image_vnd_rn_realflash=524578,
|
||||
image_vnd_rn_realpix=524579,
|
||||
image_vnd_wap_wbmp=524580,
|
||||
image_vnd_xiff=524581,
|
||||
image_webp=524582,
|
||||
image_wmf=524583,
|
||||
image_x_3ds=524584,
|
||||
image_x_cmu_raster=524585,
|
||||
image_x_cur=524586,
|
||||
image_x_dwg=524587,
|
||||
image_x_eps=524588,
|
||||
image_x_exr=524589,
|
||||
image_x_gem=524590,
|
||||
image_x_icns=524591,
|
||||
image_x_icon=524592 | 0x80000000,
|
||||
image_x_jg=524593,
|
||||
image_x_jps=524594,
|
||||
image_x_ms_bmp=524595,
|
||||
image_x_niff=524596,
|
||||
image_x_pcx=524597,
|
||||
image_x_pict=524598,
|
||||
image_x_portable_bitmap=524599,
|
||||
image_x_portable_graymap=524600,
|
||||
image_x_portable_pixmap=524601,
|
||||
image_x_quicktime=524602,
|
||||
image_x_rgb=524603,
|
||||
image_x_tga=524604,
|
||||
image_x_tiff=524605,
|
||||
image_x_win_bitmap=524606,
|
||||
image_x_xcf=524607 | 0x80000000,
|
||||
image_x_xpixmap=524608 | 0x80000000,
|
||||
image_x_xwindowdump=524609,
|
||||
message_news=196930,
|
||||
message_rfc822=196931,
|
||||
model_vnd_dwf=65860,
|
||||
model_vnd_gdl=65861,
|
||||
model_vnd_gs_gdl=65862,
|
||||
model_vrml=65863,
|
||||
model_x_pov=65864,
|
||||
text_PGP=590153,
|
||||
text_asp=590154,
|
||||
text_css=590155,
|
||||
text_html=590156,
|
||||
text_javascript=590157,
|
||||
text_mcf=590158,
|
||||
text_pascal=590159,
|
||||
text_plain=590160,
|
||||
text_richtext=590161,
|
||||
text_rtf=590162,
|
||||
text_scriplet=590163,
|
||||
text_tab_separated_values=590164,
|
||||
text_troff=590165,
|
||||
text_uri_list=590166,
|
||||
text_vnd_abc=590167,
|
||||
text_vnd_fmi_flexstor=590168,
|
||||
text_vnd_wap_wml=590169,
|
||||
text_vnd_wap_wmlscript=590170,
|
||||
text_webviewhtml=590171,
|
||||
text_x_Algol68=590172,
|
||||
text_x_asm=590173,
|
||||
text_x_audiosoft_intra=590174,
|
||||
text_x_awk=590175,
|
||||
text_x_bcpl=590176,
|
||||
text_x_c=590177,
|
||||
text_x_c__=590178,
|
||||
text_x_component=590179,
|
||||
text_x_diff=590180,
|
||||
text_x_fortran=590181,
|
||||
text_x_java=590182,
|
||||
text_x_la_asf=590183,
|
||||
text_x_lisp=590184,
|
||||
text_x_m=590185,
|
||||
text_x_m4=590186,
|
||||
text_x_makefile=590187,
|
||||
text_x_ms_regedit=590188,
|
||||
text_x_msdos_batch=590189,
|
||||
text_x_objective_c=590190,
|
||||
text_x_pascal=590191,
|
||||
text_x_perl=590192,
|
||||
text_x_php=590193,
|
||||
text_x_po=590194,
|
||||
text_x_python=590195,
|
||||
text_x_ruby=590196,
|
||||
text_x_sass=590197,
|
||||
text_x_scss=590198,
|
||||
text_x_server_parsed_html=590199,
|
||||
text_x_setext=590200,
|
||||
text_x_sgml=590201,
|
||||
text_x_shellscript=590202,
|
||||
text_x_speech=590203,
|
||||
text_x_tcl=590204,
|
||||
text_x_tex=590205,
|
||||
text_x_uil=590206,
|
||||
text_x_uuencode=590207,
|
||||
text_x_vcalendar=590208,
|
||||
text_x_vcard=590209,
|
||||
text_xml=590210,
|
||||
video_MP2T=393603,
|
||||
video_animaflex=393604,
|
||||
video_avi=393605,
|
||||
video_avs_video=393606,
|
||||
video_mp4=393607,
|
||||
video_mpeg=393608,
|
||||
video_quicktime=393609,
|
||||
video_vdo=393610,
|
||||
video_vivo=393611,
|
||||
video_vnd_rn_realvideo=393612,
|
||||
video_vosaic=393613,
|
||||
video_webm=393614,
|
||||
video_x_amt_demorun=393615,
|
||||
video_x_amt_showrun=393616,
|
||||
video_x_atomic3d_feature=393617,
|
||||
video_x_dl=393618,
|
||||
video_x_dv=393619,
|
||||
video_x_fli=393620,
|
||||
video_x_flv=393621,
|
||||
video_x_isvideo=393622,
|
||||
video_x_jng=393623 | 0x80000000,
|
||||
video_x_m4v=393624,
|
||||
video_x_matroska=393625,
|
||||
video_x_mng=393626,
|
||||
video_x_motion_jpeg=393627,
|
||||
video_x_ms_asf=393628,
|
||||
video_x_msvideo=393629,
|
||||
video_x_qtc=393630,
|
||||
video_x_sgi_movie=393631,
|
||||
x_epoc_x_sisx_app=721312,
|
||||
};
|
||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||
case application_arj: return "application/arj";
|
||||
@ -832,6 +836,10 @@ case audio_x_hx_aac_adts: return "audio/x-hx-aac-adts";
|
||||
case application_x_chrome_extension: return "application/x-chrome-extension";
|
||||
case image_heic: return "image/heic";
|
||||
case image_x_gem: return "image/x-gem";
|
||||
case application_x_lzma: return "application/x-lzma";
|
||||
case application_warc: return "application/warc";
|
||||
case application_x_lzip: return "application/x-lzip";
|
||||
case application_x_lzop: return "application/x-lzop";
|
||||
default: return NULL;}}
|
||||
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
|
||||
@ -1337,6 +1345,10 @@ g_hash_table_insert(ext_table, "z", (gpointer)application_zlib);
|
||||
g_hash_table_insert(ext_table, "pf2", (gpointer)application_x_font_pf2);
|
||||
g_hash_table_insert(ext_table, "jmod", (gpointer)application_x_java_jmod);
|
||||
g_hash_table_insert(ext_table, "heic", (gpointer)image_heic);
|
||||
g_hash_table_insert(ext_table, "lzma", (gpointer)application_x_lzma);
|
||||
g_hash_table_insert(ext_table, "warc", (gpointer)application_warc);
|
||||
g_hash_table_insert(ext_table, "lz", (gpointer)application_x_lzip);
|
||||
g_hash_table_insert(ext_table, "lzo", (gpointer)application_x_lzop);
|
||||
return ext_table;}
|
||||
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
|
||||
@ -1751,5 +1763,9 @@ g_hash_table_insert(mime_table, "audio/x-hx-aac-adts", (gpointer)audio_x_hx_aac_
|
||||
g_hash_table_insert(mime_table, "application/x-chrome-extension", (gpointer)application_x_chrome_extension);
|
||||
g_hash_table_insert(mime_table, "image/heic", (gpointer)image_heic);
|
||||
g_hash_table_insert(mime_table, "image/x-gem", (gpointer)image_x_gem);
|
||||
g_hash_table_insert(mime_table, "application/x-lzma", (gpointer)application_x_lzma);
|
||||
g_hash_table_insert(mime_table, "application/warc", (gpointer)application_warc);
|
||||
g_hash_table_insert(mime_table, "application/x-lzip", (gpointer)application_x_lzip);
|
||||
g_hash_table_insert(mime_table, "application/x-lzop", (gpointer)application_x_lzop);
|
||||
return mime_table;}
|
||||
#endif
|
||||
|
@ -1,9 +1,32 @@
|
||||
#include <src/ctx.h>
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
__thread magic_t Magic = NULL;
|
||||
|
||||
void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
|
||||
int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
if (f->fd == -1) {
|
||||
f->fd = open(f->filepath, O_RDONLY);
|
||||
if (f->fd == -1) {
|
||||
perror("open");
|
||||
printf("%s\n", f->filepath);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return read(f->fd, buf, size);
|
||||
}
|
||||
|
||||
/*
 * Invoke the close callback of a vfile, if one is set.
 *
 * `f` is a struct vfile lvalue (not a pointer), e.g. CLOSE_FILE(job->vfile).
 * The argument is parenthesized so that expressions such as `*ptr` expand
 * correctly, and it is evaluated more than once, so it must be free of side
 * effects. The expansion carries its own trailing `;` because call sites are
 * written without one; for the same reason it must not be used as the
 * unbraced body of an if/else.
 */
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
|
||||
|
||||
void fs_close(struct vfile *f) {
|
||||
if (f->fd != -1) {
|
||||
close(f->fd);
|
||||
}
|
||||
}
|
||||
|
||||
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
|
||||
|
||||
void *full_buf;
|
||||
|
||||
@ -11,17 +34,10 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
|
||||
full_buf = malloc(job->info.st_size);
|
||||
memcpy(full_buf, buf, job->info.st_size);
|
||||
} else {
|
||||
if (*fd == -1) {
|
||||
*fd = open(job->filepath, O_RDONLY);
|
||||
if (*fd == -1) {
|
||||
perror("open");
|
||||
printf("%s\n", job->filepath);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
full_buf = malloc(job->info.st_size);
|
||||
memcpy(full_buf, buf, bytes_read);
|
||||
int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
|
||||
|
||||
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
|
||||
if (ret == -1) {
|
||||
perror("read");
|
||||
return NULL;
|
||||
@ -65,24 +81,13 @@ void parse(void *arg) {
|
||||
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
|
||||
}
|
||||
|
||||
int fd = -1;
|
||||
int bytes_read = 0;
|
||||
|
||||
if (doc.mime == 0) {
|
||||
// Get mime type with libmagic
|
||||
fd = open(job->filepath, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
perror("open");
|
||||
free(job);
|
||||
return;
|
||||
}
|
||||
|
||||
bytes_read = read(fd, buf, PARSE_BUF_SIZE);
|
||||
|
||||
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
|
||||
if (bytes_read == -1) {
|
||||
perror("read");
|
||||
close(fd);
|
||||
free(job);
|
||||
CLOSE_FILE(job->vfile)
|
||||
return;
|
||||
}
|
||||
|
||||
@ -100,11 +105,16 @@ void parse(void *arg) {
|
||||
if (!(SHOULD_PARSE(doc.mime))) {
|
||||
|
||||
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
parse_media(job->filepath, &doc);
|
||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
|
||||
if (job->vfile.is_fs_file) {
|
||||
parse_media_filename(job->filepath, &doc);
|
||||
} else {
|
||||
parse_media_vfile(&job->vfile, &doc);
|
||||
}
|
||||
|
||||
} else if (IS_PDF(doc.mime)) {
|
||||
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
|
||||
void *pdf_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_pdf(pdf_buf, doc.size, &doc);
|
||||
|
||||
if (pdf_buf != buf && pdf_buf != NULL) {
|
||||
@ -112,22 +122,35 @@ void parse(void *arg) {
|
||||
}
|
||||
|
||||
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
|
||||
parse_text(bytes_read, &fd, (char *) buf, &doc);
|
||||
parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
|
||||
|
||||
} else if (IS_FONT(doc.mime)) {
|
||||
void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
|
||||
void *font_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_font(font_buf, doc.size, &doc);
|
||||
|
||||
if (font_buf != buf && font_buf != NULL) {
|
||||
free(font_buf);
|
||||
}
|
||||
} else if (
|
||||
ScanCtx.archive_mode != ARC_MODE_SKIP && (
|
||||
IS_ARC(doc.mime) ||
|
||||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
|
||||
)) {
|
||||
parse_archive(&job->vfile, &doc);
|
||||
}
|
||||
|
||||
//Parent meta
|
||||
if (!uuid_is_null(job->parent)) {
|
||||
char tmp[UUID_STR_LEN];
|
||||
uuid_unparse(job->parent, tmp);
|
||||
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->strval, tmp);
|
||||
APPEND_META((&doc), meta_parent)
|
||||
}
|
||||
|
||||
write_document(&doc);
|
||||
|
||||
if (fd != -1) {
|
||||
close(fd);
|
||||
}
|
||||
|
||||
free(job);
|
||||
CLOSE_FILE(job->vfile)
|
||||
}
|
||||
|
@ -5,6 +5,9 @@
|
||||
|
||||
#define PARSE_BUF_SIZE 4096
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size);
|
||||
void fs_close(struct vfile *f);
|
||||
|
||||
void parse(void *arg);
|
||||
|
||||
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include "text.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
|
||||
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {
|
||||
|
||||
char *intermediate_buf;
|
||||
int intermediate_buf_len;
|
||||
@ -13,10 +13,6 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
|
||||
memcpy(intermediate_buf, buf, to_copy);
|
||||
|
||||
} else {
|
||||
if (*fd == -1) {
|
||||
*fd = open(doc->filepath, O_RDONLY);
|
||||
}
|
||||
|
||||
int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
|
||||
|
||||
intermediate_buf = malloc(to_read + bytes_read);
|
||||
@ -25,7 +21,7 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
|
||||
memcpy(intermediate_buf, buf, bytes_read);
|
||||
}
|
||||
|
||||
read(*fd, intermediate_buf + bytes_read, to_read);
|
||||
f->read(f, intermediate_buf + bytes_read, to_read);
|
||||
}
|
||||
text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
|
||||
text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
|
||||
|
@ -3,6 +3,6 @@
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc);
|
||||
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc);
|
||||
|
||||
#endif
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include <wordexp.h>
|
||||
#include "ft2build.h"
|
||||
#include "freetype/freetype.h"
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
#include <onion/onion.h>
|
||||
@ -52,6 +54,7 @@
|
||||
#include "parsing/pdf.h"
|
||||
#include "parsing/media.h"
|
||||
#include "parsing/font.h"
|
||||
#include "parsing/arc.h"
|
||||
#include "cli.h"
|
||||
#include "utf8.h/utf8.h"
|
||||
|
||||
|
@ -119,6 +119,7 @@ static void *tpool_worker(void *arg) {
|
||||
}
|
||||
|
||||
work->func(work->arg);
|
||||
free(work->arg);
|
||||
free(work);
|
||||
}
|
||||
|
||||
|
33
src/types.h
33
src/types.h
@ -9,6 +9,12 @@
|
||||
#define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK
|
||||
#define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK
|
||||
|
||||
/*
 * Archive handling mode. parse() only hands a document to the archive parser
 * when the configured mode is not ARC_MODE_SKIP.
 * NOTE(review): the exact difference between LIST, SHALLOW and RECURSE is
 * implemented elsewhere -- confirm against the archive parser.
 */
typedef int archive_mode_t;

#define ARC_MODE_SKIP    0
#define ARC_MODE_LIST    1
#define ARC_MODE_SHALLOW 2
#define ARC_MODE_RECURSE 3
|
||||
|
||||
// This is written to file as a 8bit char!
|
||||
enum metakey {
|
||||
MetaContent = 1 | META_STR_MASK,
|
||||
@ -24,6 +30,7 @@ enum metakey {
|
||||
MetaGenre = 11 | META_STR_MASK,
|
||||
MetaTitle = 12 | META_STR_MASK,
|
||||
MetaFontName = 13 | META_STR_MASK,
|
||||
MetaParent = 14 | META_STR_MASK,
|
||||
};
|
||||
|
||||
typedef struct index_descriptor {
|
||||
@ -63,13 +70,39 @@ typedef struct document {
|
||||
short ext;
|
||||
meta_line_t *meta_head;
|
||||
meta_line_t *meta_tail;
|
||||
struct document *child_head;
|
||||
struct document *child_tail;
|
||||
char *filepath;
|
||||
} document_t;
|
||||
|
||||
/* Forward declaration so the callback types below can name the handle. */
typedef struct vfile vfile_t;

/* Callback that reads up to `size` bytes into `buf`; -1 signals an error. */
typedef int (*read_func_t)(vfile_t *f, void *buf, size_t size);

/* Seek callback type; no member of struct vfile uses it here. */
typedef int (*seek_func_t)(vfile_t *f, size_t size, int whence);

/* Callback that releases whatever the handle holds open. */
typedef void (*close_func_t)(vfile_t *f);

/*
 * Virtual file handle: a file is accessed through the `read`/`close`
 * callbacks rather than through a raw descriptor.
 */
struct vfile {
    /* Backing handle; which member is valid depends on the kind of file. */
    union {
        int fd;              /* file-system file: descriptor, -1 until opened */
        struct archive *arc; /* presumably a libarchive reader -- TODO confirm */
    };

    int is_fs_file;          /* non-zero when the file can be opened by path */
    char *filepath;

    read_func_t read;
    close_func_t close;      /* may be NULL, see CLOSE_FILE */
};
|
||||
|
||||
typedef struct parse_job_t {
|
||||
int base;
|
||||
int ext;
|
||||
struct stat info;
|
||||
struct vfile vfile;
|
||||
uuid_t parent;
|
||||
char filepath[1];
|
||||
} parse_job_t;
|
||||
|
||||
|
@ -46,6 +46,10 @@ void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
|
||||
dyn_buffer_write_char(buf, '\0');
|
||||
}
|
||||
|
||||
/*
 * Append the characters of `str` to the buffer. Unlike dyn_buffer_write_str,
 * no terminating '\0' is written after them.
 */
void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) {
    size_t len = strlen(str);

    dyn_buffer_write(buf, str, len);
}
|
||||
|
||||
void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
|
||||
grow_buffer_small(buf);
|
||||
|
||||
|
@ -47,6 +47,8 @@ void dyn_buffer_write_char(dyn_buffer_t *buf, char c);
|
||||
|
||||
void dyn_buffer_write_str(dyn_buffer_t *buf, char *str);
|
||||
|
||||
void dyn_buffer_append_string(dyn_buffer_t *buf, char *str);
|
||||
|
||||
void dyn_buffer_write_int(dyn_buffer_t *buf, int d);
|
||||
|
||||
void dyn_buffer_write_short(dyn_buffer_t *buf, short s);
|
||||
|
@ -360,12 +360,24 @@ int file(void *p, onion_request *req, onion_response *res) {
|
||||
return OCS_PROCESSED;
|
||||
}
|
||||
|
||||
cJSON *doc = elastic_get_document(arg_uuid);
|
||||
cJSON *source = cJSON_GetObjectItem(doc, "_source");
|
||||
cJSON *index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
return OCS_NOT_PROCESSED;
|
||||
char *next = arg_uuid;
|
||||
cJSON *doc = NULL;
|
||||
cJSON *index_id = NULL;
|
||||
cJSON *source = NULL;
|
||||
|
||||
while (true) {
|
||||
doc = elastic_get_document(next);
|
||||
source = cJSON_GetObjectItem(doc, "_source");
|
||||
index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
return OCS_NOT_PROCESSED;
|
||||
}
|
||||
cJSON *parent = cJSON_GetObjectItem(source, "parent");
|
||||
if (parent == NULL) {
|
||||
break;
|
||||
}
|
||||
next = parent->valuestring;
|
||||
}
|
||||
|
||||
index_t *idx = get_index_by_id(index_id->valuestring);
|
||||
|
File diff suppressed because one or more lines are too long
@ -23,6 +23,15 @@ body {
|
||||
border: none;
|
||||
}
|
||||
|
||||
/* Cards for documents that have a parent document */
.sub-document {
    background: #37474f;
}

/* Muted text inside such cards needs its own colour on that background */
.sub-document .text-muted {
    color: #8a949c !important;
}
|
||||
|
||||
|
||||
.list-group-item {
|
||||
background: #212121;
|
||||
color: #e0e0e0;
|
||||
|
@ -15,6 +15,10 @@ body {
|
||||
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .075) !important;
|
||||
}
|
||||
|
||||
/*
 * Cards for documents that have a parent document.
 * Same colour as #AB47BC1F, written with rgba() like the other translucent
 * colours in this stylesheet so it also works where #RRGGBBAA is unsupported.
 */
.sub-document {
    background: rgba(171, 71, 188, .122);
}
|
||||
|
||||
.navbar-brand {
|
||||
font-size: 1.75rem;
|
||||
padding: 0;
|
||||
|
@ -165,12 +165,19 @@ function createDocCard(hit) {
|
||||
let docCardBody = document.createElement("div");
|
||||
docCardBody.setAttribute("class", "card-body document");
|
||||
|
||||
//Title
|
||||
let title = makeTitle(hit);
|
||||
let isSubDocument = false;
|
||||
|
||||
let link = document.createElement("a");
|
||||
link.setAttribute("href", "f/" + hit["_id"]);
|
||||
link.setAttribute("target", "_blank");
|
||||
link.appendChild(title);
|
||||
|
||||
//Title
|
||||
let title = makeTitle(hit);
|
||||
if (hit["_source"].hasOwnProperty("parent")) {
|
||||
docCard.classList.add("sub-document");
|
||||
isSubDocument = true;
|
||||
}
|
||||
|
||||
let tagContainer = document.createElement("div");
|
||||
tagContainer.setAttribute("class", "card-text");
|
||||
@ -204,7 +211,7 @@ function createDocCard(hit) {
|
||||
}
|
||||
|
||||
// Hover
|
||||
if (thumbnail && hit["_source"]["videoc"] === "gif") {
|
||||
if (thumbnail && hit["_source"]["videoc"] === "gif" && !isSubDocument) {
|
||||
gifOver(thumbnail, hit);
|
||||
}
|
||||
break;
|
||||
@ -241,7 +248,7 @@ function createDocCard(hit) {
|
||||
}
|
||||
|
||||
//Audio
|
||||
if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc")) {
|
||||
if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc") && !isSubDocument) {
|
||||
|
||||
let audio = document.createElement("audio");
|
||||
audio.setAttribute("preload", "none");
|
||||
@ -267,7 +274,6 @@ function createDocCard(hit) {
|
||||
docCardBody.appendChild(link);
|
||||
docCard.appendChild(docCardBody);
|
||||
|
||||
link.appendChild(title);
|
||||
docCardBody.appendChild(tagContainer);
|
||||
|
||||
return docCard;
|
||||
@ -275,8 +281,9 @@ function createDocCard(hit) {
|
||||
|
||||
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
|
||||
let thumbnail;
|
||||
let isSubDocument = hit["_source"].hasOwnProperty("parent");
|
||||
|
||||
if (mimeCategory === "video" && shouldPlayVideo(hit)) {
|
||||
if (mimeCategory === "video" && shouldPlayVideo(hit) && !isSubDocument) {
|
||||
thumbnail = document.createElement("video");
|
||||
addVidSrc("f/" + hit["_id"], hit["_source"]["mime"], thumbnail);
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
*/
|
||||
function humanFileSize(bytes) {
|
||||
if (bytes === 0) {
|
||||
return "? B"
|
||||
return "0 B"
|
||||
}
|
||||
|
||||
let thresh = 1000;
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
<nav class="navbar navbar-expand-lg">
|
||||
<a class="navbar-brand" href="/">sist2</a>
|
||||
<span class="badge badge-pill version">v1.1.9</span>
|
||||
<span class="badge badge-pill version">v1.1.10</span>
|
||||
<span class="tagline">Lightning-fast file system indexer and search tool </span>
|
||||
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
|
||||
</nav>
|
||||
|
Loading…
x
Reference in New Issue
Block a user