mirror of
https://github.com/simon987/sist2.git
synced 2025-04-24 12:45:56 +00:00
It compiles! (I think)
This commit is contained in:
parent
035fa26dc4
commit
647fd70d8c
@ -113,18 +113,16 @@ sist2 scan --ocr eng ~/Books/Textbooks/
|
||||
## Build from source
|
||||
|
||||
You can compile **sist2** by yourself if you don't want to use the pre-compiled
|
||||
binaries.
|
||||
binaries (GCC 7+ required).
|
||||
|
||||
1. Install compile-time dependencies
|
||||
|
||||
```bash
|
||||
vcpkg install lmdb cjson glib
|
||||
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd
|
||||
```
|
||||
|
||||
2. Build
|
||||
```bash
|
||||
git clone --recurse-submodules https://github.com/simon987/sist2
|
||||
./scripts/get_static_libs.sh
|
||||
cmake .
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
```
|
||||
|
@ -1,12 +1,11 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
./scripts/get_static_libs.sh
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=off .
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
strip sist2
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=on .
|
||||
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#include "sist.h"
|
||||
|
||||
#include "libscan/scan.h"
|
||||
#include "libscan/arc/arc.h"
|
||||
|
||||
typedef struct scan_args {
|
||||
float quality;
|
||||
|
25
src/ctx.h
25
src/ctx.h
@ -4,6 +4,13 @@
|
||||
#include "sist.h"
|
||||
#include "tpool.h"
|
||||
#include "libscan/scan.h"
|
||||
#include "libscan/arc/arc.h"
|
||||
#include "libscan/cbr/cbr.h"
|
||||
#include "libscan/ebook/ebook.h"
|
||||
#include "libscan/font/font.h"
|
||||
#include "libscan/media/media.h"
|
||||
#include "libscan/ooxml/ooxml.h"
|
||||
#include "libscan/text/text.h"
|
||||
|
||||
#include <glib.h>
|
||||
#include <pcre.h>
|
||||
@ -17,12 +24,8 @@ struct {
|
||||
|
||||
tpool_t *pool;
|
||||
|
||||
int tn_size;
|
||||
int threads;
|
||||
int content_size;
|
||||
float tn_qscale;
|
||||
int depth;
|
||||
archive_mode_t archive_mode;
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
|
||||
@ -32,28 +35,30 @@ struct {
|
||||
GHashTable *original_table;
|
||||
GHashTable *copy_table;
|
||||
|
||||
pthread_mutex_t mupdf_mu;
|
||||
char * tesseract_lang;
|
||||
const char * tesseract_path;
|
||||
pcre *exclude;
|
||||
pcre_extra *exclude_extra;
|
||||
int fast;
|
||||
|
||||
scan_arc_ctx_t arc_ctx;
|
||||
scan_cbr_ctx_t cbr_ctx;
|
||||
scan_ebook_ctx_t ebook_ctx;
|
||||
scan_font_ctx_t font_ctx;
|
||||
scan_media_ctx_t media_ctx;
|
||||
scan_ooxml_ctx_t ooxml_ctx;
|
||||
scan_text_ctx_t text_ctx;
|
||||
} ScanCtx;
|
||||
|
||||
//TODO Move to log.h
|
||||
struct {
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
int no_color;
|
||||
} LogCtx;
|
||||
|
||||
//TODO Move to index.h ?
|
||||
struct {
|
||||
char *es_url;
|
||||
int batch_size;
|
||||
} IndexCtx;
|
||||
|
||||
//TODO Move to serve.h ?
|
||||
struct {
|
||||
char *es_url;
|
||||
int index_count;
|
||||
|
@ -11,8 +11,6 @@ typedef struct store_t {
|
||||
pthread_rwlock_t lock;
|
||||
} store_t;
|
||||
|
||||
#include "../sist.h"
|
||||
|
||||
store_t *store_create(char *path);
|
||||
|
||||
void store_destroy(store_t *store);
|
||||
|
@ -3,8 +3,6 @@
|
||||
|
||||
#define _XOPEN_SOURCE 500
|
||||
|
||||
#include "../sist.h"
|
||||
|
||||
int walk_directory_tree(const char *);
|
||||
|
||||
#endif
|
||||
|
72
src/main.c
72
src/main.c
@ -2,7 +2,6 @@
|
||||
#include "ctx.h"
|
||||
|
||||
#include <third-party/argparse/argparse.h>
|
||||
#include <uuid/uuid.h>
|
||||
#include <glib.h>
|
||||
|
||||
#include "cli.h"
|
||||
@ -10,10 +9,10 @@
|
||||
#include "io/store.h"
|
||||
#include "tpool.h"
|
||||
#include "io/walk.h"
|
||||
#include "io/walk.h"
|
||||
#include "index/elastic.h"
|
||||
#include "web/serve.h"
|
||||
#include "parsing/mime.h"
|
||||
#include "parsing/parse.h"
|
||||
|
||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||
|
||||
@ -28,11 +27,6 @@ static const char *const usage[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
void global_init() {
|
||||
//TODO
|
||||
// curl_global_init(CURL_GLOBAL_NOTHING);
|
||||
}
|
||||
|
||||
void init_dir(const char *dirpath) {
|
||||
char path[PATH_MAX];
|
||||
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
||||
@ -51,28 +45,74 @@ void scan_print_header() {
|
||||
LOG_INFOF("main.c", "sist2 v%s", Version)
|
||||
}
|
||||
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
|
||||
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
|
||||
}
|
||||
|
||||
void initialize_scan_context(scan_args_t *args) {
|
||||
|
||||
// Arc
|
||||
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||
ScanCtx.arc_ctx.log = sist_log;
|
||||
ScanCtx.arc_ctx.logf = sist_logf;
|
||||
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
|
||||
|
||||
// Cbr
|
||||
ScanCtx.cbr_ctx.log = sist_log;
|
||||
ScanCtx.cbr_ctx.logf = sist_logf;
|
||||
ScanCtx.cbr_ctx.store = _store;
|
||||
ScanCtx.cbr_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||
|
||||
// Ebook
|
||||
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
|
||||
ScanCtx.ebook_ctx.content_size = args->content_size;
|
||||
ScanCtx.ebook_ctx.tn_size = args->size;
|
||||
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
|
||||
ScanCtx.ebook_ctx.log = sist_log;
|
||||
ScanCtx.ebook_ctx.logf = sist_logf;
|
||||
ScanCtx.ebook_ctx.store = _store;
|
||||
|
||||
// Font
|
||||
ScanCtx.font_ctx.enable_tn = args->size > 0;
|
||||
ScanCtx.font_ctx.log = sist_log;
|
||||
ScanCtx.font_ctx.logf = sist_logf;
|
||||
ScanCtx.font_ctx.store = _store;
|
||||
|
||||
// Media
|
||||
ScanCtx.media_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.media_ctx.tn_size = args->size;
|
||||
ScanCtx.media_ctx.content_size = args->content_size;
|
||||
ScanCtx.media_ctx.log = sist_log;
|
||||
ScanCtx.media_ctx.logf = sist_logf;
|
||||
ScanCtx.media_ctx.store = _store;
|
||||
|
||||
// OOXML
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = sist_log;
|
||||
ScanCtx.ooxml_ctx.logf = sist_logf;
|
||||
|
||||
ScanCtx.tn_qscale = args->quality;
|
||||
ScanCtx.tn_size = args->size;
|
||||
ScanCtx.content_size = args->content_size;
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
ScanCtx.archive_mode = args->archive_mode;
|
||||
|
||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
||||
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
|
||||
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
||||
ScanCtx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.tesseract_path = args->tesseract_path;
|
||||
ScanCtx.fast = args->fast;
|
||||
}
|
||||
|
||||
init_dir(ScanCtx.index.path);
|
||||
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
|
||||
ScanCtx.mime_table = mime_get_mime_table();
|
||||
ScanCtx.ext_table = mime_get_ext_table();
|
||||
|
||||
initialize_scan_context(args);
|
||||
|
||||
init_dir(ScanCtx.index.path);
|
||||
|
||||
char store_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
@ -222,8 +262,6 @@ void sist2_web(web_args_t *args) {
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
|
||||
global_init();
|
||||
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
index_args_t *index_args = index_args_create();
|
||||
web_args_t *web_args = web_args_create();
|
||||
|
@ -11,6 +11,9 @@
|
||||
|
||||
__thread magic_t Magic = NULL;
|
||||
|
||||
#define MIN_VIDEO_SIZE 1024 * 64
|
||||
#define MIN_IMAGE_SIZE 1024 * 2
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
if (f->fd == -1) {
|
||||
@ -112,34 +115,28 @@ void parse(void *arg) {
|
||||
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
|
||||
scan_media_ctx_t media_ctx;
|
||||
media_ctx.tn_qscale = ScanCtx.tn_qscale;
|
||||
media_ctx.tn_size = ScanCtx.tn_size;
|
||||
media_ctx.content_size = ScanCtx.content_size;
|
||||
|
||||
parse_media(&media_ctx, &job->vfile, &doc);
|
||||
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
|
||||
|
||||
} else if (IS_PDF(doc.mime)) {
|
||||
// parse_ebook(pdf_buf, doc.size, &doc);
|
||||
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
|
||||
|
||||
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
|
||||
// parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
|
||||
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
|
||||
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||
|
||||
} else if (IS_FONT(doc.mime)) {
|
||||
// parse_font(font_buf, doc.size, &doc);
|
||||
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
|
||||
|
||||
} else if (
|
||||
ScanCtx.archive_mode != ARC_MODE_SKIP && (
|
||||
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
|
||||
IS_ARC(doc.mime) ||
|
||||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
|
||||
)) {
|
||||
// parse_archive(&job->vfile, &doc);
|
||||
} else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
|
||||
// parse_doc(doc_buf, doc.size, &doc);
|
||||
|
||||
} else if (is_cbr(doc.mime)) {
|
||||
// parse_cbr(cbr_buf, doc.size, &doc);
|
||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
|
||||
} else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) {
|
||||
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
|
||||
|
||||
} else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) {
|
||||
parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
|
||||
}
|
||||
|
||||
//Parent meta
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include "libscan/scan.h"
|
||||
|
||||
|
||||
|
||||
char *abspath(const char *path);
|
||||
|
||||
char *expandpath(const char *path);
|
||||
|
102
src/web/serve.c
102
src/web/serve.c
@ -157,108 +157,6 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Modified version of onion_shortcut_response_file that allows
|
||||
* browsers to seek media files.
|
||||
*/
|
||||
int chunked_response_file(const char *filename, const char *mime,
|
||||
int partial, onion_request *request, onion_response *res) {
|
||||
int fd = open(filename, O_RDONLY | O_CLOEXEC);
|
||||
struct stat st;
|
||||
|
||||
if (fd < 0 || stat(filename, &st) != 0 || S_ISDIR(st.st_mode)) {
|
||||
close(fd);
|
||||
return OCS_NOT_PROCESSED;
|
||||
}
|
||||
|
||||
size_t length = st.st_size;
|
||||
size_t ends;
|
||||
|
||||
const char *range = onion_request_get_header(request, "Range");
|
||||
if (partial && range && strncmp(range, "bytes=", 6) == 0) {
|
||||
onion_response_set_header(res, "Accept-Ranges", "bytes");
|
||||
|
||||
onion_response_set_code(res, HTTP_PARTIAL_CONTENT);
|
||||
|
||||
char tmp[1024];
|
||||
if (strlen(range + 6) >= sizeof(tmp)) {
|
||||
close(fd);
|
||||
return OCS_INTERNAL_ERROR;
|
||||
}
|
||||
strncpy(tmp, range + 6, sizeof(tmp) - 1);
|
||||
char *start = tmp;
|
||||
char *end = tmp;
|
||||
|
||||
while (*end != '-' && *end) {
|
||||
end++;
|
||||
}
|
||||
|
||||
if (*end == '-') {
|
||||
*end = '\0';
|
||||
end++;
|
||||
|
||||
size_t starts;
|
||||
starts = atol(start);
|
||||
if (*end) {
|
||||
// %d-%d
|
||||
ends = atol(end);
|
||||
} else {
|
||||
// %d-
|
||||
ends = MIN(starts + CHUNK_SIZE, length);
|
||||
}
|
||||
if (ends > length || starts >= length || starts < 0) {
|
||||
close(fd);
|
||||
return OCS_INTERNAL_ERROR;
|
||||
}
|
||||
length = ends - starts;
|
||||
|
||||
if (starts != 0) {
|
||||
lseek(fd, starts, SEEK_SET);
|
||||
}
|
||||
snprintf(tmp, sizeof(tmp), "bytes %ld-%ld/%ld",
|
||||
starts, ends - 1, st.st_size);
|
||||
onion_response_set_header(res, "Content-Range", tmp);
|
||||
}
|
||||
}
|
||||
onion_response_set_length(res, length);
|
||||
if (mime != NULL) {
|
||||
onion_response_set_header(res, "Content-Type", mime);
|
||||
} else {
|
||||
onion_response_set_header(res, "Content-Type", "application/octet-stream");
|
||||
}
|
||||
|
||||
onion_response_write_headers(res);
|
||||
if ((onion_request_get_flags(request) & OR_HEAD) == OR_HEAD) {
|
||||
length = 0;
|
||||
}
|
||||
|
||||
if (length) {
|
||||
int bytes_read = 0, bytes_written;
|
||||
size_t total_read = 0;
|
||||
char buf[4046];
|
||||
if (length > sizeof(buf)) {
|
||||
size_t max = length - sizeof(buf);
|
||||
while (total_read < max) {
|
||||
bytes_read = read(fd, buf, sizeof(buf));
|
||||
if (bytes_read < 0) {
|
||||
break;
|
||||
}
|
||||
total_read += bytes_read;
|
||||
bytes_written = onion_response_write(res, buf, bytes_read);
|
||||
if (bytes_written != bytes_read) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (sizeof(buf) >= (length - total_read)) {
|
||||
bytes_read = read(fd, buf, length - total_read);
|
||||
onion_response_write(res, buf, bytes_read);
|
||||
}
|
||||
}
|
||||
close(fd);
|
||||
return OCS_PROCESSED;
|
||||
}
|
||||
|
||||
void search(struct mg_connection *nc, struct http_message *hm) {
|
||||
|
||||
if (hm->body.len == 0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user