mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 23:18:51 +00:00
2.0 (#46)
* extract scan code to libscan, (wip) * submodules * replace curl with mongoose (wip) * replace onion with mongoose (wip) * replace onion with mongoose (wip) * It compiles! (I think) * Update readme * Entirely remove libonion (WIP) * unscramble submodules * recover screenshot * Update mappings * Bug fixes * update * media meta fix * memory fixes * More bug fixes... * Bug fix w/ libmagic & vfile * libmagic fix (again) * Better lightbox, better video handler, random reloads fix * Use svg for info icon * re-enable http auth * mobi support #41, fix logs * Update README & cleanup
This commit is contained in:
@@ -1,155 +0,0 @@
|
||||
#include "arc.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Tell whether a filtered (compressed) file wraps a tar archive, i.e. whether
 * the file name with its last extension removed ends in ".tar"
 * (for example "backup.tar.gz").
 *
 * @param filepath Path of the file.
 * @param ext      Offset in filepath of the first character after the last
 *                 '.'; 0 means there is nothing to inspect.
 * @return Non-zero when the stem ends in ".tar", 0 otherwise.
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char tar_suffix[] = ".tar";
    const size_t suffix_len = sizeof(tar_suffix) - 1;

    // ext <= 0: no extension, or a bogus offset that must not be used as a length.
    if (filepath == NULL || ext <= 0) {
        return 0;
    }

    // Length of the name once ".<last extension>" has been cut off.
    const size_t stem_len = (size_t) ext - 1;
    if (stem_len < suffix_len) {
        return 0;
    }

    // Compare in place instead of copying the stem into a fixed-size stack
    // buffer: a long path can no longer overflow anything.
    return memcmp(filepath + stem_len - suffix_len, tar_suffix, suffix_len) == 0;
}
|
||||
|
||||
int arc_read(struct vfile *f, void *buf, size_t size) {
|
||||
return archive_read_data(f->arc, buf, size);
|
||||
}
|
||||
|
||||
typedef struct arc_data {
|
||||
vfile_t *f;
|
||||
char buf[ARC_BUF_SIZE];
|
||||
} arc_data_f;
|
||||
|
||||
int vfile_open_callback(struct archive *a, void *user_data) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
if (data->f->is_fs_file && data->f->fd == -1) {
|
||||
data->f->fd = open(data->f->filepath, O_RDONLY);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
*buf = data->buf;
|
||||
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
|
||||
}
|
||||
|
||||
int vfile_close_callback(struct archive *a, void *user_data) {
|
||||
arc_data_f *data = user_data;
|
||||
|
||||
if (data->f->close != NULL) {
|
||||
data->f->close(data->f);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
/**
 * Index the contents of an archive.
 *
 * The archive is opened by path when f is a file-system file, or through the
 * vfile callbacks when ScanCtx.archive_mode is ARC_MODE_RECURSE; any other
 * combination is ignored.
 *
 * In ARC_MODE_LIST the paths of the regular-file entries are stored, one per
 * line, as the MetaContent of doc. Otherwise every regular-file entry is
 * handed to parse() as a sub-job whose path is "<f->filepath>#/<entry path>".
 *
 * @param f   Stream the archive is read from.
 * @param doc Document that represents the archive itself.
 */
void parse_archive(vfile_t *f, document_t *doc) {
    // Number of bytes reserved behind parse_job_t for the sub-job path.
    const size_t sub_path_cap = PATH_MAX * 2;

    struct archive *a;
    struct archive_entry *entry;

    arc_data_f data;
    data.f = f;

    int ret = 0;
    if (data.f->is_fs_file) {
        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    } else {
        return;
    }

    if (ret != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a));
        archive_read_free(a);
        return;
    }

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {
        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (!S_ISREG(archive_entry_stat(entry)->st_mode)) {
                continue;
            }

            char *path = (char *) archive_entry_pathname(entry);
            if (path == NULL) {
                // libarchive could not produce a pathname for this entry
                continue;
            }

            dyn_buffer_append_string(&buf, path);
            dyn_buffer_write_char(&buf, '\n');
        }
        dyn_buffer_write_char(&buf, '\0');

        // buf.cur counts the terminating NUL written just above
        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta_list != NULL) {
            meta_list->key = MetaContent;
            strcpy(meta_list->strval, buf.buf);
            APPEND_META(doc, meta_list);
        } else {
            LOG_ERROR(doc->filepath, "(arc.c) Could not allocate the archive listing");
        }
        dyn_buffer_destroy(&buf);

    } else {
        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + sub_path_cap);
        if (sub_job == NULL) {
            LOG_ERROR(doc->filepath, "(arc.c) Could not allocate the sub-job");
            archive_read_free(a);
            return;
        }

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->info = *archive_entry_stat(entry);
            if (!S_ISREG(sub_job->info.st_mode)) {
                continue;
            }

            const char *entry_path = archive_entry_pathname(entry);
            if (entry_path == NULL) {
                continue;
            }

            // Entry names come from the archive and can be arbitrarily long:
            // bound the write and skip anything that does not fit.
            int len = snprintf(sub_job->filepath, sub_path_cap, "%s#/%s", f->filepath, entry_path);
            if (len < 0 || (size_t) len >= sub_path_cap) {
                LOG_ERRORF(doc->filepath, "(arc.c) Path too long, skipping entry: %s", entry_path);
                continue;
            }

            // The "#/" separator guarantees that at least one '/' is present
            sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

            char *p = strrchr(sub_job->filepath, '.');
            if (p != NULL) {
                sub_job->ext = (int) (p - sub_job->filepath + 1);
            } else {
                sub_job->ext = (int) strlen(sub_job->filepath);
            }

            parse(sub_job);
        }

        free(sub_job);
    }

    archive_read_free(a);
}
|
||||
@@ -1,13 +0,0 @@
|
||||
#ifndef SIST2_ARC_H
|
||||
#define SIST2_ARC_H
|
||||
|
||||
#include "src/sist.h"
|
||||
/* Size in bytes of the staging buffer (and read block size) used when reading archives. */
#define ARC_BUF_SIZE 8192
|
||||
|
||||
int should_parse_filtered_file(const char *filepath, int ext);
|
||||
|
||||
void parse_archive(vfile_t *f, document_t *doc);
|
||||
|
||||
int arc_read(struct vfile * f, void *buf, size_t size);
|
||||
|
||||
#endif
|
||||
@@ -1,52 +0,0 @@
|
||||
#include "cbr.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
/* Mime ids of "application/x-cbr" and "application/x-cbz", resolved by cbr_init(). */
unsigned int cbr_mime;
unsigned int cbz_mime;
|
||||
|
||||
void cbr_init() {
|
||||
cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||
cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
|
||||
}
|
||||
|
||||
int is_cbr(unsigned int mime) {
|
||||
return mime == cbr_mime;
|
||||
}
|
||||
|
||||
/**
 * Parse a CBR (RAR) comic book by repacking it in memory as a ZIP archive and
 * handing the result to parse_pdf() with doc->mime temporarily set to
 * cbz_mime.
 *
 * The conversion buffer is twice the size of the input. When the archive
 * cannot be read, or the repacked data cannot be written (for instance because
 * it does not fit), the document is skipped instead of passing an incomplete
 * archive on.
 *
 * @param buf     RAR archive bytes.
 * @param buf_len Number of bytes in buf.
 * @param doc     Document being indexed.
 */
void parse_cbr(void *buf, size_t buf_len, document_t *doc) {
    const size_t out_buf_len = buf_len * 2;
    size_t out_buf_used = 0;
    int converted = FALSE;
    int write_failed = FALSE;
    struct archive_entry *entry;
    char arc_buf[ARC_BUF_SIZE];

    char *out_buf = malloc(out_buf_len);
    if (out_buf == NULL) {
        LOG_ERROR(doc->filepath, "(cbr.c) Could not allocate the conversion buffer");
        return;
    }

    struct archive *rar_in = archive_read_new();
    archive_read_support_filter_none(rar_in);
    archive_read_support_format_rar(rar_in);

    struct archive *zip_out = archive_write_new();
    archive_write_set_format_zip(zip_out);

    if (archive_read_open_memory(rar_in, buf, buf_len) != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(cbr.c) Could not read archive: %s", archive_error_string(rar_in));
        goto cleanup;
    }

    if (archive_write_open_memory(zip_out, out_buf, out_buf_len, &out_buf_used) != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "(cbr.c) Could not open output archive: %s", archive_error_string(zip_out));
        goto cleanup;
    }

    while (!write_failed && archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) {
        if (archive_write_header(zip_out, entry) == ARCHIVE_FATAL) {
            write_failed = TRUE;
            break;
        }

        // Copy the entry data; a read error simply ends this entry
        int len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
        while (len > 0) {
            if (archive_write_data(zip_out, arc_buf, len) < 0) {
                write_failed = TRUE;
                break;
            }
            len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
        }
    }

    // out_buf_used is only meaningful once the output archive is closed
    if (archive_write_close(zip_out) != ARCHIVE_OK) {
        write_failed = TRUE;
    }

    if (write_failed) {
        LOG_ERRORF(doc->filepath, "(cbr.c) Could not convert archive: %s", archive_error_string(zip_out));
    } else {
        converted = TRUE;
    }

cleanup:
    archive_write_free(zip_out);

    archive_read_close(rar_in);
    archive_read_free(rar_in);

    if (converted) {
        doc->mime = cbz_mime;
        parse_pdf(out_buf, out_buf_used, doc);
        doc->mime = cbr_mime;
    }
    free(out_buf);
}
|
||||
@@ -1,12 +0,0 @@
|
||||
#ifndef SIST2_CBR_H
|
||||
#define SIST2_CBR_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
void cbr_init();
|
||||
|
||||
int is_cbr(unsigned int mime);
|
||||
|
||||
void parse_cbr(void *buf, size_t buf_len, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,141 +0,0 @@
|
||||
#include "doc.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
|
||||
/* True when string x begins with y; y must be a string literal (its length comes from sizeof). */
#define STR_STARTS_WITH(x, y) (strncmp((y), (x), sizeof(y) - 1) == 0)
|
||||
|
||||
__always_inline
|
||||
static int should_read_part(const char *part) {
|
||||
|
||||
LOG_DEBUGF("doc.c", "Got part : %s", part)
|
||||
|
||||
if (part == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if ( // Word
|
||||
STR_STARTS_WITH(part, "word/document.xml")
|
||||
|| STR_STARTS_WITH(part, "word/footnotes.xml")
|
||||
|| STR_STARTS_WITH(part, "word/endnotes.xml")
|
||||
|| STR_STARTS_WITH(part, "word/footer")
|
||||
|| STR_STARTS_WITH(part, "word/header")
|
||||
// PowerPoint
|
||||
|| STR_STARTS_WITH(part, "ppt/slides/slide")
|
||||
|| STR_STARTS_WITH(part, "ppt/notesSlides/slide")
|
||||
// Excel
|
||||
|| STR_STARTS_WITH(part, "xl/worksheets/sheet")
|
||||
|| STR_STARTS_WITH(part, "xl/sharedStrings.xml")
|
||||
|| STR_STARTS_WITH(part, "xl/workbook.xml")
|
||||
) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Depth-first walk that appends the text of every <t> element to buf, followed by a space. */
static void extract_text_nodes(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
    //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
    for (xmlNode *child = node; child; child = child->next) {
        // Skip nodes that carry no name rather than dereferencing NULL
        if (child->name != NULL && *child->name == 't' && *(child->name + 1) == '\0') {
            xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);

            if (text) {
                text_buffer_append_string0(buf, (char *) text);
                text_buffer_append_char(buf, ' ');
                xmlFree(text);
            }
        }

        extract_text_nodes(xml, child->children, buf);
    }
}

/**
 * Append the text of all <t> elements found in node, its siblings and their
 * descendants to buf.
 *
 * libxml2's last error is inspected once, before the walk: a fatal error
 * aborts the extraction, any other error is only logged. (Doing this check in
 * the recursive step would log the same error once per node.)
 *
 * @param xml  Document the nodes belong to.
 * @param node First node to visit.
 * @param buf  Destination text buffer.
 * @return 0 on success, -1 when a fatal XML error is pending.
 */
int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
    xmlErrorPtr err = xmlGetLastError();
    if (err != NULL) {
        if (err->level == XML_ERR_FATAL) {
            LOG_ERRORF("doc.c", "Got fatal XML error while parsing document: %s", err->message);
            return -1;
        } else {
            LOG_ERRORF("doc.c", "Got recoverable XML error while parsing document: %s", err->message);
        }
    }

    extract_text_nodes(xml, node, buf);
    return 0;
}
|
||||
|
||||
/**
 * libxml2 input callback: context is the struct archive whose current entry
 * is being parsed; reads up to len bytes of it into buffer.
 */
int xml_io_read(void *context, char *buffer, int len) {
    return archive_read_data((struct archive *) context, buffer, len);
}
|
||||
|
||||
/* libxml2 close callback: does nothing and reports success. */
int xml_io_close(UNUSED(void *context)) {
    const int success = 0;
    return success;
}
|
||||
|
||||
__always_inline
|
||||
static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) {
|
||||
|
||||
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||
|
||||
if (xml == NULL) {
|
||||
LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||
return -1;
|
||||
}
|
||||
|
||||
xmlNode *root = xmlDocGetRootElement(xml);
|
||||
if (root == NULL) {
|
||||
LOG_ERROR(doc->filepath, "Empty document")
|
||||
xmlFreeDoc(xml);
|
||||
return -1;
|
||||
}
|
||||
|
||||
extract_text(xml, root, buf);
|
||||
xmlFreeDoc(xml);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Extract the text of an office document held in memory as a ZIP archive and
 * attach it to doc as MetaContent.
 *
 * Every regular-file member accepted by should_read_part() is read with
 * read_part(); reading stops at the first member that fails.
 *
 * @param mem     Archive bytes; nothing happens when NULL.
 * @param mem_len Number of bytes in mem.
 * @param doc     Document being indexed.
 */
void parse_doc(void *mem, size_t mem_len, document_t *doc) {
    if (mem == NULL) {
        return;
    }

    struct archive *zip = archive_read_new();
    archive_read_support_format_zip(zip);

    int status = archive_read_open_memory(zip, mem, mem_len);
    if (status != ARCHIVE_OK) {
        LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(zip));
        archive_read_free(zip);
        return;
    }

    text_buffer_t text = text_buffer_create(ScanCtx.content_size);

    struct archive_entry *member;
    while (archive_read_next_header(zip, &member) == ARCHIVE_OK) {
        if (!S_ISREG(archive_entry_stat(member)->st_mode)) {
            continue;
        }

        const char *member_path = archive_entry_pathname(member);
        if (!should_read_part(member_path)) {
            continue;
        }

        status = read_part(zip, &text, doc);
        if (status != 0) {
            break;
        }
    }

    // Only emit content when something was extracted
    if (text.dyn_buffer.cur > 0) {
        text_buffer_terminate_string(&text);

        meta_line_t *content = malloc(sizeof(meta_line_t) + text.dyn_buffer.cur);
        content->key = MetaContent;
        strcpy(content->strval, text.dyn_buffer.buf);
        APPEND_META(doc, content);
    }

    archive_read_close(zip);
    archive_read_free(zip);
    text_buffer_destroy(&text);
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef SIST2_DOC_H
|
||||
#define SIST2_DOC_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
/* Extracts the text of the ZIP-packaged office document in buf into doc's metadata. */
void parse_doc(void *buf, size_t buf_len, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,233 +0,0 @@
|
||||
#include "font.h"
|
||||
|
||||
|
||||
#include "src/ctx.h"
|
||||
|
||||
/* Per-thread FreeType library handle; created on first use by parse_font(). */
__thread FT_Library ft_lib = NULL;
|
||||
|
||||
|
||||
/* Size of the bitmap a rendered string needs, as computed by text_dimension(). */
typedef struct text_dimensions {
    unsigned int width;    /* sum of the per-character widths */
    unsigned int height;   /* largest ascent plus largest descent */
    unsigned int baseline; /* largest descent */
} text_dimensions_t;
|
||||
|
||||
/* The fields of a FreeType glyph slot that the renderer uses; see ft_glyph_to_glyph(). */
typedef struct glyph {
    int top;               /* slot->bitmap_top */
    int height;            /* bitmap rows */
    int width;             /* bitmap width */
    int descent;           /* rows below `top`, never negative */
    int ascent;            /* max(top, height) - descent, never negative */
    int advance_width;     /* slot->advance.x / 64 */
    unsigned char *pixmap; /* slot->bitmap.buffer; not owned by this struct */
} glyph_t;
|
||||
|
||||
|
||||
__always_inline
|
||||
int kerning_offset(char c, char pc, FT_Face face) {
|
||||
FT_Vector kerning;
|
||||
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
|
||||
|
||||
return (int) (kerning.x / 64);
|
||||
}
|
||||
|
||||
__always_inline
|
||||
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
|
||||
glyph_t glyph;
|
||||
|
||||
glyph.pixmap = slot->bitmap.buffer;
|
||||
|
||||
glyph.width = (int) slot->bitmap.width;
|
||||
glyph.height = (int) slot->bitmap.rows;
|
||||
glyph.top = slot->bitmap_top;
|
||||
glyph.advance_width = (int) slot->advance.x / 64;
|
||||
|
||||
glyph.descent = MAX(0, glyph.height - glyph.top);
|
||||
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
|
||||
|
||||
return glyph;
|
||||
}
|
||||
|
||||
/**
 * Measure the bitmap needed to render text with face.
 *
 * Width is the sum, over all characters, of max(advance, bitmap width) plus
 * kerning; height is the largest ascent plus the largest descent; baseline is
 * the largest descent.
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t result;
    result.width = 0;

    const int length = (int) strlen(text);
    unsigned int tallest_ascent = 0;
    int deepest_descent = 0;
    char previous = 0;

    for (int idx = 0; idx < length; idx++) {
        const char current = text[idx];

        FT_Load_Char(face, current, 0);
        glyph_t g = ft_glyph_to_glyph(face->glyph);

        deepest_descent = MAX(deepest_descent, g.descent);
        tallest_ascent = MAX(tallest_ascent, MAX(g.height, g.ascent));

        int kerning_x = kerning_offset(current, previous, face);
        result.width += MAX(g.advance_width, g.width) + kerning_x;

        previous = current;
    }

    result.baseline = deepest_descent;
    result.height = tallest_ascent + deepest_descent;

    return result;
}
|
||||
|
||||
/**
 * OR the pixels of glyph into bitmap with the glyph's top-left corner at
 * (x, y). bitmap is text_info.width pixels wide and text_info.height pixels
 * high; writes that would land past its end are dropped.
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int canvas_len = text_info.width * text_info.height;
    // Distance from the end of one glyph row to the start of the next one
    const unsigned int row_gap = text_info.width - glyph->width;

    unsigned int in = 0;
    unsigned int out = y * text_info.width + x;

    for (unsigned int row = 0; row < glyph->height; row++, out += row_gap) {
        for (unsigned int col = 0; col < glyph->width; col++, in++, out++) {
            if (out < canvas_len) {
                bitmap[out] |= glyph->pixmap[in];
            }
        }
    }
}
|
||||
|
||||
/**
 * Serialize an 8-bit bitmap as a BMP file into buf.
 *
 * Layout: 14-byte file header, 40-byte DIB header, 256-entry colour table
 * (index 0 is white, index 255 is black), then the pixel rows written from
 * bottom to top.
 *
 * @param buf        Destination buffer.
 * @param dimensions Width and height of bitmap, in pixels.
 * @param bitmap     dimensions.width * dimensions.height bytes, one per pixel,
 *                   top row first.
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {
    // BMP rows occupy a multiple of 4 bytes counted from the start of the row.
    // The pixel array starts at offset 14 + 40 + 1024 = 1078, which is not a
    // multiple of 4, so the padding must be derived from the row width and not
    // from the absolute write position.
    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int pad = 0; pad < row_padding; pad++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: patch the placeholder. It sits at offset 2, so copy the bytes
    // instead of storing through a misaligned int pointer.
    int file_size = buf->cur;
    memcpy((char *) buf->buf + 2, &file_size, sizeof(file_size));
}
|
||||
|
||||
/**
 * Index a font file held in memory: store its name as MetaFontName and, when
 * thumbnails are enabled (ScanCtx.tn_size > 0), render that name with the font
 * itself and store the result as a BMP thumbnail.
 *
 * The name is "<family> <style>", or just the family when the style is
 * missing or starts with '?'. It is truncated to fit a 1024-byte buffer.
 *
 * @param buf     Font file bytes; nothing is parsed when NULL.
 * @param buf_len Number of bytes in buf.
 * @param doc     Document being indexed.
 */
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
    if (ft_lib == NULL) {
        FT_Init_FreeType(&ft_lib);
    }

    if (buf == NULL) {
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
    if (err != 0) {
        LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
        return;
    }

    char font_name[1024];

    // The names come from the font file: never pass NULL to "%s" and always
    // bound the copy to the buffer.
    const char *family = face->family_name != NULL ? face->family_name : "(null)";
    if (face->style_name == NULL || *(face->style_name) == '?') {
        snprintf(font_name, sizeof(font_name), "%s", family);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family, face->style_name);
    }

    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name));
    meta_name->key = MetaFontName;
    strcpy(meta_name->strval, font_name);
    APPEND_META(doc, meta_name);

    if (ScanCtx.tn_size <= 0) {
        FT_Done_Face(face);
        return;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err));
        FT_Done_Face(face);
        return;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);
    unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1);
    if (bitmap == NULL && dimensions.width * dimensions.height > 0) {
        LOG_ERROR(doc->filepath, "(font.c) Could not allocate the thumbnail bitmap");
        FT_Done_Face(face);
        return;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            // Retry with the other letter case before giving up on this character
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);

    FT_Done_Face(face);
}
|
||||
|
||||
void cleanup_font() {
|
||||
FT_Done_FreeType(ft_lib);
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
#ifndef SIST2_FONT_H
|
||||
#define SIST2_FONT_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
|
||||
/* Stores the font's name and, when thumbnails are enabled, a rendering of that name. */
void parse_font(const char *buf, size_t buf_len, document_t *doc);

/* Releases the FreeType library handle used by parse_font(). */
void cleanup_font();
|
||||
|
||||
#endif
|
||||
@@ -1,402 +0,0 @@
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
/* Images whose width or height is at most this many pixels get no thumbnail. */
#define MIN_SIZE 32
|
||||
/* Size in bytes of the buffer given to the custom AVIOContext in parse_media_vfile(). */
#define AVIO_BUF_SIZE 8192
|
||||
|
||||
__always_inline
|
||||
static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
|
||||
|
||||
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
|
||||
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
|
||||
jpeg->width = dstW;
|
||||
jpeg->height = dstH;
|
||||
jpeg->time_base.den = 1000000;
|
||||
jpeg->time_base.num = 1;
|
||||
jpeg->i_quant_factor = qscale;
|
||||
|
||||
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
|
||||
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
|
||||
|
||||
if (ret != 0) {
|
||||
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return jpeg;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
|
||||
|
||||
int dstW;
|
||||
int dstH;
|
||||
if (frame->width <= size && frame->height <= size) {
|
||||
dstW = frame->width;
|
||||
dstH = frame->height;
|
||||
} else {
|
||||
double ratio = (double) frame->width / frame->height;
|
||||
if (frame->width > frame->height) {
|
||||
dstW = size;
|
||||
dstH = (int) (size / ratio);
|
||||
} else {
|
||||
dstW = (int) (size * ratio);
|
||||
dstH = size;
|
||||
}
|
||||
}
|
||||
|
||||
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
AVFrame *scaled_frame = av_frame_alloc();
|
||||
|
||||
struct SwsContext *ctx = sws_getContext(
|
||||
decoder->width, decoder->height, decoder->pix_fmt,
|
||||
dstW, dstH, AV_PIX_FMT_YUVJ420P,
|
||||
SWS_FAST_BILINEAR, 0, 0, 0
|
||||
);
|
||||
|
||||
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
|
||||
|
||||
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||
|
||||
sws_scale(ctx,
|
||||
(const uint8_t *const *) frame->data, frame->linesize,
|
||||
0, decoder->height,
|
||||
scaled_frame->data, scaled_frame->linesize
|
||||
);
|
||||
|
||||
scaled_frame->width = dstW;
|
||||
scaled_frame->height = dstH;
|
||||
scaled_frame->format = AV_PIX_FMT_YUV420P;
|
||||
|
||||
sws_freeContext(ctx);
|
||||
|
||||
return scaled_frame;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
|
||||
AVFrame *frame = av_frame_alloc();
|
||||
|
||||
AVPacket avPacket;
|
||||
av_init_packet(&avPacket);
|
||||
|
||||
int receive_ret = -EAGAIN;
|
||||
while (receive_ret == -EAGAIN) {
|
||||
// Get video frame
|
||||
while (1) {
|
||||
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
|
||||
|
||||
if (read_frame_ret != 0) {
|
||||
if (read_frame_ret != AVERROR_EOF) {
|
||||
LOG_WARNINGF(doc->filepath,
|
||||
"(media.c) avcodec_read_frame() returned error code [%d] %s",
|
||||
read_frame_ret, av_err2str(read_frame_ret)
|
||||
)
|
||||
}
|
||||
av_frame_free(&frame);
|
||||
av_packet_unref(&avPacket);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//Ignore audio/other frames
|
||||
if (avPacket.stream_index != stream_idx) {
|
||||
av_packet_unref(&avPacket);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Feed it to decoder
|
||||
int decode_ret = avcodec_send_packet(decoder, &avPacket);
|
||||
if (decode_ret != 0) {
|
||||
LOG_ERRORF(doc->filepath,
|
||||
"(media.c) avcodec_send_packet() returned error code [%d] %s",
|
||||
decode_ret, av_err2str(decode_ret)
|
||||
)
|
||||
av_frame_free(&frame);
|
||||
av_packet_unref(&avPacket);
|
||||
return NULL;
|
||||
}
|
||||
av_packet_unref(&avPacket);
|
||||
receive_ret = avcodec_receive_frame(decoder, frame);
|
||||
}
|
||||
return frame;
|
||||
}
|
||||
|
||||
/*
 * Append the value of dictionary entry tag_ to doc as a string meta line
 * stored under keyname. The value goes through a text buffer first.
 * Expands to a braced block, so no trailing semicolon is needed.
 */
#define APPEND_TAG_META(doc, tag_, keyname) \
    { \
        text_buffer_t tex = text_buffer_create(-1); \
        text_buffer_append_string0(&tex, (tag_)->value); \
        text_buffer_terminate_string(&tex); \
        meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
        meta_tag->key = (keyname); \
        strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
        APPEND_META(doc, meta_tag); \
        text_buffer_destroy(&tex); \
    }
|
||||
|
||||
__always_inline
|
||||
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[256];
|
||||
strncpy(key, tag->key, sizeof(key));
|
||||
|
||||
char *ptr = key;
|
||||
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
|
||||
|
||||
if (strcmp(key, "artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
} else if (strcmp(key, "genre") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaGenre)
|
||||
} else if (strcmp(key, "title") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaTitle)
|
||||
} else if (strcmp(key, "album_artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaAlbumArtist)
|
||||
} else if (strcmp(key, "album") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaAlbum)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static void
|
||||
append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
|
||||
|
||||
if (is_video) {
|
||||
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
|
||||
meta_duration->key = MetaMediaDuration;
|
||||
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
|
||||
APPEND_META(doc, meta_duration)
|
||||
|
||||
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
||||
meta_bitrate->key = MetaMediaBitrate;
|
||||
meta_bitrate->longval = pFormatCtx->bit_rate;
|
||||
APPEND_META(doc, meta_bitrate)
|
||||
}
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
if (is_video) {
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
if (include_audio_tags && strcmp(tag->key, "title") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaTitle)
|
||||
} else if (strcmp(tag->key, "comment") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaContent)
|
||||
} else if (include_audio_tags && strcmp(tag->key, "artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// EXIF metadata
|
||||
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
if (include_audio_tags && strcmp(tag->key, "Artist") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaArtist)
|
||||
} else if (strcmp(tag->key, "ImageDescription") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaContent)
|
||||
} else if (strcmp(tag->key, "Make") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifMake)
|
||||
} else if (strcmp(tag->key, "Model") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifModel)
|
||||
} else if (strcmp(tag->key, "Software") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifSoftware)
|
||||
} else if (strcmp(tag->key, "FNumber") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifFNumber)
|
||||
} else if (strcmp(tag->key, "FocalLength") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifFocalLength)
|
||||
} else if (strcmp(tag->key, "UserComment") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifUserComment)
|
||||
} else if (strcmp(tag->key, "ISOSpeedRatings") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifIsoSpeedRatings)
|
||||
} else if (strcmp(tag->key, "ExposureTime") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifExposureTime)
|
||||
} else if (strcmp(tag->key, "DateTime") == 0) {
|
||||
APPEND_TAG_META(doc, tag, MetaExifDateTime)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Index an opened media container and close it.
 *
 * Codec ids (plus width and height for video) are recorded for one audio and
 * one video stream; the audio tags are recorded when an audio stream exists.
 * When a video stream exists and thumbnails are enabled (ScanCtx.tn_size > 0),
 * one frame is decoded, its metadata is recorded, and a JPEG thumbnail is
 * written to the store under doc->uuid.
 *
 * @param pFormatCtx Opened container; always closed and freed by this function.
 * @param doc        Document being indexed.
 */
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {

    int video_stream = -1;
    int audio_stream = -1;

    AVStream *stream = NULL;
    AVCodec *video_codec = NULL;
    AVCodecContext *decoder = NULL;
    AVCodecContext *jpeg_encoder = NULL;
    AVFrame *frame = NULL;
    AVFrame *scaled_frame = NULL;

    avformat_find_stream_info(pFormatCtx, NULL);

    // Streams are visited from last to first; the first one met of each kind wins
    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *candidate = pFormatCtx->streams[i];

        if (candidate->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
                meta_audio->key = MetaMediaAudioCodec;
                meta_audio->intval = candidate->codecpar->codec_id;
                APPEND_META(doc, meta_audio);

                append_audio_meta(pFormatCtx, doc);
                audio_stream = i;
            }
        } else if (candidate->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            if (video_stream == -1) {
                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
                meta_vid->key = MetaMediaVideoCodec;
                meta_vid->intval = candidate->codecpar->codec_id;
                APPEND_META(doc, meta_vid);

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->intval = candidate->codecpar->width;
                APPEND_META(doc, meta_w);

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->intval = candidate->codecpar->height;
                APPEND_META(doc, meta_h);

                video_stream = i;
            }
        }
    }

    if (video_stream == -1 || ScanCtx.tn_size <= 0) {
        goto cleanup;
    }

    stream = pFormatCtx->streams[video_stream];

    if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
        goto cleanup;
    }

    // Decoder
    video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
    decoder = avcodec_alloc_context3(video_codec);
    avcodec_parameters_to_context(decoder, stream->codecpar);
    avcodec_open2(decoder, video_codec, NULL);

    //Seek
    if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
        int seek_ret = 0;
        for (int i = 20; i >= 0; i--) {
            seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                     stream->duration * 0.10, 0);
            if (seek_ret == 0) {
                break;
            }
        }
    }

    frame = read_frame(pFormatCtx, decoder, video_stream, doc);
    if (frame == NULL) {
        goto cleanup;
    }

    append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);

    // Scale frame
    scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    // Encode frame to jpeg
    jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
    if (jpeg_encoder == NULL) {
        goto cleanup;
    }

    {
        AVPacket jpeg_packet;
        av_init_packet(&jpeg_packet);

        int encode_ret = avcodec_send_frame(jpeg_encoder, scaled_frame);
        if (encode_ret == 0) {
            encode_ret = avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
        }

        if (encode_ret == 0) {
            // Save thumbnail
            store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
                        jpeg_packet.size);
        } else {
            // Never store a packet that the encoder did not fill in
            LOG_WARNINGF(doc->filepath, "(media.c) Could not encode thumbnail: [%d] %s",
                         encode_ret, av_err2str(encode_ret));
        }

        av_packet_unref(&jpeg_packet);
    }

cleanup:
    if (scaled_frame != NULL) {
        // The pixel buffer was allocated separately by scale_frame()
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }
    av_frame_free(&frame);
    avcodec_free_context(&jpeg_encoder);
    avcodec_free_context(&decoder);

    avformat_close_input(&pFormatCtx);
    avformat_free_context(pFormatCtx);
}
|
||||
|
||||
/**
 * Open the media file at filepath with libavformat and index it with
 * parse_media(). Failures are logged against doc->filepath.
 */
void parse_media_filename(const char *filepath, document_t *doc) {

    AVFormatContext *fmt = avformat_alloc_context();
    if (fmt == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
        return;
    }

    int open_status = avformat_open_input(&fmt, filepath, NULL, NULL);
    if (open_status >= 0) {
        parse_media(fmt, doc);
        return;
    }

    LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_status, av_err2str(open_status));
    avformat_close_input(&fmt);
    avformat_free_context(fmt);
}
|
||||
|
||||
|
||||
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
||||
struct vfile *f = ptr;
|
||||
|
||||
int ret = f->read(f, buf, buf_size);
|
||||
|
||||
if (ret == 0) {
|
||||
return AVERROR_EOF;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Parse a media document whose bytes are only reachable through a virtual
 * file (struct vfile), by wiring a custom AVIOContext that reads through
 * vfile_read().
 *
 * @param f   virtual file to read from; passed as the opaque pointer of the
 *            AVIOContext
 * @param doc document being indexed; doc->filepath is used as the subject
 *            of log messages
 *
 * The I/O context is created non-writable and without a seek callback.
 * Every allocation is checked and anything already allocated is released
 * before returning early. An open failure with code -5 is treated as
 * "media requires seek" and is not logged; any other open failure is
 * logged. The I/O buffer and the AVIOContext are always freed here, since
 * they are owned by this function (AVFMT_FLAG_CUSTOM_IO is set).
 */
void parse_media_vfile(struct vfile *f, document_t *doc) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O buffer with av_malloc()")
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    if (io_ctx == NULL) {
        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O context with avio_alloc_context()")
        // The buffer was never attached to a context, so free it directly
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;
    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;

    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
    if (res < 0) {
        // -5: tried to parse media that requires seek; expected, so stay quiet
        if (res != -5) {
            LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
        }

        // Free through io_ctx->buffer, not `buffer`: same pointer the original released
        av_free(io_ctx->buffer);
        avio_context_free(&io_ctx);
        avformat_close_input(&pFormatCtx);
        avformat_free_context(pFormatCtx);
        return;
    }

    parse_media(pFormatCtx, doc);

    // The custom I/O context is ours to release once parsing is finished
    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
}
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
#ifndef SIST2_MEDIA_H
|
||||
#define SIST2_MEDIA_H
|
||||
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
// Size thresholds for media files (64 * 1024 and 2 * 1024).
// Parenthesized so the product stays intact inside larger expressions
// such as `x / MIN_VIDEO_SIZE`.
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
|
||||
|
||||
void parse_media_filename(const char * filepath, document_t *doc);
|
||||
|
||||
void parse_media_vfile(struct vfile *f, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef SIST2_MIME_H
|
||||
#define SIST2_MIME_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "../sist.h"
|
||||
|
||||
// Extracts the major-type field (bits 16..27) of a mime id.
// The argument and the whole expansion are parenthesized so the macro
// composes safely with surrounding operators.
#define MAJOR_MIME(mime_id) (((mime_id) & 0x0FFF0000) >> 16)
|
||||
|
||||
@@ -25,6 +25,9 @@
|
||||
// Flag bit OR-ed into mime ids (e.g. the openxmlformats entries of enum mime).
#define DOC_MASK 0x04000000
// Non-zero when the DOC_MASK bit is set in mime_id. Fully parenthesized so
// that `!IS_DOC(x)` and `IS_DOC(a | b)` evaluate as intended.
#define IS_DOC(mime_id) (((mime_id) & DOC_MASK) == DOC_MASK)
|
||||
|
||||
// Flag bit OR-ed into the mobipocket / mobi8 ebook mime ids of enum mime.
#define MOBI_MASK 0x02000000
// Non-zero when the MOBI_MASK bit is set in mime_id. Fully parenthesized so
// that `!IS_MOBI(x)` and `IS_MOBI(a | b)` evaluate as intended.
#define IS_MOBI(mime_id) (((mime_id) & MOBI_MASK) == MOBI_MASK)
|
||||
|
||||
enum major_mime {
|
||||
MimeInvalid = 0,
|
||||
MimeModel = 1,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// **Generated by mime.py**
|
||||
#ifndef MIME_GENERATED_C
|
||||
#define MIME_GENERATED_C
|
||||
#include <glib-2.0/glib.h>
|
||||
#include <glib.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
@@ -53,386 +53,387 @@ enum mime {
|
||||
application_step=655405,
|
||||
application_streamingmedia=655406,
|
||||
application_vda=655407,
|
||||
application_vnd_fdf=655408,
|
||||
application_vnd_font_fontforge_sfd=655409,
|
||||
application_vnd_hp_hpgl=655410,
|
||||
application_vnd_iccprofile=655411,
|
||||
application_vnd_lotus_1_2_3=655412,
|
||||
application_vnd_ms_cab_compressed=655413,
|
||||
application_vnd_ms_excel=655414,
|
||||
application_vnd_ms_fontobject=655415,
|
||||
application_vnd_ms_opentype=655416 | 0x20000000,
|
||||
application_vnd_ms_pki_certstore=655417,
|
||||
application_vnd_ms_pki_pko=655418,
|
||||
application_vnd_ms_pki_seccat=655419,
|
||||
application_vnd_ms_powerpoint=655420,
|
||||
application_vnd_ms_project=655421,
|
||||
application_vnd_oasis_opendocument_base=655422,
|
||||
application_vnd_oasis_opendocument_formula=655423,
|
||||
application_vnd_oasis_opendocument_graphics=655424,
|
||||
application_vnd_oasis_opendocument_presentation=655425,
|
||||
application_vnd_oasis_opendocument_spreadsheet=655426,
|
||||
application_vnd_oasis_opendocument_text=655427,
|
||||
application_vnd_openxmlformats_officedocument_presentationml_presentation=655428 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655429 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655430 | 0x04000000,
|
||||
application_vnd_symbian_install=655431,
|
||||
application_vnd_tcpdump_pcap=655432,
|
||||
application_vnd_wap_wmlc=655433,
|
||||
application_vnd_wap_wmlscriptc=655434,
|
||||
application_vnd_xara=655435,
|
||||
application_vocaltec_media_desc=655436,
|
||||
application_vocaltec_media_file=655437,
|
||||
application_warc=655438,
|
||||
application_winhelp=655439,
|
||||
application_wordperfect=655440,
|
||||
application_wordperfect6_0=655441,
|
||||
application_wordperfect6_1=655442,
|
||||
application_x_123=655443,
|
||||
application_x_7z_compressed=655444 | 0x10000000,
|
||||
application_x_aim=655445,
|
||||
application_x_apple_diskimage=655446,
|
||||
application_x_arc=655447 | 0x10000000,
|
||||
application_x_archive=655448,
|
||||
application_x_atari_7800_rom=655449,
|
||||
application_x_authorware_bin=655450,
|
||||
application_x_authorware_map=655451,
|
||||
application_x_authorware_seg=655452,
|
||||
application_x_avira_qua=655453,
|
||||
application_x_bcpio=655454,
|
||||
application_x_bittorrent=655455,
|
||||
application_x_bsh=655456,
|
||||
application_x_bytecode_python=655457,
|
||||
application_x_bzip=655458,
|
||||
application_x_bzip2=655459 | 0x08000000,
|
||||
application_x_cbr=655460,
|
||||
application_x_cbz=655461 | 0x40000000,
|
||||
application_x_cdlink=655462,
|
||||
application_x_chat=655463,
|
||||
application_x_chrome_extension=655464,
|
||||
application_x_cocoa=655465,
|
||||
application_x_conference=655466,
|
||||
application_x_coredump=655467,
|
||||
application_x_cpio=655468,
|
||||
application_x_dbf=655469,
|
||||
application_x_dbt=655470,
|
||||
application_x_debian_package=655471,
|
||||
application_x_deepv=655472,
|
||||
application_x_director=655473,
|
||||
application_x_dmp=655474,
|
||||
application_x_dosdriver=655475,
|
||||
application_x_dosexec=655476,
|
||||
application_x_dvi=655477,
|
||||
application_x_elc=655478,
|
||||
application_vnd_amazon_mobi8_ebook=655408 | 0x02000000,
|
||||
application_vnd_fdf=655409,
|
||||
application_vnd_font_fontforge_sfd=655410,
|
||||
application_vnd_hp_hpgl=655411,
|
||||
application_vnd_iccprofile=655412,
|
||||
application_vnd_lotus_1_2_3=655413,
|
||||
application_vnd_ms_cab_compressed=655414,
|
||||
application_vnd_ms_excel=655415,
|
||||
application_vnd_ms_fontobject=655416,
|
||||
application_vnd_ms_opentype=655417 | 0x20000000,
|
||||
application_vnd_ms_pki_certstore=655418,
|
||||
application_vnd_ms_pki_pko=655419,
|
||||
application_vnd_ms_pki_seccat=655420,
|
||||
application_vnd_ms_powerpoint=655421,
|
||||
application_vnd_ms_project=655422,
|
||||
application_vnd_oasis_opendocument_base=655423,
|
||||
application_vnd_oasis_opendocument_formula=655424,
|
||||
application_vnd_oasis_opendocument_graphics=655425,
|
||||
application_vnd_oasis_opendocument_presentation=655426,
|
||||
application_vnd_oasis_opendocument_spreadsheet=655427,
|
||||
application_vnd_oasis_opendocument_text=655428,
|
||||
application_vnd_openxmlformats_officedocument_presentationml_presentation=655429 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655430 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655431 | 0x04000000,
|
||||
application_vnd_symbian_install=655432,
|
||||
application_vnd_tcpdump_pcap=655433,
|
||||
application_vnd_wap_wmlc=655434,
|
||||
application_vnd_wap_wmlscriptc=655435,
|
||||
application_vnd_xara=655436,
|
||||
application_vocaltec_media_desc=655437,
|
||||
application_vocaltec_media_file=655438,
|
||||
application_warc=655439,
|
||||
application_winhelp=655440,
|
||||
application_wordperfect=655441,
|
||||
application_wordperfect6_0=655442,
|
||||
application_wordperfect6_1=655443,
|
||||
application_x_123=655444,
|
||||
application_x_7z_compressed=655445 | 0x10000000,
|
||||
application_x_aim=655446,
|
||||
application_x_apple_diskimage=655447,
|
||||
application_x_arc=655448 | 0x10000000,
|
||||
application_x_archive=655449,
|
||||
application_x_atari_7800_rom=655450,
|
||||
application_x_authorware_bin=655451,
|
||||
application_x_authorware_map=655452,
|
||||
application_x_authorware_seg=655453,
|
||||
application_x_avira_qua=655454,
|
||||
application_x_bcpio=655455,
|
||||
application_x_bittorrent=655456,
|
||||
application_x_bsh=655457,
|
||||
application_x_bytecode_python=655458,
|
||||
application_x_bzip=655459,
|
||||
application_x_bzip2=655460 | 0x08000000,
|
||||
application_x_cbr=655461,
|
||||
application_x_cbz=655462 | 0x40000000,
|
||||
application_x_cdlink=655463,
|
||||
application_x_chat=655464,
|
||||
application_x_chrome_extension=655465,
|
||||
application_x_cocoa=655466,
|
||||
application_x_conference=655467,
|
||||
application_x_coredump=655468,
|
||||
application_x_cpio=655469,
|
||||
application_x_dbf=655470,
|
||||
application_x_dbt=655471,
|
||||
application_x_debian_package=655472,
|
||||
application_x_deepv=655473,
|
||||
application_x_director=655474,
|
||||
application_x_dmp=655475,
|
||||
application_x_dosdriver=655476,
|
||||
application_x_dosexec=655477,
|
||||
application_x_dvi=655478,
|
||||
application_x_elc=655479,
|
||||
application_x_empty=1,
|
||||
application_x_envoy=655480,
|
||||
application_x_esrehber=655481,
|
||||
application_x_excel=655482,
|
||||
application_x_executable=655483,
|
||||
application_x_font_gdos=655484,
|
||||
application_x_font_pf2=655485,
|
||||
application_x_font_pfm=655486,
|
||||
application_x_font_sfn=655487,
|
||||
application_x_font_ttf=655488 | 0x20000000,
|
||||
application_x_fptapplication_x_dbt=655489,
|
||||
application_x_freelance=655490,
|
||||
application_x_gamecube_rom=655491,
|
||||
application_x_gdbm=655492,
|
||||
application_x_gettext_translation=655493,
|
||||
application_x_git=655494,
|
||||
application_x_gsp=655495,
|
||||
application_x_gss=655496,
|
||||
application_x_gtar=655497,
|
||||
application_x_gzip=655498,
|
||||
application_x_hdf=655499,
|
||||
application_x_helpfile=655500,
|
||||
application_x_httpd_imap=655501,
|
||||
application_x_ima=655502,
|
||||
application_x_innosetup=655503,
|
||||
application_x_internett_signup=655504,
|
||||
application_x_inventor=655505,
|
||||
application_x_ip2=655506,
|
||||
application_x_java_applet=655507,
|
||||
application_x_java_commerce=655508,
|
||||
application_x_java_image=655509,
|
||||
application_x_java_jmod=655510,
|
||||
application_x_java_keystore=655511,
|
||||
application_x_kdelnk=655512,
|
||||
application_x_koan=655513,
|
||||
application_x_latex=655514,
|
||||
application_x_livescreen=655515,
|
||||
application_x_lotus=655516,
|
||||
application_x_lz4=655517 | 0x08000000,
|
||||
application_x_lz4_json=655518,
|
||||
application_x_lzh=655519,
|
||||
application_x_lzh_compressed=655520,
|
||||
application_x_lzip=655521 | 0x08000000,
|
||||
application_x_lzma=655522 | 0x08000000,
|
||||
application_x_lzop=655523 | 0x08000000,
|
||||
application_x_lzx=655524,
|
||||
application_x_mach_binary=655525,
|
||||
application_x_mach_executable=655526,
|
||||
application_x_magic_cap_package_1_0=655527,
|
||||
application_x_mathcad=655528,
|
||||
application_x_maxis_dbpf=655529,
|
||||
application_x_meme=655530,
|
||||
application_x_midi=655531,
|
||||
application_x_mif=655532,
|
||||
application_x_mix_transfer=655533,
|
||||
application_x_mobipocket_ebook=655534,
|
||||
application_x_ms_compress_szdd=655535,
|
||||
application_x_ms_pdb=655536,
|
||||
application_x_ms_reader=655537,
|
||||
application_x_msaccess=655538,
|
||||
application_x_n64_rom=655539,
|
||||
application_x_navi_animation=655540,
|
||||
application_x_navidoc=655541,
|
||||
application_x_navimap=655542,
|
||||
application_x_navistyle=655543,
|
||||
application_x_nes_rom=655544,
|
||||
application_x_netcdf=655545,
|
||||
application_x_newton_compatible_pkg=655546,
|
||||
application_x_nintendo_ds_rom=655547,
|
||||
application_x_object=655548,
|
||||
application_x_omc=655549,
|
||||
application_x_omcdatamaker=655550,
|
||||
application_x_omcregerator=655551,
|
||||
application_x_pagemaker=655552,
|
||||
application_x_pcl=655553,
|
||||
application_x_pgp_keyring=655554,
|
||||
application_x_pixclscript=655555,
|
||||
application_x_pkcs7_certreqresp=655556,
|
||||
application_x_pkcs7_signature=655557,
|
||||
application_x_project=655558,
|
||||
application_x_qpro=655559,
|
||||
application_x_rar=655560 | 0x10000000,
|
||||
application_x_rpm=655561,
|
||||
application_x_sdp=655562,
|
||||
application_x_sea=655563,
|
||||
application_x_seelogo=655564,
|
||||
application_x_setupscript=655565,
|
||||
application_x_shar=655566,
|
||||
application_x_sharedlib=655567,
|
||||
application_x_shockwave_flash=655568,
|
||||
application_x_snappy_framed=655569,
|
||||
application_x_sprite=655570,
|
||||
application_x_sqlite3=655571,
|
||||
application_x_stargallery_thm=655572,
|
||||
application_x_stuffit=655573,
|
||||
application_x_sv4cpio=655574,
|
||||
application_x_sv4crc=655575,
|
||||
application_x_tar=655576 | 0x10000000,
|
||||
application_x_tbook=655577,
|
||||
application_x_terminfo=655578,
|
||||
application_x_terminfo2=655579,
|
||||
application_x_tex_tfm=655580,
|
||||
application_x_texinfo=655581,
|
||||
application_x_ustar=655582,
|
||||
application_x_visio=655583,
|
||||
application_x_vnd_audioexplosion_mzz=655584,
|
||||
application_x_vnd_ls_xpix=655585,
|
||||
application_x_vrml=655586,
|
||||
application_x_wais_source=655587,
|
||||
application_x_wine_extension_ini=655588,
|
||||
application_x_wintalk=655589,
|
||||
application_x_world=655590,
|
||||
application_x_wri=655591,
|
||||
application_x_x509_ca_cert=655592,
|
||||
application_x_xz=655593 | 0x08000000,
|
||||
application_x_zip=655594,
|
||||
application_x_zstd=655595 | 0x08000000,
|
||||
application_xml=655596,
|
||||
application_zip=655597 | 0x10000000,
|
||||
application_zlib=655598,
|
||||
audio_basic=458991 | 0x80000000,
|
||||
audio_it=458992,
|
||||
audio_make=458993,
|
||||
audio_mid=458994,
|
||||
audio_midi=458995,
|
||||
audio_mp4=458996,
|
||||
audio_mpeg=458997,
|
||||
audio_ogg=458998,
|
||||
audio_s3m=458999,
|
||||
audio_tsp_audio=459000,
|
||||
audio_tsplayer=459001,
|
||||
audio_vnd_qcelp=459002,
|
||||
audio_voxware=459003,
|
||||
audio_x_aiff=459004,
|
||||
audio_x_flac=459005,
|
||||
audio_x_gsm=459006,
|
||||
audio_x_hx_aac_adts=459007,
|
||||
audio_x_jam=459008,
|
||||
audio_x_liveaudio=459009,
|
||||
audio_x_m4a=459010,
|
||||
audio_x_midi=459011,
|
||||
audio_x_mod=459012,
|
||||
audio_x_mp4a_latm=459013,
|
||||
audio_x_mpeg_3=459014,
|
||||
audio_x_mpequrl=459015,
|
||||
audio_x_nspaudio=459016,
|
||||
audio_x_pn_realaudio=459017,
|
||||
audio_x_psid=459018,
|
||||
audio_x_realaudio=459019,
|
||||
audio_x_s3m=459020,
|
||||
audio_x_twinvq=459021,
|
||||
audio_x_twinvq_plugin=459022,
|
||||
audio_x_voc=459023,
|
||||
audio_x_wav=459024,
|
||||
audio_x_xbox_executable=459025 | 0x80000000,
|
||||
audio_x_xbox360_executable=459026 | 0x80000000,
|
||||
audio_xm=459027,
|
||||
font_otf=327956 | 0x20000000,
|
||||
font_sfnt=327957 | 0x20000000,
|
||||
font_woff=327958 | 0x20000000,
|
||||
font_woff2=327959 | 0x20000000,
|
||||
image_bmp=524568,
|
||||
image_cmu_raster=524569,
|
||||
image_fif=524570,
|
||||
image_florian=524571,
|
||||
image_g3fax=524572,
|
||||
image_gif=524573,
|
||||
image_heic=524574,
|
||||
image_ief=524575,
|
||||
image_jpeg=524576,
|
||||
image_jutvision=524577,
|
||||
image_naplps=524578,
|
||||
image_pict=524579,
|
||||
image_png=524580,
|
||||
image_svg=524581 | 0x80000000,
|
||||
image_svg_xml=524582 | 0x80000000,
|
||||
image_tiff=524583,
|
||||
image_vnd_adobe_photoshop=524584 | 0x80000000,
|
||||
image_vnd_djvu=524585 | 0x80000000,
|
||||
image_vnd_fpx=524586,
|
||||
image_vnd_microsoft_icon=524587,
|
||||
image_vnd_rn_realflash=524588,
|
||||
image_vnd_rn_realpix=524589,
|
||||
image_vnd_wap_wbmp=524590,
|
||||
image_vnd_xiff=524591,
|
||||
image_webp=524592,
|
||||
image_wmf=524593,
|
||||
image_x_3ds=524594,
|
||||
image_x_award_bioslogo=524595,
|
||||
image_x_cmu_raster=524596,
|
||||
image_x_cur=524597,
|
||||
image_x_dwg=524598,
|
||||
image_x_eps=524599,
|
||||
image_x_exr=524600,
|
||||
image_x_gem=524601,
|
||||
image_x_icns=524602,
|
||||
image_x_icon=524603 | 0x80000000,
|
||||
image_x_jg=524604,
|
||||
image_x_jps=524605,
|
||||
image_x_ms_bmp=524606,
|
||||
image_x_niff=524607,
|
||||
image_x_pcx=524608,
|
||||
image_x_pict=524609,
|
||||
image_x_portable_bitmap=524610,
|
||||
image_x_portable_graymap=524611,
|
||||
image_x_portable_pixmap=524612,
|
||||
image_x_quicktime=524613,
|
||||
image_x_rgb=524614,
|
||||
image_x_tga=524615,
|
||||
image_x_tiff=524616,
|
||||
image_x_win_bitmap=524617,
|
||||
image_x_xcf=524618 | 0x80000000,
|
||||
image_x_xpixmap=524619 | 0x80000000,
|
||||
image_x_xwindowdump=524620,
|
||||
message_news=196941,
|
||||
message_rfc822=196942,
|
||||
model_vnd_dwf=65871,
|
||||
model_vnd_gdl=65872,
|
||||
model_vnd_gs_gdl=65873,
|
||||
model_vrml=65874,
|
||||
model_x_pov=65875,
|
||||
text_PGP=590164,
|
||||
text_asp=590165,
|
||||
text_css=590166,
|
||||
text_html=590167,
|
||||
text_javascript=590168,
|
||||
text_mcf=590169,
|
||||
text_pascal=590170,
|
||||
text_plain=590171,
|
||||
text_richtext=590172,
|
||||
text_rtf=590173,
|
||||
text_scriplet=590174,
|
||||
text_tab_separated_values=590175,
|
||||
text_troff=590176,
|
||||
text_uri_list=590177,
|
||||
text_vnd_abc=590178,
|
||||
text_vnd_fmi_flexstor=590179,
|
||||
text_vnd_wap_wml=590180,
|
||||
text_vnd_wap_wmlscript=590181,
|
||||
text_webviewhtml=590182,
|
||||
text_x_Algol68=590183,
|
||||
text_x_asm=590184,
|
||||
text_x_audiosoft_intra=590185,
|
||||
text_x_awk=590186,
|
||||
text_x_bcpl=590187,
|
||||
text_x_c=590188,
|
||||
text_x_c__=590189,
|
||||
text_x_component=590190,
|
||||
text_x_diff=590191,
|
||||
text_x_fortran=590192,
|
||||
text_x_java=590193,
|
||||
text_x_la_asf=590194,
|
||||
text_x_lisp=590195,
|
||||
text_x_m=590196,
|
||||
text_x_m4=590197,
|
||||
text_x_makefile=590198,
|
||||
text_x_ms_regedit=590199,
|
||||
text_x_msdos_batch=590200,
|
||||
text_x_objective_c=590201,
|
||||
text_x_pascal=590202,
|
||||
text_x_perl=590203,
|
||||
text_x_php=590204,
|
||||
text_x_po=590205,
|
||||
text_x_python=590206,
|
||||
text_x_ruby=590207,
|
||||
text_x_sass=590208,
|
||||
text_x_scss=590209,
|
||||
text_x_server_parsed_html=590210,
|
||||
text_x_setext=590211,
|
||||
text_x_sgml=590212,
|
||||
text_x_shellscript=590213,
|
||||
text_x_speech=590214,
|
||||
text_x_tcl=590215,
|
||||
text_x_tex=590216,
|
||||
text_x_uil=590217,
|
||||
text_x_uuencode=590218,
|
||||
text_x_vcalendar=590219,
|
||||
text_x_vcard=590220,
|
||||
text_xml=590221,
|
||||
video_MP2T=393614,
|
||||
video_animaflex=393615,
|
||||
video_avi=393616,
|
||||
video_avs_video=393617,
|
||||
video_mp4=393618,
|
||||
video_mpeg=393619,
|
||||
video_quicktime=393620,
|
||||
video_vdo=393621,
|
||||
video_vivo=393622,
|
||||
video_vnd_rn_realvideo=393623,
|
||||
video_vosaic=393624,
|
||||
video_webm=393625,
|
||||
video_x_amt_demorun=393626,
|
||||
video_x_amt_showrun=393627,
|
||||
video_x_atomic3d_feature=393628,
|
||||
video_x_dl=393629,
|
||||
video_x_dv=393630,
|
||||
video_x_fli=393631,
|
||||
video_x_flv=393632,
|
||||
video_x_isvideo=393633,
|
||||
video_x_jng=393634 | 0x80000000,
|
||||
video_x_m4v=393635,
|
||||
video_x_matroska=393636,
|
||||
video_x_mng=393637,
|
||||
video_x_motion_jpeg=393638,
|
||||
video_x_ms_asf=393639,
|
||||
video_x_msvideo=393640,
|
||||
video_x_qtc=393641,
|
||||
video_x_sgi_movie=393642,
|
||||
x_epoc_x_sisx_app=721323,
|
||||
application_x_envoy=655481,
|
||||
application_x_esrehber=655482,
|
||||
application_x_excel=655483,
|
||||
application_x_executable=655484,
|
||||
application_x_font_gdos=655485,
|
||||
application_x_font_pf2=655486,
|
||||
application_x_font_pfm=655487,
|
||||
application_x_font_sfn=655488,
|
||||
application_x_font_ttf=655489 | 0x20000000,
|
||||
application_x_fptapplication_x_dbt=655490,
|
||||
application_x_freelance=655491,
|
||||
application_x_gamecube_rom=655492,
|
||||
application_x_gdbm=655493,
|
||||
application_x_gettext_translation=655494,
|
||||
application_x_git=655495,
|
||||
application_x_gsp=655496,
|
||||
application_x_gss=655497,
|
||||
application_x_gtar=655498,
|
||||
application_x_gzip=655499,
|
||||
application_x_hdf=655500,
|
||||
application_x_helpfile=655501,
|
||||
application_x_httpd_imap=655502,
|
||||
application_x_ima=655503,
|
||||
application_x_innosetup=655504,
|
||||
application_x_internett_signup=655505,
|
||||
application_x_inventor=655506,
|
||||
application_x_ip2=655507,
|
||||
application_x_java_applet=655508,
|
||||
application_x_java_commerce=655509,
|
||||
application_x_java_image=655510,
|
||||
application_x_java_jmod=655511,
|
||||
application_x_java_keystore=655512,
|
||||
application_x_kdelnk=655513,
|
||||
application_x_koan=655514,
|
||||
application_x_latex=655515,
|
||||
application_x_livescreen=655516,
|
||||
application_x_lotus=655517,
|
||||
application_x_lz4=655518 | 0x08000000,
|
||||
application_x_lz4_json=655519,
|
||||
application_x_lzh=655520,
|
||||
application_x_lzh_compressed=655521,
|
||||
application_x_lzip=655522 | 0x08000000,
|
||||
application_x_lzma=655523 | 0x08000000,
|
||||
application_x_lzop=655524 | 0x08000000,
|
||||
application_x_lzx=655525,
|
||||
application_x_mach_binary=655526,
|
||||
application_x_mach_executable=655527,
|
||||
application_x_magic_cap_package_1_0=655528,
|
||||
application_x_mathcad=655529,
|
||||
application_x_maxis_dbpf=655530,
|
||||
application_x_meme=655531,
|
||||
application_x_midi=655532,
|
||||
application_x_mif=655533,
|
||||
application_x_mix_transfer=655534,
|
||||
application_x_mobipocket_ebook=655535 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655536,
|
||||
application_x_ms_pdb=655537,
|
||||
application_x_ms_reader=655538,
|
||||
application_x_msaccess=655539,
|
||||
application_x_n64_rom=655540,
|
||||
application_x_navi_animation=655541,
|
||||
application_x_navidoc=655542,
|
||||
application_x_navimap=655543,
|
||||
application_x_navistyle=655544,
|
||||
application_x_nes_rom=655545,
|
||||
application_x_netcdf=655546,
|
||||
application_x_newton_compatible_pkg=655547,
|
||||
application_x_nintendo_ds_rom=655548,
|
||||
application_x_object=655549,
|
||||
application_x_omc=655550,
|
||||
application_x_omcdatamaker=655551,
|
||||
application_x_omcregerator=655552,
|
||||
application_x_pagemaker=655553,
|
||||
application_x_pcl=655554,
|
||||
application_x_pgp_keyring=655555,
|
||||
application_x_pixclscript=655556,
|
||||
application_x_pkcs7_certreqresp=655557,
|
||||
application_x_pkcs7_signature=655558,
|
||||
application_x_project=655559,
|
||||
application_x_qpro=655560,
|
||||
application_x_rar=655561 | 0x10000000,
|
||||
application_x_rpm=655562,
|
||||
application_x_sdp=655563,
|
||||
application_x_sea=655564,
|
||||
application_x_seelogo=655565,
|
||||
application_x_setupscript=655566,
|
||||
application_x_shar=655567,
|
||||
application_x_sharedlib=655568,
|
||||
application_x_shockwave_flash=655569,
|
||||
application_x_snappy_framed=655570,
|
||||
application_x_sprite=655571,
|
||||
application_x_sqlite3=655572,
|
||||
application_x_stargallery_thm=655573,
|
||||
application_x_stuffit=655574,
|
||||
application_x_sv4cpio=655575,
|
||||
application_x_sv4crc=655576,
|
||||
application_x_tar=655577 | 0x10000000,
|
||||
application_x_tbook=655578,
|
||||
application_x_terminfo=655579,
|
||||
application_x_terminfo2=655580,
|
||||
application_x_tex_tfm=655581,
|
||||
application_x_texinfo=655582,
|
||||
application_x_ustar=655583,
|
||||
application_x_visio=655584,
|
||||
application_x_vnd_audioexplosion_mzz=655585,
|
||||
application_x_vnd_ls_xpix=655586,
|
||||
application_x_vrml=655587,
|
||||
application_x_wais_source=655588,
|
||||
application_x_wine_extension_ini=655589,
|
||||
application_x_wintalk=655590,
|
||||
application_x_world=655591,
|
||||
application_x_wri=655592,
|
||||
application_x_x509_ca_cert=655593,
|
||||
application_x_xz=655594 | 0x08000000,
|
||||
application_x_zip=655595,
|
||||
application_x_zstd=655596 | 0x08000000,
|
||||
application_xml=655597,
|
||||
application_zip=655598 | 0x10000000,
|
||||
application_zlib=655599,
|
||||
audio_basic=458992 | 0x80000000,
|
||||
audio_it=458993,
|
||||
audio_make=458994,
|
||||
audio_mid=458995,
|
||||
audio_midi=458996,
|
||||
audio_mp4=458997,
|
||||
audio_mpeg=458998,
|
||||
audio_ogg=458999,
|
||||
audio_s3m=459000,
|
||||
audio_tsp_audio=459001,
|
||||
audio_tsplayer=459002,
|
||||
audio_vnd_qcelp=459003,
|
||||
audio_voxware=459004,
|
||||
audio_x_aiff=459005,
|
||||
audio_x_flac=459006,
|
||||
audio_x_gsm=459007,
|
||||
audio_x_hx_aac_adts=459008,
|
||||
audio_x_jam=459009,
|
||||
audio_x_liveaudio=459010,
|
||||
audio_x_m4a=459011,
|
||||
audio_x_midi=459012,
|
||||
audio_x_mod=459013,
|
||||
audio_x_mp4a_latm=459014,
|
||||
audio_x_mpeg_3=459015,
|
||||
audio_x_mpequrl=459016,
|
||||
audio_x_nspaudio=459017,
|
||||
audio_x_pn_realaudio=459018,
|
||||
audio_x_psid=459019,
|
||||
audio_x_realaudio=459020,
|
||||
audio_x_s3m=459021,
|
||||
audio_x_twinvq=459022,
|
||||
audio_x_twinvq_plugin=459023,
|
||||
audio_x_voc=459024,
|
||||
audio_x_wav=459025,
|
||||
audio_x_xbox_executable=459026 | 0x80000000,
|
||||
audio_x_xbox360_executable=459027 | 0x80000000,
|
||||
audio_xm=459028,
|
||||
font_otf=327957 | 0x20000000,
|
||||
font_sfnt=327958 | 0x20000000,
|
||||
font_woff=327959 | 0x20000000,
|
||||
font_woff2=327960 | 0x20000000,
|
||||
image_bmp=524569,
|
||||
image_cmu_raster=524570,
|
||||
image_fif=524571,
|
||||
image_florian=524572,
|
||||
image_g3fax=524573,
|
||||
image_gif=524574,
|
||||
image_heic=524575,
|
||||
image_ief=524576,
|
||||
image_jpeg=524577,
|
||||
image_jutvision=524578,
|
||||
image_naplps=524579,
|
||||
image_pict=524580,
|
||||
image_png=524581,
|
||||
image_svg=524582 | 0x80000000,
|
||||
image_svg_xml=524583 | 0x80000000,
|
||||
image_tiff=524584,
|
||||
image_vnd_adobe_photoshop=524585 | 0x80000000,
|
||||
image_vnd_djvu=524586 | 0x80000000,
|
||||
image_vnd_fpx=524587,
|
||||
image_vnd_microsoft_icon=524588,
|
||||
image_vnd_rn_realflash=524589,
|
||||
image_vnd_rn_realpix=524590,
|
||||
image_vnd_wap_wbmp=524591,
|
||||
image_vnd_xiff=524592,
|
||||
image_webp=524593,
|
||||
image_wmf=524594,
|
||||
image_x_3ds=524595,
|
||||
image_x_award_bioslogo=524596,
|
||||
image_x_cmu_raster=524597,
|
||||
image_x_cur=524598,
|
||||
image_x_dwg=524599,
|
||||
image_x_eps=524600,
|
||||
image_x_exr=524601,
|
||||
image_x_gem=524602,
|
||||
image_x_icns=524603,
|
||||
image_x_icon=524604 | 0x80000000,
|
||||
image_x_jg=524605,
|
||||
image_x_jps=524606,
|
||||
image_x_ms_bmp=524607,
|
||||
image_x_niff=524608,
|
||||
image_x_pcx=524609,
|
||||
image_x_pict=524610,
|
||||
image_x_portable_bitmap=524611,
|
||||
image_x_portable_graymap=524612,
|
||||
image_x_portable_pixmap=524613,
|
||||
image_x_quicktime=524614,
|
||||
image_x_rgb=524615,
|
||||
image_x_tga=524616,
|
||||
image_x_tiff=524617,
|
||||
image_x_win_bitmap=524618,
|
||||
image_x_xcf=524619 | 0x80000000,
|
||||
image_x_xpixmap=524620 | 0x80000000,
|
||||
image_x_xwindowdump=524621,
|
||||
message_news=196942,
|
||||
message_rfc822=196943,
|
||||
model_vnd_dwf=65872,
|
||||
model_vnd_gdl=65873,
|
||||
model_vnd_gs_gdl=65874,
|
||||
model_vrml=65875,
|
||||
model_x_pov=65876,
|
||||
text_PGP=590165,
|
||||
text_asp=590166,
|
||||
text_css=590167,
|
||||
text_html=590168,
|
||||
text_javascript=590169,
|
||||
text_mcf=590170,
|
||||
text_pascal=590171,
|
||||
text_plain=590172,
|
||||
text_richtext=590173,
|
||||
text_rtf=590174,
|
||||
text_scriplet=590175,
|
||||
text_tab_separated_values=590176,
|
||||
text_troff=590177,
|
||||
text_uri_list=590178,
|
||||
text_vnd_abc=590179,
|
||||
text_vnd_fmi_flexstor=590180,
|
||||
text_vnd_wap_wml=590181,
|
||||
text_vnd_wap_wmlscript=590182,
|
||||
text_webviewhtml=590183,
|
||||
text_x_Algol68=590184,
|
||||
text_x_asm=590185,
|
||||
text_x_audiosoft_intra=590186,
|
||||
text_x_awk=590187,
|
||||
text_x_bcpl=590188,
|
||||
text_x_c=590189,
|
||||
text_x_c__=590190,
|
||||
text_x_component=590191,
|
||||
text_x_diff=590192,
|
||||
text_x_fortran=590193,
|
||||
text_x_java=590194,
|
||||
text_x_la_asf=590195,
|
||||
text_x_lisp=590196,
|
||||
text_x_m=590197,
|
||||
text_x_m4=590198,
|
||||
text_x_makefile=590199,
|
||||
text_x_ms_regedit=590200,
|
||||
text_x_msdos_batch=590201,
|
||||
text_x_objective_c=590202,
|
||||
text_x_pascal=590203,
|
||||
text_x_perl=590204,
|
||||
text_x_php=590205,
|
||||
text_x_po=590206,
|
||||
text_x_python=590207,
|
||||
text_x_ruby=590208,
|
||||
text_x_sass=590209,
|
||||
text_x_scss=590210,
|
||||
text_x_server_parsed_html=590211,
|
||||
text_x_setext=590212,
|
||||
text_x_sgml=590213,
|
||||
text_x_shellscript=590214,
|
||||
text_x_speech=590215,
|
||||
text_x_tcl=590216,
|
||||
text_x_tex=590217,
|
||||
text_x_uil=590218,
|
||||
text_x_uuencode=590219,
|
||||
text_x_vcalendar=590220,
|
||||
text_x_vcard=590221,
|
||||
text_xml=590222,
|
||||
video_MP2T=393615,
|
||||
video_animaflex=393616,
|
||||
video_avi=393617,
|
||||
video_avs_video=393618,
|
||||
video_mp4=393619,
|
||||
video_mpeg=393620,
|
||||
video_quicktime=393621,
|
||||
video_vdo=393622,
|
||||
video_vivo=393623,
|
||||
video_vnd_rn_realvideo=393624,
|
||||
video_vosaic=393625,
|
||||
video_webm=393626,
|
||||
video_x_amt_demorun=393627,
|
||||
video_x_amt_showrun=393628,
|
||||
video_x_atomic3d_feature=393629,
|
||||
video_x_dl=393630,
|
||||
video_x_dv=393631,
|
||||
video_x_fli=393632,
|
||||
video_x_flv=393633,
|
||||
video_x_isvideo=393634,
|
||||
video_x_jng=393635 | 0x80000000,
|
||||
video_x_m4v=393636,
|
||||
video_x_matroska=393637,
|
||||
video_x_mng=393638,
|
||||
video_x_motion_jpeg=393639,
|
||||
video_x_ms_asf=393640,
|
||||
video_x_msvideo=393641,
|
||||
video_x_qtc=393642,
|
||||
video_x_sgi_movie=393643,
|
||||
x_epoc_x_sisx_app=721324,
|
||||
};
|
||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||
case application_arj: return "application/arj";
|
||||
@@ -610,6 +611,7 @@ case application_x_mif: return "application/x-mif";
|
||||
case application_x_mix_transfer: return "application/x-mix-transfer";
|
||||
case application_xml: return "application/xml";
|
||||
case application_x_mobipocket_ebook: return "application/x-mobipocket-ebook";
|
||||
case application_vnd_amazon_mobi8_ebook: return "application/vnd.amazon.mobi8-ebook";
|
||||
case application_x_msaccess: return "application/x-msaccess";
|
||||
case application_x_ms_compress_szdd: return "application/x-ms-compress-szdd";
|
||||
case application_x_ms_pdb: return "application/x-ms-pdb";
|
||||
@@ -1052,6 +1054,8 @@ g_hash_table_insert(ext_table, "mif", (gpointer)application_x_mif);
|
||||
g_hash_table_insert(ext_table, "nix", (gpointer)application_x_mix_transfer);
|
||||
g_hash_table_insert(ext_table, "opf", (gpointer)application_xml);
|
||||
g_hash_table_insert(ext_table, "mobi", (gpointer)application_x_mobipocket_ebook);
|
||||
g_hash_table_insert(ext_table, "azw", (gpointer)application_vnd_amazon_mobi8_ebook);
|
||||
g_hash_table_insert(ext_table, "azw3", (gpointer)application_vnd_amazon_mobi8_ebook);
|
||||
g_hash_table_insert(ext_table, "accdb", (gpointer)application_x_msaccess);
|
||||
g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd);
|
||||
g_hash_table_insert(ext_table, "pdb", (gpointer)application_x_ms_pdb);
|
||||
@@ -1556,6 +1560,7 @@ g_hash_table_insert(mime_table, "application/x-mif", (gpointer)application_x_mif
|
||||
g_hash_table_insert(mime_table, "application/x-mix-transfer", (gpointer)application_x_mix_transfer);
|
||||
g_hash_table_insert(mime_table, "application/xml", (gpointer)application_xml);
|
||||
g_hash_table_insert(mime_table, "application/x-mobipocket-ebook", (gpointer)application_x_mobipocket_ebook);
|
||||
g_hash_table_insert(mime_table, "application/vnd.amazon.mobi8-ebook", (gpointer)application_vnd_amazon_mobi8_ebook);
|
||||
g_hash_table_insert(mime_table, "application/x-msaccess", (gpointer)application_x_msaccess);
|
||||
g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd);
|
||||
g_hash_table_insert(mime_table, "application/x-ms-pdb", (gpointer)application_x_ms_pdb);
|
||||
|
||||
@@ -1,7 +1,15 @@
|
||||
#include "parse.h"
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
#include "mime.h"
|
||||
#include "src/io/serialize.h"
|
||||
|
||||
__thread magic_t Magic = NULL;
|
||||
#include <magic.h>
|
||||
|
||||
|
||||
#define MIN_VIDEO_SIZE 1024 * 64
|
||||
#define MIN_IMAGE_SIZE 1024 * 2
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
@@ -24,31 +32,10 @@ void fs_close(struct vfile *f) {
|
||||
}
|
||||
}
|
||||
|
||||
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
|
||||
|
||||
void *full_buf;
|
||||
|
||||
if (job->info.st_size <= bytes_read) {
|
||||
full_buf = malloc(job->info.st_size);
|
||||
memcpy(full_buf, buf, job->info.st_size);
|
||||
} else {
|
||||
full_buf = malloc(job->info.st_size);
|
||||
memcpy(full_buf, buf, bytes_read);
|
||||
|
||||
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
|
||||
if (ret < 0) {
|
||||
free(full_buf);
|
||||
|
||||
if (job->vfile.is_fs_file) {
|
||||
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
|
||||
} else {
|
||||
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", ret, archive_error_string(job->vfile.arc))
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
void fs_reset(struct vfile *f) {
|
||||
if (f->fd != -1) {
|
||||
lseek(f->fd, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
return full_buf;
|
||||
}
|
||||
|
||||
void parse(void *arg) {
|
||||
@@ -56,16 +43,12 @@ void parse(void *arg) {
|
||||
parse_job_t *job = arg;
|
||||
document_t doc;
|
||||
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
|
||||
if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
|
||||
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
|
||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
|
||||
return;
|
||||
}
|
||||
|
||||
if (Magic == NULL) {
|
||||
Magic = magic_open(MAGIC_MIME_TYPE);
|
||||
magic_load(Magic, NULL);
|
||||
}
|
||||
|
||||
doc.filepath = job->filepath;
|
||||
doc.ext = (short) job->ext;
|
||||
@@ -73,9 +56,9 @@ void parse(void *arg) {
|
||||
doc.meta_head = NULL;
|
||||
doc.meta_tail = NULL;
|
||||
doc.mime = 0;
|
||||
doc.size = job->info.st_size;
|
||||
doc.ino = job->info.st_ino;
|
||||
doc.mtime = job->info.st_mtim.tv_sec;
|
||||
doc.size = job->vfile.info.st_size;
|
||||
doc.ino = job->vfile.info.st_ino;
|
||||
doc.mtime = job->vfile.info.st_mtim.tv_sec;
|
||||
|
||||
uuid_generate(doc.uuid);
|
||||
char *buf[PARSE_BUF_SIZE];
|
||||
@@ -86,7 +69,7 @@ void parse(void *arg) {
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
|
||||
}
|
||||
|
||||
if (job->info.st_size == 0) {
|
||||
if (job->vfile.info.st_size == 0) {
|
||||
doc.mime = MIME_EMPTY;
|
||||
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
|
||||
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
|
||||
@@ -109,7 +92,10 @@ void parse(void *arg) {
|
||||
return;
|
||||
}
|
||||
|
||||
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
|
||||
magic_t magic = magic_open(MAGIC_MIME_TYPE);
|
||||
magic_load(magic, NULL);
|
||||
|
||||
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
|
||||
if (magic_mime_str != NULL) {
|
||||
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
|
||||
|
||||
@@ -120,8 +106,9 @@ void parse(void *arg) {
|
||||
}
|
||||
}
|
||||
|
||||
magic_close(Magic);
|
||||
Magic = NULL;
|
||||
job->vfile.reset(&job->vfile);
|
||||
|
||||
magic_close(magic);
|
||||
}
|
||||
|
||||
int mmime = MAJOR_MIME(doc.mime);
|
||||
@@ -131,50 +118,30 @@ void parse(void *arg) {
|
||||
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
|
||||
if (job->vfile.is_fs_file) {
|
||||
parse_media_filename(job->filepath, &doc);
|
||||
} else {
|
||||
parse_media_vfile(&job->vfile, &doc);
|
||||
}
|
||||
parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);
|
||||
|
||||
} else if (IS_PDF(doc.mime)) {
|
||||
void *pdf_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_pdf(pdf_buf, doc.size, &doc);
|
||||
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
|
||||
|
||||
if (pdf_buf != buf && pdf_buf != NULL) {
|
||||
free(pdf_buf);
|
||||
}
|
||||
|
||||
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
|
||||
parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
|
||||
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
|
||||
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||
|
||||
} else if (IS_FONT(doc.mime)) {
|
||||
void *font_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_font(font_buf, doc.size, &doc);
|
||||
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
|
||||
|
||||
if (font_buf != buf && font_buf != NULL) {
|
||||
free(font_buf);
|
||||
}
|
||||
} else if (
|
||||
ScanCtx.archive_mode != ARC_MODE_SKIP && (
|
||||
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
|
||||
IS_ARC(doc.mime) ||
|
||||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
|
||||
)) {
|
||||
parse_archive(&job->vfile, &doc);
|
||||
} else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
|
||||
void *doc_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_doc(doc_buf, doc.size, &doc);
|
||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
|
||||
} else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) {
|
||||
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
|
||||
|
||||
if (doc_buf != buf && doc_buf != NULL) {
|
||||
free(doc_buf);
|
||||
}
|
||||
} else if (is_cbr(doc.mime)) {
|
||||
void *cbr_buf = read_all(job, (char *) buf, bytes_read);
|
||||
parse_cbr(cbr_buf, doc.size, &doc);
|
||||
|
||||
if (cbr_buf != buf && cbr_buf != NULL) {
|
||||
free(cbr_buf);
|
||||
}
|
||||
} else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) {
|
||||
parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
|
||||
} else if (IS_MOBI(doc.mime)) {
|
||||
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
|
||||
}
|
||||
|
||||
//Parent meta
|
||||
@@ -184,7 +151,7 @@ void parse(void *arg) {
|
||||
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->strval, tmp);
|
||||
strcpy(meta_parent->str_val, tmp);
|
||||
APPEND_META((&doc), meta_parent)
|
||||
}
|
||||
|
||||
@@ -194,7 +161,5 @@ void parse(void *arg) {
|
||||
}
|
||||
|
||||
void cleanup_parse() {
|
||||
if (Magic != NULL) {
|
||||
magic_close(Magic);
|
||||
}
|
||||
// noop
|
||||
}
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
#ifndef SIST2_PARSE_H
|
||||
#define SIST2_PARSE_H
|
||||
|
||||
#include "src/sist.h"
|
||||
#include "../sist.h"
|
||||
|
||||
#define PARSE_BUF_SIZE 4096
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size);
|
||||
void fs_close(struct vfile *f);
|
||||
void fs_reset(struct vfile *f);
|
||||
|
||||
void parse(void *arg);
|
||||
|
||||
|
||||
@@ -1,331 +0,0 @@
|
||||
#include "pdf.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
#define MIN_OCR_SIZE 350
|
||||
#define MIN_OCR_LEN 10
|
||||
__thread text_buffer_t thread_buffer;
|
||||
|
||||
|
||||
/**
 * Render page 0 of an opened document as a PNG thumbnail and write it to the
 * index store, keyed by the document's UUID.
 *
 * The page is scaled uniformly so that its longest side is ScanCtx.tn_size
 * pixels.
 *
 * @param ctx   MuPDF context used for every call made here
 * @param doc   document being indexed; doc->uuid is the store key and
 *              doc->filepath is used in log messages
 * @param fzdoc MuPDF document to take the cover page from
 * @return TRUE when the thumbnail was written, FALSE on any failure
 */
int render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {

    int err = 0;
    fz_page *cover = NULL;

    fz_var(cover);
    fz_var(err);
    fz_try(ctx)
    {
        cover = fz_load_page(ctx, fzdoc, 0);
    }
    fz_catch(ctx)
    {
        err = 1;
    }

    if (err != 0) {
        fz_drop_page(ctx, cover);
        LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
        return FALSE;
    }

    fz_rect bounds = fz_bound_page(ctx, cover);

    float w = (float) bounds.x1 - bounds.x0;
    float h = (float) bounds.y1 - bounds.y0;

    // A degenerate page would give an infinite/NaN scale factor below.
    if (!(w > 0 && h > 0)) {
        LOG_WARNINGF(doc->filepath, "Cover page has empty bounds (%fx%f), skipping thumbnail", w, h)
        fz_drop_page(ctx, cover);
        return FALSE;
    }

    // Fit the longest side into tn_size, keeping the aspect ratio.
    float scale = (float) ScanCtx.tn_size / (w > h ? w : h);
    fz_matrix m = fz_scale(scale, scale);

    bounds = fz_transform_rect(bounds, m);
    fz_irect bbox = fz_round_rect(bounds);

    // Pixmap/device creation allocates and can throw, so it must run inside
    // fz_try: an exception raised outside of one has no handler to land in.
    fz_pixmap *pixmap = NULL;
    fz_device *dev = NULL;

    fz_var(pixmap);
    fz_var(dev);
    fz_var(err);
    fz_try(ctx)
    {
        pixmap = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, bbox, NULL, 0);
        fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
        dev = fz_new_draw_device(ctx, m, pixmap);
    }
    fz_catch(ctx)
    {
        err = 1;
    }

    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "Could not create thumbnail pixmap/device: %s", ctx->error.message)
        fz_drop_device(ctx, dev);
        fz_drop_pixmap(ctx, pixmap);
        fz_drop_page(ctx, cover);
        return FALSE;
    }

    // Page rendering is serialised across threads with mupdf_mu.
    pthread_mutex_lock(&ScanCtx.mupdf_mu);

    fz_var(err);
    fz_try(ctx)
    {
        fz_run_page(ctx, cover, dev, fz_identity, NULL);
        // fz_close_device() may throw, so it belongs in the try block; if it
        // threw inside fz_always the drop and the unlock would be skipped.
        fz_close_device(ctx, dev);
    }
    fz_always(ctx)
    {
        fz_drop_device(ctx, dev);
        pthread_mutex_unlock(&ScanCtx.mupdf_mu);
    }
    fz_catch(ctx)
    {
        err = ctx->error.errcode;
    }

    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
        fz_drop_page(ctx, cover);
        fz_drop_pixmap(ctx, pixmap);
        return FALSE;
    }

    fz_buffer *fzbuf = NULL;
    fz_var(fzbuf);
    fz_var(err);

    fz_try(ctx)
    {
        fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
    }
    fz_catch(ctx)
    {
        err = ctx->error.errcode;
    }

    if (err == 0) {
        unsigned char *tn_buf;
        size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
        store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
    }

    fz_drop_buffer(ctx, fzbuf);
    fz_drop_pixmap(ctx, pixmap);
    fz_drop_page(ctx, cover);

    if (err != 0) {
        LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
                     ctx->error.message)
        return FALSE;
    }

    return TRUE;
}
|
||||
|
||||
/**
 * MuPDF warning/error print callback: forwards the message to the log as a
 * warning, but only when verbose logging is enabled.
 *
 * @param user    the document_t being parsed (installed by init_ctx())
 * @param message text supplied by MuPDF
 */
void fz_err_callback(void *user, UNUSED(const char *message)) {
    if (!LogCtx.verbose) {
        return;
    }

    document_t *current_doc = (document_t *) user;
    LOG_WARNINGF(current_doc->filepath, "FZ: %s", message)
}
|
||||
|
||||
__always_inline
|
||||
static void init_ctx(fz_context *ctx, document_t *doc) {
|
||||
fz_disable_icc(ctx);
|
||||
fz_register_document_handlers(ctx);
|
||||
|
||||
ctx->warn.print_user = doc;
|
||||
ctx->warn.print = fz_err_callback;
|
||||
ctx->error.print_user = doc;
|
||||
ctx->error.print = fz_err_callback;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
|
||||
if (block->type != FZ_STEXT_BLOCK_TEXT) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
fz_stext_line *line = block->u.t.first_line;
|
||||
while (line != NULL) {
|
||||
fz_stext_char *c = line->first_char;
|
||||
while (c != NULL) {
|
||||
if (text_buffer_append_char(tex, c->c) == TEXT_BUF_FULL) {
|
||||
return TEXT_BUF_FULL;
|
||||
}
|
||||
c = c->next;
|
||||
}
|
||||
line = line->next;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define IS_VALID_BPP(d) (d==1 || d==2 || d==4 || d==8 || d==16 || d==24 || d==32)
|
||||
|
||||
void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
|
||||
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
|
||||
UNUSED(fz_color_params color_params)) {
|
||||
|
||||
int l2factor = 0;
|
||||
|
||||
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
|
||||
|
||||
fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);
|
||||
|
||||
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
|
||||
TessBaseAPI *api = TessBaseAPICreate();
|
||||
TessBaseAPIInit3(api, ScanCtx.tesseract_path, ScanCtx.tesseract_lang);
|
||||
|
||||
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
|
||||
TessBaseAPISetSourceResolution(api, pix->xres);
|
||||
|
||||
char *text = TessBaseAPIGetUTF8Text(api);
|
||||
size_t len = strlen(text);
|
||||
if (len >= MIN_OCR_LEN) {
|
||||
text_buffer_append_string(&thread_buffer, text, len - 1);
|
||||
LOG_DEBUGF(
|
||||
"pdf.c",
|
||||
"(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
|
||||
pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
|
||||
)
|
||||
}
|
||||
|
||||
TessBaseAPIEnd(api);
|
||||
TessBaseAPIDelete(api);
|
||||
}
|
||||
fz_drop_pixmap(ctx, pix);
|
||||
}
|
||||
}
|
||||
|
||||
void parse_pdf(const void *buf, size_t buf_len, document_t *doc) {
|
||||
|
||||
if (buf == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
static int mu_is_initialized = 0;
|
||||
if (!mu_is_initialized) {
|
||||
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
|
||||
mu_is_initialized = 1;
|
||||
}
|
||||
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
|
||||
|
||||
init_ctx(ctx, doc);
|
||||
|
||||
int err = 0;
|
||||
|
||||
fz_document *fzdoc = NULL;
|
||||
fz_stream *stream = NULL;
|
||||
fz_var(fzdoc);
|
||||
fz_var(stream);
|
||||
fz_var(err);
|
||||
|
||||
fz_try(ctx)
|
||||
{
|
||||
stream = fz_open_memory(ctx, buf, buf_len);
|
||||
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
|
||||
}
|
||||
fz_catch(ctx)
|
||||
err = ctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
char title[4096] = {'\0',};
|
||||
fz_try(ctx)
|
||||
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
|
||||
fz_catch(ctx)
|
||||
;
|
||||
|
||||
if (strlen(title) > 0) {
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
|
||||
meta_content->key = MetaTitle;
|
||||
strcpy(meta_content->strval, title);
|
||||
APPEND_META(doc, meta_content)
|
||||
}
|
||||
|
||||
int page_count = -1;
|
||||
fz_var(err);
|
||||
fz_try(ctx)
|
||||
page_count = fz_count_pages(ctx, fzdoc);
|
||||
fz_catch(ctx)
|
||||
err = ctx->error.errcode;
|
||||
|
||||
if (err) {
|
||||
LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ScanCtx.tn_size > 0) {
|
||||
err = render_cover(ctx, doc, fzdoc);
|
||||
}
|
||||
|
||||
if (err == TRUE) {
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ScanCtx.content_size > 0) {
|
||||
fz_stext_options opts = {0};
|
||||
thread_buffer = text_buffer_create(ScanCtx.content_size);
|
||||
|
||||
for (int current_page = 0; current_page < page_count; current_page++) {
|
||||
fz_page *page = NULL;
|
||||
fz_var(err);
|
||||
fz_try(ctx)
|
||||
page = fz_load_page(ctx, fzdoc, current_page);
|
||||
fz_catch(ctx)
|
||||
err = ctx->error.errcode;
|
||||
if (err != 0) {
|
||||
LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
fz_drop_page(ctx, page);
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
|
||||
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
|
||||
dev->stroke_path = NULL;
|
||||
dev->stroke_text = NULL;
|
||||
dev->clip_text = NULL;
|
||||
dev->clip_stroke_path = NULL;
|
||||
dev->clip_stroke_text = NULL;
|
||||
|
||||
if (ScanCtx.tesseract_lang != NULL) {
|
||||
dev->fill_image = fill_image;
|
||||
}
|
||||
|
||||
fz_var(err);
|
||||
fz_try(ctx)
|
||||
fz_run_page(ctx, page, dev, fz_identity, NULL);
|
||||
fz_always(ctx)
|
||||
{
|
||||
fz_close_device(ctx, dev);
|
||||
fz_drop_device(ctx, dev);
|
||||
}
|
||||
fz_catch(ctx)
|
||||
err = ctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
fz_drop_page(ctx, page);
|
||||
fz_drop_stext_page(ctx, stext);
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
fz_stext_block *block = stext->first_block;
|
||||
while (block != NULL) {
|
||||
int ret = read_stext_block(block, &thread_buffer);
|
||||
if (ret == TEXT_BUF_FULL) {
|
||||
break;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
fz_drop_stext_page(ctx, stext);
|
||||
fz_drop_page(ctx, page);
|
||||
|
||||
if (thread_buffer.dyn_buffer.cur >= thread_buffer.dyn_buffer.size) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
text_buffer_terminate_string(&thread_buffer);
|
||||
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
|
||||
meta_content->key = MetaContent;
|
||||
memcpy(meta_content->strval, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
|
||||
APPEND_META(doc, meta_content)
|
||||
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
}
|
||||
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef SIST2_PDF_H
|
||||
#define SIST2_PDF_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
|
||||
void parse_pdf(const void *buf, size_t buf_len, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -1,37 +0,0 @@
|
||||
#include "text.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Index the beginning of a text file as the document's MetaContent.
 *
 * At most ScanCtx.content_size bytes are used. The first `bytes_read` bytes
 * are taken from `buf`; if more are wanted and the file is longer, the rest
 * is read from `f`.
 *
 * @param bytes_read number of bytes of the file already present in `buf`
 * @param f          virtual file to read the remainder from
 * @param buf        bytes already read from the start of the file
 * @param doc        document record to append the MetaContent line to
 */
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {

    char *intermediate_buf;
    int intermediate_buf_len;

    if (bytes_read == doc->size || bytes_read >= ScanCtx.content_size) {
        // Everything needed is already in buf.
        int to_copy = MIN(bytes_read, ScanCtx.content_size);
        intermediate_buf = malloc(to_copy);
        intermediate_buf_len = to_copy;
        memcpy(intermediate_buf, buf, to_copy);

    } else {
        int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
        if (to_read < 0) {
            // The recorded size is smaller than what was already read;
            // never allocate less than is about to be copied.
            to_read = 0;
        }

        intermediate_buf = malloc(to_read + bytes_read);
        if (bytes_read != 0) {
            memcpy(intermediate_buf, buf, bytes_read);
        }

        // Count only the bytes the read actually produced, so a failed or
        // short read never exposes uninitialised memory.
        int ret = to_read > 0 ? f->read(f, intermediate_buf + bytes_read, to_read) : 0;
        intermediate_buf_len = bytes_read + (ret > 0 ? ret : 0);
    }

    text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
    text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
    text_buffer_terminate_string(&tex);

    meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
    meta->key = MetaContent;
    strcpy(meta->strval, tex.dyn_buffer.buf);
    APPEND_META(doc, meta)

    free(intermediate_buf);
    text_buffer_destroy(&tex);
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef SIST2_TEXT_H
|
||||
#define SIST2_TEXT_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user