Support for cbr documents

This commit is contained in:
simon 2020-02-22 13:11:19 -05:00
parent d19a75926b
commit 398f1aead4
12 changed files with 85 additions and 9 deletions

View File

@ -26,6 +26,7 @@ add_executable(
src/parsing/arc.c src/parsing/arc.h
src/parsing/doc.c src/parsing/doc.h
src/log.c src/log.h
src/parsing/cbr.h src/parsing/cbr.c
# argparse
argparse/argparse.h argparse/argparse.c

View File

@ -92,7 +92,7 @@ docker stop sist2
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
@ -120,7 +120,7 @@ To check if a media file can be parsed without *seek*, execute `cat file.mp4 | f
### OCR
You can enable OCR support for pdf,xps,cbz,fb2,epub file types with the
You can enable OCR support for pdf,xps,cbz,cbr,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).

View File

@ -6,7 +6,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.2.13";
static const char *const Version = "1.2.14";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@ -59,6 +59,8 @@ void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
cbr_init();
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);

View File

@ -1,8 +1,6 @@
#include "arc.h"
#include "src/ctx.h"
#define ARC_BUF_SIZE 8192
int should_parse_filtered_file(const char *filepath, int ext) {
char tmp[PATH_MAX * 2];

View File

@ -2,6 +2,7 @@
#define SIST2_ARC_H
#include "src/sist.h"
#define ARC_BUF_SIZE 8192
int should_parse_filtered_file(const char *filepath, int ext);

53
src/parsing/cbr.c Normal file
View File

@ -0,0 +1,53 @@
/*
 * Use the standard #include directive: #import is an Objective-C directive
 * that GCC only tolerates in C as a deprecated extension.
 */
#include "cbr.h"
#include "src/ctx.h"

/* Mime id looked up for "application/x-cbr"; assigned by cbr_init(). */
unsigned int cbr_mime;
/* Mime id looked up for "application/x-cbz"; assigned by cbr_init(). */
unsigned int cbz_mime;
/*
 * Resolve the mime ids of the cbr and cbz types from ScanCtx.mime_table and
 * cache them in cbr_mime / cbz_mime. ScanCtx.mime_table must already be set
 * when this is called.
 */
void cbr_init() {
    cbr_mime = mime_get_mime_by_string(
            ScanCtx.mime_table,
            "application/x-cbr"
    );

    cbz_mime = mime_get_mime_by_string(
            ScanCtx.mime_table,
            "application/x-cbz"
    );
}
/*
 * Tell whether a mime id is the cbr mime id cached in cbr_mime.
 *
 * Returns 1 when `mime` equals cbr_mime, 0 otherwise.
 */
int is_cbr(unsigned int mime) {
    if (cbr_mime != mime) {
        return 0;
    }
    return 1;
}
/*
 * Parse a cbr (RAR / RAR5 comic book) document held in memory.
 *
 * The archive is re-packed entry by entry into an in-memory ZIP archive,
 * which is then handed to parse_pdf() with doc->mime temporarily set to
 * cbz_mime. doc->mime is set to cbr_mime once parse_pdf() returns.
 *
 * buf      in-memory content of the cbr file
 * buf_len  number of bytes in buf
 * doc      document being parsed
 *
 * The ZIP output buffer is fixed at buf_len * 2 bytes. If the input is
 * missing or empty, an allocation fails, either archive cannot be opened,
 * or the ZIP cannot be written or finalised (for instance because it does
 * not fit in the output buffer), the function returns without calling
 * parse_pdf() and without touching doc->mime. Errors while *reading* the
 * RAR simply end the copy, so whatever entries were converted up to that
 * point are still parsed.
 */
void parse_cbr(void *buf, size_t buf_len, document_t *doc) {
    /* Reject empty input and sizes for which buf_len * 2 would wrap around. */
    if (buf == NULL || buf_len == 0 || buf_len > ((size_t) -1) / 2) {
        return;
    }

    size_t out_buf_len = buf_len * 2;
    char *out_buf = malloc(out_buf_len);
    if (out_buf == NULL) {
        return;
    }
    size_t out_buf_used = 0;
    int zip_ok = 0;

    struct archive *rar_in = archive_read_new();
    struct archive *zip_out = archive_write_new();
    if (rar_in == NULL || zip_out == NULL) {
        goto cleanup;
    }

    archive_read_support_filter_none(rar_in);
    archive_read_support_format_rar(rar_in);
    archive_read_support_format_rar5(rar_in);
    if (archive_read_open_memory(rar_in, buf, buf_len) != ARCHIVE_OK) {
        goto cleanup;
    }

    archive_write_set_format_zip(zip_out);
    if (archive_write_open_memory(zip_out, out_buf, out_buf_len, &out_buf_used) != ARCHIVE_OK) {
        goto cleanup;
    }

    struct archive_entry *entry;
    char arc_buf[ARC_BUF_SIZE];

    while (archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) {
        archive_write_header(zip_out, entry);

        int len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
        while (len > 0) {
            /* A failed write means the ZIP is unusable; give up on it. */
            if (archive_write_data(zip_out, arc_buf, len) < 0) {
                goto cleanup;
            }
            len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
        }
    }

    /* Closing finalises the ZIP; only a finalised archive is worth parsing. */
    if (archive_write_close(zip_out) == ARCHIVE_OK) {
        zip_ok = 1;
    }

cleanup:
    /* out_buf must stay alive until the writer that targets it is freed. */
    if (zip_out != NULL) {
        archive_write_free(zip_out);
    }
    if (rar_in != NULL) {
        archive_read_close(rar_in);
        archive_read_free(rar_in);
    }

    if (zip_ok) {
        /* parse_pdf() is called with the cbz mime; restore the cbr mime after. */
        doc->mime = cbz_mime;
        parse_pdf(out_buf, out_buf_used, doc);
        doc->mime = cbr_mime;
    }

    free(out_buf);
}

12
src/parsing/cbr.h Normal file
View File

@ -0,0 +1,12 @@
/* Support for cbr documents. */
#ifndef SIST2_CBR_H
#define SIST2_CBR_H
#include "src/sist.h"

/*
 * Look up and cache the mime ids for "application/x-cbr" and
 * "application/x-cbz" from ScanCtx.mime_table. Call after the mime table
 * has been set and before is_cbr() / parse_cbr().
 */
void cbr_init();

/* Returns non-zero when `mime` is the cbr mime id cached by cbr_init(). */
int is_cbr(unsigned int mime);

/*
 * Parse a cbr document held in memory: the RAR content in `buf` (`buf_len`
 * bytes) is re-packed as an in-memory ZIP archive and passed to parse_pdf()
 * for `doc`.
 */
void parse_cbr(void *buf, size_t buf_len, document_t *doc);
#endif

View File

@ -149,6 +149,13 @@ void parse(void *arg) {
if (doc_buf != buf && doc_buf != NULL) {
free(doc_buf);
}
} else if (is_cbr(doc.mime)) {
void *cbr_buf = read_all(job, (char *) buf, bytes_read);
parse_cbr(cbr_buf, doc.size, &doc);
if (cbr_buf != buf && cbr_buf != NULL) {
free(cbr_buf);
}
}
//Parent meta

View File

@ -59,6 +59,7 @@
#include "parsing/font.h"
#include "parsing/arc.h"
#include "parsing/doc.h"
#include "parsing/cbr.h"
#include "cli.h"
#include "log.h"
#include "utf8.h/utf8.h"

File diff suppressed because one or more lines are too long

View File

@ -377,6 +377,7 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
|| hit["_source"]["mime"] === "application/pdf"
|| hit["_source"]["mime"] === "application/epub+zip"
|| hit["_source"]["mime"] === "application/x-cbz"
|| hit["_source"]["mime"] === "application/x-cbr"
|| hit["_source"].hasOwnProperty("font_name")
) {
thumbnail = document.createElement("img");

View File

@ -11,7 +11,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">v1.2.13</span>
<span class="badge badge-pill version">v1.2.14</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
</nav>