Initial commit (squashed)

This commit is contained in:
2019-09-27 22:56:07 -04:00
commit 564a17a8fa
75 changed files with 7518 additions and 0 deletions

39
src/ctx.h Normal file
View File

@@ -0,0 +1,39 @@
#ifndef SIST2_CTX_H
#define SIST2_CTX_H
#include "sist.h"
struct {
struct index_t index;
GHashTable *mime_table;
GHashTable *ext_table;
tpool_t *pool;
int tn_size;
int threads;
int content_size;
float tn_qscale;
int stat_tn_size;
int stat_index_size;
GHashTable *original_table;
GHashTable *copy_table;
} ScanCtx;
struct {
char *es_url;
} IndexCtx;
struct {
char *es_url;
int index_count;
struct index_t indices[16];
} WebCtx;
#endif

208
src/index/elastic.c Normal file
View File

@@ -0,0 +1,208 @@
#include "elastic.h"
#include "src/ctx.h"
#include <stdlib.h>
#include "web.h"
#include <stdio.h>
#include <string.h>
#include <cJSON/cJSON.h>
#include <src/ctx.h>
#include "static_generated.c"
#define BULK_INDEX_SIZE 100
typedef struct es_indexer {
int queued;
char *es_url;
es_bulk_line_t *line_head;
es_bulk_line_t *line_tail;
} es_indexer_t;
static es_indexer_t *Indexer;
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON *line = cJSON_CreateObject();
cJSON_AddStringToObject(line, "_id", uuid_str);
cJSON_AddStringToObject(line, "_index", "sist2");
cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemToObject(line, "_source", document);
char *json = cJSON_PrintUnformatted(line);
printf("%s\n", json);
cJSON_free(line);
}
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
memcpy(bulk_line->line, json, json_len);
memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
*(bulk_line->line + json_len) = '\n';
*(bulk_line->line + json_len + 1) = '\0';
bulk_line->next = NULL;
cJSON_free(json);
elastic_index_line(bulk_line);
}
void elastic_flush() {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
es_bulk_line_t *line = Indexer->line_head;
int count = 0;
size_t buf_size = 0;
size_t buf_cur = 0;
char *buf = malloc(1);
while (line != NULL) {
char action_str[512];
snprintf(action_str, 512, "{\"index\":{\"_id\": \"%s\"}}\n", line->uuid_str);
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
buf = realloc(buf, buf_size + line_len + action_str_len);
buf_size += line_len + action_str_len;
memcpy(buf + buf_cur, action_str, action_str_len);
buf_cur += action_str_len;
memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len;
es_bulk_line_t *tmp = line;
line = line->next;
free(tmp);
count++;
}
buf = realloc(buf, buf_size + 1);
*(buf+buf_cur) = '\0';
Indexer->line_head = NULL;
Indexer->line_tail = NULL;
Indexer->queued = 0;
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
printf("Indexed %3d documents (%zukB) <%d>\n", count, buf_cur / 1024, r->status_code);
free_response(r);
}
void elastic_index_line(es_bulk_line_t *line) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
if (Indexer->line_head == NULL) {
Indexer->line_head = line;
Indexer->line_tail = Indexer->line_head;
} else {
Indexer->line_tail->next = line;
Indexer->line_tail = line;
}
Indexer->queued += 1;
if (Indexer->queued >= BULK_INDEX_SIZE) {
elastic_flush();
}
}
es_indexer_t *create_indexer(const char *url) {
size_t url_len = strlen(url);
char *es_url = malloc(url_len);
strcpy(es_url, url);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
indexer->es_url = es_url;
indexer->queued = 0;
indexer->line_head = NULL;
indexer->line_tail = NULL;
return indexer;
}
void destroy_indexer() {
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
response_t *r = web_post(url, "", NULL);
printf("Refresh index <%d>\n", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Merge index <%d>\n", r->status_code);
free_response(r);
if (Indexer != NULL) {
free(Indexer->es_url);
free(Indexer);
}
}
void elastic_init(int force_reset) {
// Check if index exists
char url[4096];
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
response_t *r = web_get(url);
int index_exists = r->status_code == 200;
free_response(r);
if (!index_exists || force_reset) {
r = web_delete(url);
printf("Delete index <%d>\n", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
r = web_put(url, "", NULL);
printf("Create index <%d>\n", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Close index <%d>\n", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json, "Content-Type: application/json");
printf("Update settings <%d>\n", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_mappings", IndexCtx.es_url);
r = web_put(url, mappings_json, "Content-Type: application/json");
printf("Update mappings <%d>\n", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Open index <%d>\n", r->status_code);
free_response(r);
}
}
cJSON *elastic_get_document(const char *uuid_str) {
char url[4096];
snprintf(url, 4096, "%s/sist2/_source/%s", WebCtx.es_url, uuid_str);
response_t *r = web_get(url);
return cJSON_Parse(r->body);
}

33
src/index/elastic.h Normal file
View File

@@ -0,0 +1,33 @@
#ifndef SIST2_ELASTIC_H
#define SIST2_ELASTIC_H
#include "src/sist.h"
typedef struct es_bulk_line {
struct es_bulk_line *next;
char uuid_str[UUID_STR_LEN];
char line[0];
} es_bulk_line_t;
/**
* Note: indexer is *not* thread safe
*/
typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void elastic_flush();
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url);
void destroy_indexer();
void elastic_init(int force_reset);
cJSON *elastic_get_document(const char *uuid_str);
#endif

File diff suppressed because one or more lines are too long

130
src/index/web.c Normal file
View File

@@ -0,0 +1,130 @@
#include "web.h"
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
size_t real_size = size * nmemb;
dyn_buffer_t *buf = user_data;
dyn_buffer_write(buf, ptr, real_size);
return real_size;
}
void free_response(response_t *resp) {
free(resp->body);
free(resp);
}
response_t *web_get(const char *url) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_post(const char *url, const char *data, const char *header) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
int r1 = curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_put(const char *url, const char *data, const char *header) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_delete(const char *url) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}

19
src/index/web.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef SIST2_WEB_H
#define SIST2_WEB_H
#include "src/sist.h"
typedef struct response {
char *body;
size_t size;
int status_code;
} response_t;
response_t *web_get(const char *url);
response_t *web_post(const char * url, const char * data, const char* header);
response_t *web_put(const char *url, const char *data, const char *header);
response_t *web_delete(const char *url);
void free_response(response_t *resp);
#endif

327
src/io/serialize.c Normal file
View File

@@ -0,0 +1,327 @@
#include "src/ctx.h"
#include "serialize.h"
static __thread int IndexFd = -1;
typedef struct {
unsigned char uuid[16];
unsigned long ino;
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
} line_t;
void skip_meta(FILE *file) {
enum metakey key = getc(file);
while (key != '\n') {
if (IS_META_INT(key)) {
fseek(file, sizeof(int), SEEK_CUR);
} else if (IS_META_LONG(key)) {
fseek(file, sizeof(long), SEEK_CUR);
} else {
while ((getc(file))) {}
}
key = getc(file);
}
}
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "uuid", desc->uuid);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror(path);
}
char *str = cJSON_Print(json);
write(fd, str, strlen(str));
free(str);
close(fd);
cJSON_Delete(json);
}
index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size);
*(buf + info.st_size) = '\0';
close(fd);
cJSON *json = cJSON_Parse(buf);
index_descriptor_t descriptor;
descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble;
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (int)strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
cJSON_Delete(json);
free(buf);
return descriptor;
}
char *get_meta_key_text(enum metakey meta_key) {
switch (meta_key) {
case MetaContent:
return "content";
case MetaWidth:
return "width";
case MetaHeight:
return "height";
case MetaMediaDuration:
return "duration";
case MetaMediaAudioCodec:
return "audioc";
case MetaMediaVideoCodec:
return "videoc";
case MetaMediaBitrate:
return "bitrate";
case MetaArtist:
return "artist";
case MetaAlbum:
return "album";
case MetaAlbumArtist:
return "album_artist";
case MetaGenre:
return "genre";
case MetaTitle:
return "title";
case MetaFontName:
return "font_name";
default:
return NULL;
}
}
void write_document(document_t *doc) {
if (IndexFd == -1) {
char dstfile[PATH_MAX];
pid_t tid = syscall(SYS_gettid);
snprintf(dstfile, PATH_MAX, "%s_index_%d", ScanCtx.index.path, tid);
IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
if (IndexFd == -1) {
perror("open");
}
}
dyn_buffer_t buf = dyn_buffer_create();
// Ignore root directory in the file path
doc->ext = (short)(doc->ext - ScanCtx.index.desc.root_len);
doc->base = (short)(doc->base - ScanCtx.index.desc.root_len);
doc->filepath += ScanCtx.index.desc.root_len;
dyn_buffer_write(&buf, doc, sizeof(line_t));
dyn_buffer_write_str(&buf, doc->filepath);
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
dyn_buffer_write_char(&buf, meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->intval);
} else if (IS_META_LONG(meta->key)) {
dyn_buffer_write_long(&buf, meta->longval);
} else {
dyn_buffer_write_str(&buf, meta->strval);
}
meta_line_t *tmp = meta;
meta = meta->next;
free(tmp);
}
dyn_buffer_write_char(&buf, '\n');
write(IndexFd, buf.buf, buf.cur);
ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf);
}
void serializer_cleanup() {
close(IndexFd);
}
void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {
line_t line;
dyn_buffer_t buf = dyn_buffer_create();
FILE *file = fopen(path, "rb");
while (1) {
buf.cur = 0;
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
cJSON *document = cJSON_CreateObject();
cJSON_AddStringToObject(document, "index", index_id);
char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str);
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
cJSON_AddNumberToObject(document, "size", (double)line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
}
dyn_buffer_write_char(&buf, '\0');
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') {
*(buf.buf + line.ext - 1) = '\0';
} else {
*(buf.buf + line.ext) = '\0';
}
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
*(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
enum metakey key = getc(file);
while (key != '\n') {
switch (key) {
case MetaWidth:
case MetaHeight:
case MetaMediaDuration:
case MetaMediaBitrate: {
int value;
fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec: {
int value;
fread(&value, sizeof(int), 1, file);
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
if (desc != NULL) {
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
}
break;
}
case MetaContent:
case MetaArtist:
case MetaAlbum:
case MetaAlbumArtist:
case MetaGenre:
case MetaFontName:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
}
dyn_buffer_write_char(&buf, '\0');
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break;
}
}
key = getc(file);
}
func(document, uuid_str);
cJSON_free(document);
}
fclose(file);
}
void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb");
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
incremental_put(table, line.ino, line.mtime);
while ((getc(file))) {}
skip_meta(file);
}
fclose(file);
}
/**
* Copy items from an index that are in the copy_table. Also copies from
* the store.
*/
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table) {
FILE *file = fopen(filepath, "rb");
FILE *dst_file = fopen(dst_filepath, "ab");
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
if (incremental_get(copy_table, line.ino)) {
fwrite(&line, sizeof(line), 1, dst_file);
size_t buf_len;
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
char c;
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
}
fwrite("\0", sizeof(c), 1, dst_file);
enum metakey key;
while (1) {
key = getc(file);
if (key == '\n') {
break;
}
fwrite(&key, sizeof(char), 1, dst_file);
if (IS_META_INT(key)) {
int val;
fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else if (IS_META_LONG(key)) {
long val;
fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else {
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
}
fwrite("\0", sizeof(c), 1, dst_file);
}
}
} else {
skip_meta(file);
}
}
fclose(file);
}

27
src/io/serialize.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef SIST2_SERIALIZE_H
#define SIST2_SERIALIZE_H
#include "src/sist.h"
#include <sys/syscall.h>
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void write_document(document_t *doc);
void read_index(const char *path, const char[UUID_STR_LEN], index_func);
void incremental_read(GHashTable *table, const char *filepath);
/**
* Must be called after write_document
*/
void serializer_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc);
index_descriptor_t read_index_descriptor(char *path);
#endif

105
src/io/store.c Normal file
View File

@@ -0,0 +1,105 @@
#include "store.h"
#include "src/ctx.h"
store_t *store_create(char *path) {
store_t *store = malloc(sizeof(struct store_t));
pthread_rwlock_init(&store->lock, NULL);
mdb_env_create(&store->env);
int open_ret = mdb_env_open(store->env,
path,
MDB_WRITEMAP | MDB_MAPASYNC,
S_IRUSR | S_IWUSR
);
if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s", mdb_strerror(open_ret));
exit(1);
}
store->size = (size_t) 1024 * 1024 * 5;
ScanCtx.stat_tn_size = store->size;
mdb_env_set_mapsize(store->env, store->size);
// Open dbi
MDB_txn *txn;
int r3 = mdb_txn_begin(store->env, NULL, 0, &txn);
int r4 = mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_commit(txn);
return store;
}
void store_destroy(store_t *store) {
pthread_rwlock_destroy(&store->lock);
mdb_close(store->env, store->dbi);
mdb_env_close(store->env);
free(store);
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
MDB_val mdb_value;
mdb_value.mv_data = buf;
mdb_value.mv_size = buf_len;
MDB_txn *txn;
pthread_rwlock_rdlock(&store->lock);
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
if (put_ret == MDB_MAP_FULL) {
mdb_txn_abort(txn);
pthread_rwlock_unlock(&store->lock);
// Cannot resize when there is a opened transaction.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += 1024 * 1024 * 5;
mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
ScanCtx.stat_tn_size = store->size;
}
mdb_txn_commit(txn);
pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) {
printf("%s\n", mdb_strerror(put_ret));
}
}
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *buf = NULL;
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
MDB_val mdb_value;
MDB_txn *txn;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
int get_ret = mdb_get(txn, store->dbi, &mdb_key, &mdb_value);
if (get_ret == MDB_NOTFOUND) {
*ret_vallen = 0;
} else {
*ret_vallen = mdb_value.mv_size;
buf = malloc(mdb_value.mv_size);
memcpy(buf, mdb_value.mv_data, mdb_value.mv_size);
}
mdb_txn_abort(txn);
return buf;
}

24
src/io/store.h Normal file
View File

@@ -0,0 +1,24 @@
#ifndef SIST2_STORE_H
#define SIST2_STORE_H
#include <pthread.h>
#include <lmdb.h>
typedef struct store_t {
MDB_dbi dbi;
MDB_env *env;
size_t size;
pthread_rwlock_t lock;
} store_t;
#include "src/sist.h"
store_t *store_create(char *path);
void store_destroy(store_t *store);
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
#endif

34
src/io/walk.c Normal file
View File

@@ -0,0 +1,34 @@
#include "walk.h"
#include "src/ctx.h"
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
memcpy(&(job->filepath), filepath, len + 1);
job->base = base;
char *p = strrchr(filepath + base, '.');
if (p != NULL) {
job->ext = (int)(p - filepath + 1);
} else {
job->ext = len;
}
memcpy(&(job->info), info, sizeof(struct stat));
return job;
}
int handle_entry(const char *filepath, const struct stat *info, const int typeflag, struct FTW *ftw) {
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job);
}
return 0;
}
int walk_directory_tree(const char *dirpath) {
return nftw(dirpath, handle_entry, 15, FTW_PHYS);
}

10
src/io/walk.h Normal file
View File

@@ -0,0 +1,10 @@
#ifndef WALK_H
#define WALK_H
#define _XOPEN_SOURCE 500
#include "src/sist.h"
int walk_directory_tree(const char *);
#endif

333
src/main.c Normal file
View File

@@ -0,0 +1,333 @@
#include "sist.h"
#include "ctx.h"
static const char *const Version = "1.0.0";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
"sist2 web [OPTION]... INDEX...",
NULL,
};
void init_dir(const char *dirpath) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
uuid_t uuid;
uuid_generate_time_safe(uuid);
uuid_unparse(uuid, ScanCtx.index.desc.uuid);
time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version);
write_index_descriptor(path, &ScanCtx.index.desc);
}
void scan_print_header() {
printf("sist2 V%s\n", Version);
printf("---------------------\n");
printf("threads\t\t%d\n", ScanCtx.threads);
printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale);
printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
printf("output\t\t%s\n", ScanCtx.index.path);
}
void sist2_scan(const char *path, const char *incremental_from) {
av_log_set_level(AV_LOG_QUIET);
strcpy(ScanCtx.index.desc.root, abspath(path));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
init_dir(ScanCtx.index.path);
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.store = store_create(store_path);
scan_print_header();
if (incremental_from != NULL) {
incremental_from = abspath(incremental_from);
ScanCtx.original_table = incremental_get_table();
ScanCtx.copy_table = incremental_get_table();
DIR *dir = opendir(incremental_from);
if (dir == NULL) {
perror("opendir");
return;
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", incremental_from, de->d_name);
incremental_read(ScanCtx.original_table, file_path);
}
}
closedir(dir);
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
}
walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool);
if (incremental_from != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", incremental_from);
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
store_t *source = store_create(store_path);
DIR *dir = opendir(incremental_from);
if (dir == NULL) {
perror("opendir");
return;
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", incremental_from, de->d_name);
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
}
}
closedir(dir);
store_destroy(source);
}
store_destroy(ScanCtx.index.store);
tpool_destroy(ScanCtx.pool);
}
void sist2_index(const char *path, int print_index, int arg_force_reset) {
if (!print_index) {
elastic_init(arg_force_reset);
}
char *index_path = abspath(path);
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
if (strcmp(desc.version, Version) != 0) {
fprintf(stderr, "Version mismatch! Index is v%s but executable is v%s\n", desc.version, Version);
return;
}
DIR *dir = opendir(index_path);
if (dir == NULL) {
perror("opendir");
return;
}
index_func f;
if (print_index) {
f = print_json;
} else {
f = index_json;
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", index_path, de->d_name);
read_index(file_path, desc.uuid, f);
}
}
if (!print_index) {
elastic_flush();
destroy_indexer();
}
}
void sist2_web(const char *indices[], int index_count, const char *host, const char *port) {
for (int i = 0; i < index_count; i++) {
char *abs_path = abspath(indices[i]);
char path_tmp[PATH_MAX];
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
WebCtx.indices[i].store = store_create(path_tmp);
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
strcpy(WebCtx.indices[i].path, abs_path);
printf("Loaded index: %s\n", WebCtx.indices[i].desc.name);
}
WebCtx.index_count = index_count;
serve(host, port);
}
int main(int argc, const char *argv[]) {
curl_global_init(CURL_GLOBAL_NOTHING);
float arg_quality = 0;
int arg_size = 0;
int arg_content_size = 0;
int arg_threads = 0;
char *arg_incremental = NULL;
char *arg_output = NULL;
char *arg_rewrite_url = NULL;
char *arg_name = NULL;
char *arg_es_url = NULL;
int arg_print_index = 0;
int arg_force_reset = 0;
char *arg_web_host = NULL;
char *arg_web_port = NULL;
struct argparse_option options[] = {
OPT_HELP(),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &arg_threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &arg_quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
OPT_INTEGER(0, "size", &arg_size, "Thumbnail size, in pixels. DEFAULT=200"),
OPT_INTEGER(0, "content-size", &arg_content_size,
"Number of bytes to be extracted from text documents. DEFAULT=4096"),
OPT_STRING(0, "incremental", &arg_incremental, "Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &arg_output, "Output directory. DEFAULT=index.sist2/"),
OPT_STRING(0, "rewrite-url", &arg_rewrite_url, "Serve files from this url instead of from disk."),
OPT_STRING(0, "name", &arg_name, "Index display name. DEFAULT: (name of the directory)"),
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &arg_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &arg_print_index, "Just print JSON documents to stdout."),
OPT_BOOLEAN('f', "force-reset", &arg_force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &arg_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &arg_web_host, "Listen on this address. DEFAULT=localhost"),
OPT_STRING(0, "port", &arg_web_port, "Listen on this port. DEFAULT=4090"),
OPT_END(),
};
struct argparse argparse;
argparse_init(&argparse, options, usage, 0);
argparse_describe(
&argparse,
"\nLightning-fast file system indexer and search tool.",
"\nMade by simon987 <me@simon987.net>. Released under GPL-3.0"
);
argc = argparse_parse(&argparse, argc, argv);
//Set defaults
if (arg_quality == 0) {
arg_quality = 15;
} else if (arg_quality < 1 || arg_quality > 31) {
fprintf(stderr, "Invalid quality: %f\n", arg_quality);
return 1;
}
if (arg_size == 0) {
arg_size = 200;
} else if (arg_size <= 0) {
fprintf(stderr, "Invalid size: %d\n", arg_size);
return 1;
}
if (arg_content_size == 0) {
arg_content_size = 4096;
} else if (arg_content_size <= 0) {
fprintf(stderr, "Invalid content-size: %d\n", arg_content_size);
return 1;
}
if (arg_threads == 0) {
arg_threads = 1;
} else if (arg_threads < 0) {
fprintf(stderr, "Invalid threads: %d\n", arg_threads);
return 1;
}
if (arg_output == NULL) {
arg_output = "index.sist2/";
}
if (arg_es_url == NULL) {
arg_es_url = "http://localhost:9200";
}
if (arg_web_host == NULL) {
arg_web_host = "localhost";
}
if (arg_web_port == NULL) {
arg_web_port = "4090";
}
// Commands
if (argc == 0) {
argparse_usage(&argparse);
} else if (strcmp(argv[0], "scan") == 0) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
argparse_usage(&argparse);
return 1;
}
if (arg_name == NULL) {
arg_name = g_path_get_basename(argv[1]);
}
int ret = mkdir(arg_output, S_IRUSR | S_IWUSR | S_IXUSR);
if (ret != 0) {
fprintf(stderr, "Invalid output: '%s' (%s).\n", arg_output, strerror(errno));
return 1;
}
ScanCtx.tn_qscale = arg_quality;
ScanCtx.tn_size = arg_size;
ScanCtx.content_size = arg_content_size;
ScanCtx.pool = tpool_create(arg_threads, serializer_cleanup);
ScanCtx.threads = arg_threads;
strncpy(ScanCtx.index.path, arg_output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, arg_name, sizeof(ScanCtx.index.desc.name));
if (arg_rewrite_url == NULL) {
strcpy(ScanCtx.index.desc.rewrite_url, "");
} else {
strcpy(ScanCtx.index.desc.rewrite_url, arg_rewrite_url);
}
sist2_scan(argv[1], arg_incremental);
} else if (strcmp(argv[0], "index") == 0) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
argparse_usage(&argparse);
return 1;
}
IndexCtx.es_url = arg_es_url;
sist2_index(argv[1], arg_print_index, arg_force_reset);
} else if (strcmp(argv[0], "web") == 0) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
argparse_usage(&argparse);
return 1;
}
WebCtx.es_url = arg_es_url;
sist2_web(argv + 1, argc - 1, arg_web_host, arg_web_port);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
return 1;
}
printf("\n");
return 0;
}

211
src/parsing/font.c Normal file
View File

@@ -0,0 +1,211 @@
#include "font.h"
#include "ft2build.h"
#include "freetype/freetype.h"
#include "src/ctx.h"
__thread FT_Library library = NULL;
typedef struct text_dimensions {
unsigned int width;
unsigned int height;
unsigned int baseline;
} text_dimensions_t;
typedef struct glyph {
unsigned int top;
unsigned int height;
unsigned int width;
unsigned int descent;
unsigned int ascent;
unsigned int advance_width;
unsigned char *pixmap;
} glyph_t;
__always_inline
int kerning_offset(char c, char pc, FT_Face face) {
FT_Vector kerning;
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
return (int) (kerning.x / 64);
}
__always_inline
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
glyph_t glyph;
glyph.pixmap = slot->bitmap.buffer;
glyph.width = slot->bitmap.width;
glyph.height = slot->bitmap.rows;
glyph.top = slot->bitmap_top;
glyph.advance_width = slot->advance.x / 64;
glyph.descent = MAX(0, glyph.height - glyph.top);
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
return glyph;
}
__always_inline
glyph_t get_glyph(char character, FT_Face face) {
}
text_dimensions_t text_dimension(char *text, FT_Face face) {
text_dimensions_t dimensions;
dimensions.width = 0;
int num_chars = (int) strlen(text);
unsigned int max_ascent = 0;
unsigned int max_descent = 0;
char pc = 0;
for (int i = 0; i < num_chars; i++) {
char c = text[i];
FT_Load_Char(face, c, 0);
glyph_t glyph = ft_glyph_to_glyph(face->glyph);
max_descent = MAX(max_descent, glyph.descent);
max_ascent = MAX(max_ascent, glyph.ascent);
int kerning_x = kerning_offset(c, pc, face);
dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;
pc = c;
}
dimensions.height = max_ascent + max_descent;
dimensions.baseline = max_descent;
return dimensions;
}
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
unsigned int src = 0;
unsigned int dst = y * text_info.width + x;
unsigned int row_offset = text_info.width - glyph->width;
unsigned int buf_len = text_info.width * text_info.height;
for (unsigned int sy = 0; sy < glyph->height; sy++) {
for (unsigned int sx = 0; sx < glyph->width; sx++) {
if (dst < buf_len) {
bitmap[dst] |= glyph->pixmap[src];
}
src++;
dst++;
}
dst += row_offset;
}
}
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {
dyn_buffer_write_short(buf, 0x4D42); // Magic
dyn_buffer_write_int(buf, 0); // Size placeholder
dyn_buffer_write_int(buf, 0x5157); //Reserved
dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset
dyn_buffer_write_int(buf, 40); // DIB size
dyn_buffer_write_int(buf, (int) dimensions.width);
dyn_buffer_write_int(buf, (int) dimensions.height);
dyn_buffer_write_short(buf, 1); // Color planes
dyn_buffer_write_short(buf, 8); // bits per pixel
dyn_buffer_write_int(buf, 0); // compression
dyn_buffer_write_int(buf, 0); // Ignored
dyn_buffer_write_int(buf, 3800); // hres
dyn_buffer_write_int(buf, 3800); // vres
dyn_buffer_write_int(buf, 256); // Color count
dyn_buffer_write_int(buf, 0); // Ignored
// RGBA32 Color table (Grayscale)
for (int i = 255; i >= 0; i--) {
dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
}
// Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
for (int y = (int) dimensions.height - 1; y >= 0; y--) {
for (unsigned int x = 0; x < dimensions.width; x++) {
dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
}
while (buf->cur % 4 != 0) {
dyn_buffer_write_char(buf, 0);
}
}
// Size
*(int *) ((char *) buf->buf + 2) = buf->cur;
}
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (library == NULL) {
FT_Init_FreeType(&library);
}
FT_Face face;
FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);
if (err != 0) {
return;
}
char font_name[1024];
if (face->style_name == NULL || *(face->style_name) == '?') {
strcpy(font_name, face->family_name);
} else {
snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name);
}
meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name));
meta_name->key = MetaFontName;
strcpy(meta_name->strval, font_name);
APPEND_META(doc, meta_name)
int pixel = 64;
int num_chars = (int) strlen(font_name);
err = FT_Set_Pixel_Sizes(face, 0, pixel);
if (err != 0) {
return;
}
text_dimensions_t dimensions = text_dimension(font_name, face);
unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1);
FT_Vector pen;
pen.x = 0;
char pc = 0;
for (int i = 0; i < num_chars; i++) {
char c = font_name[i];
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
continue;
}
glyph_t glyph = ft_glyph_to_glyph(face->glyph);
pen.x += kerning_offset(c, pc, face);
pen.y = dimensions.height - glyph.ascent - dimensions.baseline;
draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);
pen.x += glyph.advance_width;
pc = c;
}
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);
FT_Done_Face(face);
}

9
src/parsing/font.h Normal file
View File

@@ -0,0 +1,9 @@
#ifndef SIST2_FONT_H
#define SIST2_FONT_H
#include "src/sist.h"
void parse_font(const char * buf, size_t buf_len, document_t *doc);
#endif

269
src/parsing/media.c Normal file
View File

@@ -0,0 +1,269 @@
#include "src/sist.h"
#include "src/ctx.h"
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
jpeg->width = dstW;
jpeg->height = dstH;
jpeg->time_base.den = 1000000;
jpeg->time_base.num = 1;
jpeg->i_quant_factor = qscale;
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
if (ret != 0) {
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
return NULL;
}
return jpeg;
}
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
AVFrame *scaled_frame = av_frame_alloc();
int dstW;
int dstH;
if (frame->width <= size && frame->height <= size) {
dstW = frame->width;
dstH = frame->height;
} else {
double ratio = (double) frame->width / frame->height;
if (frame->width > frame->height) {
dstW = size;
dstH = (int) (size / ratio);
} else {
dstW = (int) (size * ratio);
dstH = size;
}
}
struct SwsContext *ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0
);
int dst_buf_len = avpicture_get_size(AV_PIX_FMT_YUVJ420P, dstW, dstH);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
avpicture_fill((AVPicture *) scaled_frame, dst_buf, AV_PIX_FMT_YUVJ420P, dstW, dstH);
sws_scale(ctx,
(const uint8_t *const *) frame->data, frame->linesize,
0, decoder->height,
scaled_frame->data, scaled_frame->linesize
);
scaled_frame->width = dstW;
scaled_frame->height = dstH;
scaled_frame->format = AV_PIX_FMT_YUV420P;
sws_freeContext(ctx);
return scaled_frame;
}
AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx) {
AVFrame *frame = av_frame_alloc();
AVPacket avPacket;
av_init_packet(&avPacket);
int receive_ret = -EAGAIN;
while (receive_ret == -EAGAIN) {
// Get video frame
while (1) {
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) {
fprintf(stderr, "Error reading frame: %s\n", av_err2str(read_frame_ret));
}
av_frame_free(&frame);
av_packet_unref(&avPacket);
return NULL;
}
//Ignore audio/other frames
if (avPacket.stream_index != stream_idx) {
av_packet_unref(&avPacket);
continue;
}
break;
}
// Feed it to decoder
int decode_ret = avcodec_send_packet(decoder, &avPacket);
if (decode_ret != 0) {
printf("Error decoding frame: %s\n", av_err2str(decode_ret));
}
av_packet_unref(&avPacket);
receive_ret = avcodec_receive_frame(decoder, frame);
}
return frame;
}
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char *key = tag->key;
for (; *key; ++key) *key = (char) tolower(*key);
if (strcmp(tag->key, "artist") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaArtist;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "genre") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaGenre;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "title") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaTitle;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "album_artist") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaAlbumArtist;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "album") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaAlbum;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
}
}
}
void parse_media(const char *filepath, document_t *doc) {
int video_stream = -1;
AVFormatContext *pFormatCtx = avformat_alloc_context();
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
printf("ERR%s %s\n", filepath, av_err2str(res));
return;
}
avformat_find_stream_info(pFormatCtx, NULL);
for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
AVStream *stream = pFormatCtx->streams[i];
if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
meta_audio->key = MetaMediaAudioCodec;
meta_audio->intval = stream->codecpar->codec_id;
APPEND_META(doc, meta_audio)
append_audio_meta(pFormatCtx, doc);
} else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
meta_vid->key = MetaMediaVideoCodec;
meta_vid->intval = stream->codecpar->codec_id;
APPEND_META(doc, meta_vid)
meta_line_t *meta_w = malloc(sizeof(meta_line_t));
meta_w->key = MetaWidth;
meta_w->intval = stream->codecpar->width;
APPEND_META(doc, meta_w)
meta_line_t *meta_h = malloc(sizeof(meta_line_t));
meta_h->key = MetaHeight;
meta_h->intval = stream->codecpar->height;
APPEND_META(doc, meta_h)
video_stream = i;
}
}
if (video_stream != -1) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->nb_frames > 1) {
//This is a video (not a still image)
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->intval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
if (stream->codecpar->width <= 20 || stream->codecpar->height <= 20) {
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Decoder
AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
AVCodecContext *decoder = avcodec_alloc_context3(video_codec);
avcodec_parameters_to_context(decoder, stream->codecpar);
avcodec_open2(decoder, video_codec, NULL);
//Seek
if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ret = 0;
for (int i = 20; i >= 0; i--) {
seek_ret = av_seek_frame(pFormatCtx, video_stream,
stream->duration * 0.10, 0);
if (seek_ret == 0) {
break;
}
}
}
AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream);
if (frame == NULL) {
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_frame_free(&frame);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
avcodec_free_context(&jpeg_encoder);
avcodec_free_context(&decoder);
}
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
}

11
src/parsing/media.h Normal file
View File

@@ -0,0 +1,11 @@
#ifndef SIST2_MEDIA_H
#define SIST2_MEDIA_H
#include "src/sist.h"
#define MIN_VIDEO_SIZE 1024 * 64
void parse_media(const char * filepath, document_t *doc);
#endif

20
src/parsing/mime.c Normal file
View File

@@ -0,0 +1,20 @@
#include "mime.h"
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
char lower[64];
char *p = lower;
while ((*ext)) {
*p++ = (char)tolower(*ext++);
}
*p = '\0';
return (size_t) g_hash_table_lookup(ext_table, lower);
}
unsigned int mime_get_mime_by_string(GHashTable *mime_table, const char * str) {
const char * ptr = str;
while (*ptr == ' ' || *ptr == '[') {
ptr++;
}
return (size_t) g_hash_table_lookup(mime_table, ptr);
}

45
src/parsing/mime.h Normal file
View File

@@ -0,0 +1,45 @@
#ifndef SIST2_MIME_H
#define SIST2_MIME_H
#include "src/sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
#define MIME_EMPTY 1
#define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE
#define PDF_MASK 0x40000000
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
#define FONT_MASK 0x20000000
#define IS_FONT(mime_id) (mime_id & FONT_MASK) == FONT_MASK
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,
MimeExample = 2,
MimeMessage = 3,
MimeMultipart = 4,
MimeFont = 5,
MimeVideo = 6,
MimeAudio = 7,
MimeImage = 8,
MimeText = 9,
MimeApplication = 10,
};
enum mime;
GHashTable *mime_get_mime_table();
GHashTable *mime_get_ext_table();
char *mime_get_mime_text(unsigned int);
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext);
unsigned int mime_get_mime_by_string(GHashTable *mime_table, const char * str);
#endif

1552
src/parsing/mime_generated.c Normal file

File diff suppressed because it is too large Load Diff

126
src/parsing/parse.c Normal file
View File

@@ -0,0 +1,126 @@
#include "src/sist.h"
#include "src/ctx.h"
__thread magic_t Magic;
void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
void *full_buf;
if (job->info.st_size <= bytes_read) {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, job->info.st_size);
} else {
if (*fd == -1) {
*fd = open(job->filepath, O_RDONLY);
if (*fd == -1) {
perror("open");
printf("%s\n", job->filepath);
free(job);
return NULL;
}
}
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, bytes_read);
int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) {
perror("read");
}
}
return full_buf;
}
void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
if (incremental_get(ScanCtx.original_table, job->info.st_ino) == job->info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
free(job);
return;
}
if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
}
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
doc.base = (short) job->base;
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.mime = 0;
doc.size = job->info.st_size;
doc.ino = job->info.st_ino;
doc.mtime = job->info.st_mtim.tv_sec;
uuid_generate_time_safe(doc.uuid);
char *buf[PARSE_BUF_SIZE];
if (job->info.st_size == 0) {
doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0') {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
}
int fd = -1;
int bytes_read = 0;
if (doc.mime == 0) {
// Get mime type with libmagic
fd = open(job->filepath, O_RDONLY);
if (fd == -1) {
perror("open");
free(job);
return;
}
bytes_read = read(fd, buf, PARSE_BUF_SIZE);
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
if (doc.mime == 0) {
fprintf(stderr, "Couldn't find mime %s, %s!\n", magic_mime_str, job->filepath + job->base);
}
}
}
int mmime = MAJOR_MIME(doc.mime);
if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) {
parse_media(job->filepath, &doc);
} else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_pdf(pdf_buf, doc.size, &doc);
if (pdf_buf != buf) {
free(pdf_buf);
}
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
parse_text(bytes_read, &fd, (char *) buf, &doc);
} else if (IS_FONT(doc.mime)) {
void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_font(font_buf, doc.size, &doc);
if (font_buf != buf) {
free(font_buf);
}
}
write_document(&doc);
if (fd != -1) {
close(fd);
}
free(job);
}

10
src/parsing/parse.h Normal file
View File

@@ -0,0 +1,10 @@
#ifndef SIST2_PARSE_H
#define SIST2_PARSE_H
#include "src/sist.h"
#define PARSE_BUF_SIZE 4096
void parse(void *arg);
#endif

114
src/parsing/pdf.c Normal file
View File

@@ -0,0 +1,114 @@
#include "pdf.h"
#include "src/ctx.h"
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_page *cover = fz_load_page(ctx, fzdoc, 0);
fz_rect bounds = fz_bound_page(ctx, cover);
float scale;
unsigned char *tn_buf;
float w = (float) bounds.x1 - bounds.x0;
float h = (float) bounds.y1 - bounds.y0;
if (w > h) {
scale = (float) ScanCtx.tn_size / w;
} else {
scale = (float) ScanCtx.tn_size / h;
}
fz_matrix m = fz_scale(scale, scale);
fz_pixmap *pixmap;
fz_colorspace *color_space = fz_device_rgb(ctx);
pixmap = fz_new_pixmap_from_page(ctx, cover, m, color_space, 0);
fz_buffer *fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
fz_drop_pixmap(ctx, pixmap);
fz_drop_buffer(ctx, fzbuf);
return cover;
}
void fz_noop_callback(void *user, const char *message) {
}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
//TODO error handling
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
fz_try(ctx)
{
fz_register_document_handlers(ctx);
ctx->warn.print = fz_noop_callback; //disable warnings
ctx->error.print = fz_noop_callback;
fz_stream *stream = fz_open_memory(ctx, buf, buf_len);
fz_document *fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
int page_count = fz_count_pages(ctx, fzdoc);
fz_page *cover = render_cover(ctx, doc, fzdoc);
fz_stext_options opts;
fz_parse_stext_options(ctx, &opts, "preserve-ligatures");
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page;
if (current_page == 0) {
page = cover;
} else {
page = fz_load_page(ctx, fzdoc, current_page);
}
fz_stext_page *stext = fz_new_stext_page_from_page(ctx, page, &opts);
fz_stext_block *block = stext->first_block;
while (block != NULL) {
if (block->type != FZ_STEXT_BLOCK_TEXT) {
continue;
}
fz_stext_line *line = block->u.t.first_line;
while (line != NULL) {
fz_stext_char *c = line->first_char;
while (c != NULL) {
if (text_buffer_append_char(&text_buf, c->c) == TEXT_BUF_FULL) {
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
goto write_loop_end;
}
c = c->next;
}
line = line->next;
}
block = block->next;
}
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
}
write_loop_end:;
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
text_buffer_destroy(&text_buf);
APPEND_META(doc, meta_content)
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
}
fz_catch(ctx)
{
// printf("err");
}
}

9
src/parsing/pdf.h Normal file
View File

@@ -0,0 +1,9 @@
#ifndef SIST2_PDF_H
#define SIST2_PDF_H
#include "src/sist.h"
void parse_pdf(void *buf, size_t buf_len, document_t *doc);
#endif

43
src/parsing/text.c Normal file
View File

@@ -0,0 +1,43 @@
#include "text.h"
#include "src/ctx.h"
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
char *intermediate_buf;
int intermediate_buf_len;
if (bytes_read == doc->size || bytes_read >= ScanCtx.content_size) {
int to_copy = MIN(bytes_read, ScanCtx.content_size);
intermediate_buf = malloc(to_copy);
intermediate_buf_len = to_copy;
memcpy(intermediate_buf, buf, to_copy);
} else {
if (*fd == -1) {
*fd = open(doc->filepath, O_RDONLY);
}
int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
intermediate_buf = malloc(to_read + bytes_read);
intermediate_buf_len = to_read + bytes_read;
if (bytes_read != 0) {
memcpy(intermediate_buf, buf, bytes_read);
}
read(*fd, intermediate_buf + bytes_read, to_read);
}
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int i = 0; i < intermediate_buf_len; i++) {
text_buffer_append_char(&text_buf, *(intermediate_buf + i));
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta->key = MetaContent;
strcpy(meta->strval, text_buf.dyn_buffer.buf);
text_buffer_destroy(&text_buf);
free(intermediate_buf);
APPEND_META(doc, meta)
}

8
src/parsing/text.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef SIST2_TEXT_H
#define SIST2_TEXT_H
#include "src/sist.h"
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc);
#endif

55
src/sist.h Normal file
View File

@@ -0,0 +1,55 @@
#ifndef SIST_H
#define SIST_H
#define UUID_STR_LEN 37
#include <glib-2.0/glib.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <ftw.h>
#include <uuid.h>
#include <magic.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <libavcodec/avcodec.h>
#include <ctype.h>
#include <mupdf/fitz.h>
#include <mupdf/pdf.h>
#include "argparse/argparse.h"
#include <time.h>
#include <limits.h>
#include <pthread.h>
#include <sys/stat.h>
#include <wordexp.h>
#include <onion/onion.h>
#include <onion/handler.h>
#include <onion/block.h>
#include <onion/shortcuts.h>
#include <curl/curl.h>
#include "cJSON/cJSON.h"
#include "types.h"
#include "tpool.h"
#include "util.h"
#include "src/index/elastic.h"
#include "io/store.h"
#include "io/serialize.h"
#include "io/walk.h"
#include "parsing/parse.h"
#include "parsing/mime.h"
#include "parsing/text.h"
#include "parsing/pdf.h"
#include "parsing/media.h"
#include "parsing/font.h"
#include "index/web.h"
#include "web/serve.h"
;
#endif

206
src/tpool.c Normal file
View File

@@ -0,0 +1,206 @@
#include "tpool.h"
#include "ctx.h"
typedef void (*thread_func_t)(void *arg);
typedef struct tpool_work {
void *arg;
thread_func_t func;
struct tpool_work *next;
} tpool_work_t;
typedef struct tpool {
tpool_work_t *work_head;
tpool_work_t *work_tail;
pthread_mutex_t work_mutex;
pthread_cond_t has_work_cond;
pthread_cond_t working_cond;
int working_cnt;
int thread_cnt;
int work_cnt;
int done_cnt;
int stop;
void (*cleanup_func)();
} tpool_t;
/**
* Create a work object
*/
static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
if (func == NULL) {
return NULL;
}
tpool_work_t *work = malloc(sizeof(tpool_work_t));
work->func = func;
work->arg = arg;
work->next = NULL;
return work;
}
/**
* Pop work object from thread pool
*/
static tpool_work_t *tpool_work_get(tpool_t *pool) {
tpool_work_t *work = pool->work_head;
if (work == NULL) {
return NULL;
}
if (work->next == NULL) {
pool->work_head = NULL;
pool->work_tail = NULL;
} else {
pool->work_head = work->next;
}
return work;
}
/**
* Push work object to thread pool
*/
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
tpool_work_t *work = tpool_work_create(func, arg);
if (work == NULL) {
return 0;
}
pthread_mutex_lock(&(pool->work_mutex));
if (pool->work_head == NULL) {
pool->work_head = work;
pool->work_tail = pool->work_head;
} else {
pool->work_tail->next = work;
pool->work_tail = work;
}
pool->work_cnt++;
pthread_cond_broadcast(&(pool->has_work_cond));
pthread_mutex_unlock(&(pool->work_mutex));
return 1;
}
/**
* Thread worker function
*/
static void *tpool_worker(void *arg) {
tpool_t *pool = arg;
while (1) {
pthread_mutex_lock(&(pool->work_mutex));
if (pool->stop) {
break;
}
if (pool->work_head == NULL) {
pthread_cond_wait(&(pool->has_work_cond), &(pool->work_mutex));
}
tpool_work_t *work = tpool_work_get(pool);
pool->working_cnt++;
pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) {
work->func(work->arg);
free(work);
}
pthread_mutex_lock(&(pool->work_mutex));
pool->working_cnt--;
pool->done_cnt++;
progress_bar_print((double)pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
if (pool->working_cnt == 0 && pool->work_head == NULL) {
pthread_cond_signal(&(pool->working_cond));
}
pthread_mutex_unlock(&(pool->work_mutex));
}
pool->cleanup_func();
pool->thread_cnt--;
pthread_cond_signal(&(pool->working_cond));
pthread_mutex_unlock(&(pool->work_mutex));
return NULL;
}
void tpool_wait(tpool_t *pool) {
pthread_mutex_lock(&(pool->work_mutex));
while (1) {
usleep(1000000);
if (pool->working_cnt != 0) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
} else {
pool->stop = 1;
break;
}
}
pthread_mutex_unlock(&(pool->work_mutex));
}
void tpool_destroy(tpool_t *pool) {
if (pool == NULL) {
return;
}
pthread_mutex_lock(&(pool->work_mutex));
tpool_work_t *work = pool->work_head;
while (work != NULL) {
tpool_work_t *tmp = work->next;
free(work);
work = tmp;
}
pool->stop = 1;
pthread_cond_broadcast(&(pool->has_work_cond));
pthread_mutex_unlock(&(pool->work_mutex));
tpool_wait(pool);
pthread_mutex_destroy(&(pool->work_mutex));
pthread_cond_destroy(&(pool->has_work_cond));
pthread_cond_destroy(&(pool->working_cond));
free(pool);
}
/**
* Create a thread pool
* @param thread_cnt Worker threads count
*/
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->working_cnt = 0;
pool->stop = 0;
pool->cleanup_func = cleanup_func;
pthread_mutex_init(&(pool->work_mutex), NULL);
pthread_cond_init(&(pool->has_work_cond), NULL);
pthread_cond_init(&(pool->working_cond), NULL);
pool->work_head = NULL;
pool->work_tail = NULL;
for (size_t i = 0; i < thread_cnt; i++) {
pthread_t thread;
pthread_create(&thread, NULL, tpool_worker, pool);
pthread_detach(thread);
}
return pool;
}

19
src/tpool.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef SIST2_TPOOL_H
#define SIST2_TPOOL_H
#include "sist.h"
struct tpool;
typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)());
void tpool_destroy(tpool_t *tm);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
void tpool_wait(tpool_t *tm);
#endif

87
src/types.h Normal file
View File

@@ -0,0 +1,87 @@
#ifndef SIST2_TYPES_H
#define SIST2_TYPES_H
#define META_INT_MASK 0xF0
#define META_STR_MASK 0xE0
#define META_LONG_MASK 0xD0
#define IS_META_INT(key) (key & META_INT_MASK) == META_INT_MASK
#define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK
#define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK
// This is written to file as a 8bit char!
enum metakey {
MetaContent = 1 | META_STR_MASK,
MetaWidth = 2 | META_INT_MASK,
MetaHeight = 3 | META_INT_MASK,
MetaMediaDuration = 4 | META_LONG_MASK,
MetaMediaAudioCodec = 5 | META_INT_MASK,
MetaMediaVideoCodec = 6 | META_INT_MASK,
MetaMediaBitrate = 7 | META_LONG_MASK,
MetaArtist = 8 | META_STR_MASK,
MetaAlbum = 9 | META_STR_MASK,
MetaAlbumArtist = 10 | META_STR_MASK,
MetaGenre = 11 | META_STR_MASK,
MetaTitle = 12 | META_STR_MASK,
MetaFontName = 13 | META_STR_MASK,
};
typedef struct index_descriptor {
char uuid[UUID_STR_LEN];
char version[6];
long timestamp;
char root[PATH_MAX];
char rewrite_url[8196];
short root_len;
char name[1024];
} index_descriptor_t;
typedef struct index_t {
struct index_descriptor desc;
struct store_t *store;
char path[PATH_MAX];
} index_t;
typedef struct meta_line {
struct meta_line *next;
enum metakey key;
union {
unsigned long longval;
int intval;
char strval[0];
};
} meta_line_t;
typedef struct document {
unsigned char uuid[16];
unsigned long ino;
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
meta_line_t *meta_head;
meta_line_t *meta_tail;
char *filepath;
} document_t;
typedef struct parse_job_t {
int base;
int ext;
struct stat info;
char filepath[1];
} parse_job_t;
#define APPEND_META(doc, meta) \
meta->next = NULL;\
if (doc->meta_head == NULL) {\
doc->meta_head = meta;\
doc->meta_tail = doc->meta_head;\
} else {\
doc->meta_tail->next = meta;\
doc->meta_tail = meta;\
}
#endif

60
src/util.c Normal file
View File

@@ -0,0 +1,60 @@
#define _GNU_SOURCE
#include "util.h"
#define PBSTR "========================================"
#define PBWIDTH 40
char *abspath(const char *path) {
char *abs = canonicalize_file_name(path);
abs = realloc(abs, strlen(abs) + 1);
strcat(abs, "/");
return abs;
}
void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
static int last_val = 0;
int val = (int) (percentage * 100);
if (last_val == val || val >= 100) {
return;
}
last_val = val;
int lpad = (int) ((percentage + 0.01) * PBWIDTH);
int rpad = PBWIDTH - lpad;
char tn_unit;
if (tn_size > 1000 * 1000 * 1000) {
tn_size = tn_size / 1000 / 1000 / 1000;
tn_unit = 'G';
} else {
tn_size = tn_size / 1000 / 1000;
tn_unit = 'M';
}
char index_unit;
if (index_size > 1000 * 1000 * 1000) {
index_size = index_size / 1000 / 1000 / 1000;
index_unit = 'G';
} else {
index_size = index_size / 1000 / 1000;
index_unit = 'M';
}
printf(
"\r%2d%%[%.*s>%*s] TN:%3d%c IDX:%3d%c",
val, lpad, PBSTR, rpad, "",
(int) tn_size, tn_unit,
(int) index_size, index_unit
);
fflush(stdout);
}
GHashTable *incremental_get_table() {
GHashTable *file_table = g_hash_table_new(g_direct_hash, g_direct_equal);
return file_table;
}

178
src/util.h Normal file
View File

@@ -0,0 +1,178 @@
#ifndef SIST2_UTIL_H
#define SIST2_UTIL_H
#include <stdio.h>
#define TEXT_BUF_FULL -1
#define INITIAL_BUF_SIZE 1024 * 16
#define SHOULD_IGNORE_CHAR(c) c < '0' || c > 'z'
typedef struct dyn_buffer {
char *buf;
size_t cur;
size_t size;
} dyn_buffer_t;
#include "sist.h"
typedef struct text_buffer {
size_t max_size;
int last_char_was_whitespace;
dyn_buffer_t dyn_buffer;
} text_buffer_t;
__always_inline
dyn_buffer_t dyn_buffer_create() {
dyn_buffer_t buf;
buf.size = INITIAL_BUF_SIZE;
buf.cur = 0;
buf.buf = malloc(INITIAL_BUF_SIZE);
return buf;
}
__always_inline
void grow_buffer(dyn_buffer_t *buf, size_t size) {
if (buf->cur + size > buf->size) {
do {
buf->size *= 2;
} while (buf->cur + size > buf->size);
buf->buf = realloc(buf->buf, buf->size);
}
}
__always_inline
void grow_buffer_small(dyn_buffer_t *buf) {
if (buf->cur + sizeof(long) > buf->size) {
buf->size *= 2;
buf->buf = realloc(buf->buf, buf->size);
}
}
__always_inline
void dyn_buffer_write(dyn_buffer_t *buf, void *data, size_t size) {
grow_buffer(buf, size);
memcpy(buf->buf + buf->cur, data, size);
buf->cur += size;
}
__always_inline
void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
grow_buffer_small(buf);
*(buf->buf + buf->cur) = c;
buf->cur += sizeof(c);
}
__always_inline
void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
dyn_buffer_write(buf, str, strlen(str));
dyn_buffer_write_char(buf, '\0');
}
__always_inline
void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
grow_buffer_small(buf);
*(int *) (buf->buf + buf->cur) = d;
buf->cur += sizeof(int);
}
__always_inline
void dyn_buffer_write_short(dyn_buffer_t *buf, short s) {
grow_buffer_small(buf);
*(short *) (buf->buf + buf->cur) = s;
buf->cur += sizeof(short);
}
__always_inline
void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
grow_buffer_small(buf);
*(unsigned long *) (buf->buf + buf->cur) = l;
buf->cur += sizeof(unsigned long);
}
__always_inline
void dyn_buffer_destroy(dyn_buffer_t *buf) {
free(buf->buf);
}
__always_inline
void text_buffer_destroy(text_buffer_t *buf) {
dyn_buffer_destroy(&buf->dyn_buffer);
}
__always_inline
text_buffer_t text_buffer_create(int max_size) {
text_buffer_t text_buf;
text_buf.dyn_buffer = dyn_buffer_create();
text_buf.max_size = max_size;
text_buf.last_char_was_whitespace = FALSE;
return text_buf;
}
__always_inline
void text_buffer_terminate_string(text_buffer_t *buf) {
dyn_buffer_write_char(&buf->dyn_buffer, '\0');
}
__always_inline
int text_buffer_append_char(text_buffer_t *buf, int c) {
if (SHOULD_IGNORE_CHAR(c)) {
if (!buf->last_char_was_whitespace) {
dyn_buffer_write_char(&buf->dyn_buffer, ' ');
buf->last_char_was_whitespace = TRUE;
if (buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL;
}
}
} else {
buf->last_char_was_whitespace = FALSE;
dyn_buffer_write_char(&buf->dyn_buffer, (char) c);
if (buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL;
}
}
return 0;
}
char *abspath(const char * path);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
__always_inline
void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
}
__always_inline
int incremental_get(GHashTable *table, unsigned long inode_no) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
} else {
return 0;
}
}
__always_inline
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
}
GHashTable *incremental_get_table();
#endif

395
src/web/serve.c Normal file
View File

@@ -0,0 +1,395 @@
#include "serve.h"
#include <src/ctx.h>
#include <onion/types_internal.h>
#include "static_generated.c"
#define CHUNK_SIZE 1024 * 1024 * 10
__always_inline
void set_default_headers(onion_response *res) {
onion_response_set_header(res, "Server", "sist2");
}
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strcmp(index_id, WebCtx.indices[i].desc.uuid) == 0) {
return &WebCtx.indices[i];
}
}
return NULL;
}
store_t *get_store(const char *index_id) {
index_t *idx = get_index_by_id(index_id);
if (idx != NULL) {
return idx->store;
}
return NULL;
}
int search_index(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "text/html");
onion_response_set_length(res, sizeof(search_html));
onion_response_write(res, search_html, sizeof(search_html));
return OCS_PROCESSED;
}
int javascript(void *p, onion_request *req, onion_response *res) {
onion_response_set_header(res, "Content-Type", "text/javascript");
onion_response_set_length(res, sizeof(bundle_js));
onion_response_write(res, bundle_js, sizeof(bundle_js));
return OCS_PROCESSED;
}
int style(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "text/css");
onion_response_set_length(res, sizeof(bundle_css));
onion_response_write(res, bundle_css, sizeof(bundle_css));
return OCS_PROCESSED;
}
int bg_bars(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "image/png");
onion_response_set_length(res, sizeof(bg_bars_png));
onion_response_write(res, bg_bars_png, sizeof(bg_bars_png));
return OCS_PROCESSED;
}
int img_sprite_skin_flag(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "image/png");
onion_response_set_length(res, sizeof(sprite_skin_flat_png));
onion_response_write(res, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
return OCS_PROCESSED;
}
int thumbnail(void *p, onion_request *req, onion_response *res) {
int flags = onion_request_get_flags(req);
if ((flags & OR_METHODS) != OR_GET) {
return OCS_NOT_PROCESSED;
}
const char *arg_index = onion_request_get_query(req, "1");
const char *arg_uuid = onion_request_get_query(req, "2");
if (arg_uuid == NULL || arg_index == NULL) {
return OCS_NOT_PROCESSED;
}
uuid_t uuid;
uuid_parse(arg_uuid, uuid);
store_t *store = get_store(arg_index);
if (store == NULL) {
return OCS_NOT_PROCESSED;
}
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "image/jpeg");
size_t data_len = 0;
char *data = store_read(store, (char *) uuid, sizeof(uuid_t), &data_len);
onion_response_set_length(res, data_len);
int written = onion_response_write(res, data, data_len);
onion_response_flush(res);
if (written != data_len || data_len == 0) {
printf("Couldn't write thumb\n");
}
free(data);
return OCS_PROCESSED;
}
/**
* Modified version of onion_shortcut_response_file that allows
* browsers to seek media files.
*/
int chunked_response_file(const char *filename, const char *mime,
int partial, onion_request *request, onion_response *res) {
int fd = open(filename, O_RDONLY | O_CLOEXEC);
struct stat st;
if (fd < 0 || stat(filename, &st) != 0 || S_ISDIR(st.st_mode)) {
close(fd);
return OCS_NOT_PROCESSED;
}
size_t length = st.st_size;
size_t ends;
const char *range = onion_request_get_header(request, "Range");
if (partial && range && strncmp(range, "bytes=", 6) == 0) {
onion_response_set_header(res, "Accept-Ranges", "bytes");
onion_response_set_code(res, HTTP_PARTIAL_CONTENT);
char tmp[1024];
if (strlen(range + 6) >= sizeof(tmp)) {
close(fd);
return OCS_INTERNAL_ERROR;
}
strncpy(tmp, range + 6, sizeof(tmp) - 1);
char *start = tmp;
char *end = tmp;
while (*end != '-' && *end) {
end++;
}
if (*end == '-') {
*end = '\0';
end++;
size_t starts;
starts = atol(start);
if (*end) {
// %d-%d
ends = atol(end);
} else {
// %d-
ends = MIN(starts + CHUNK_SIZE, length);
}
if (ends >= length || starts >= length || starts < 0) {
close(fd);
return OCS_INTERNAL_ERROR;
}
length = ends - starts;
if (starts != 0) {
lseek(fd, starts, SEEK_SET);
}
snprintf(tmp, sizeof(tmp), "bytes %ld-%ld/%ld",
starts, ends - 1, st.st_size);
onion_response_set_header(res, "Content-Range", tmp);
}
}
onion_response_set_length(res, length);
onion_response_set_header(res, "Content-Type", mime);
onion_response_write_headers(res);
if ((onion_request_get_flags(request) & OR_HEAD) == OR_HEAD) {
length = 0;
}
if (length) {
int bytes_read = 0, bytes_written;
size_t total_read = 0;
char buf[4046];
if (length > sizeof(buf)) {
size_t max = length - sizeof(buf);
while (total_read < max) {
bytes_read = read(fd, buf, sizeof(buf));
if (bytes_read < 0) {
break;
}
total_read += bytes_read;
bytes_written = onion_response_write(res, buf, bytes_read);
if (bytes_written != bytes_read) {
break;
}
}
}
if (sizeof(buf) >= (length - total_read)) {
bytes_read = read(fd, buf, length - total_read);
onion_response_write(res, buf, bytes_read);
}
}
close(fd);
return OCS_PROCESSED;
}
int search(void *p, onion_request *req, onion_response *res) {
int flags = onion_request_get_flags(req);
if ((flags & OR_METHODS) != OR_POST) {
return OCS_NOT_PROCESSED;
}
char *scroll_param;
const char *scroll = onion_request_get_query(req, "scroll");
if (scroll != NULL) {
scroll_param = "?scroll=3m";
} else {
scroll_param = "";
}
const struct onion_block_t *block = onion_request_get_data(req);
if (block == NULL) {
return OCS_NOT_PROCESSED;
}
char url[4096];
snprintf(url, 4096, "%s/sist2/_search%s", WebCtx.es_url, scroll_param);
response_t *r = web_post(url, onion_block_data(block), "Content-Type: application/json");
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/json");
onion_response_set_length(res, r->size);
if (r->status_code == 200) {
onion_response_write(res, r->body, r->size);
}
free_response(r);
return OCS_PROCESSED;
}
int scroll(void *p, onion_request *req, onion_response *res) {
int flags = onion_request_get_flags(req);
if ((flags & OR_METHODS) != OR_GET) {
return OCS_NOT_PROCESSED;
}
char url[4096];
snprintf(url, 4096, "%s/_search/scroll", WebCtx.es_url);
const char *scroll_id = onion_request_get_query(req, "scroll_id");
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "scroll_id", scroll_id);
cJSON_AddStringToObject(json, "scroll", "3m");
char *json_str = cJSON_PrintUnformatted(json);
response_t *r = web_post(url, json_str, "Content-Type: application/json");
cJSON_Delete(json);
cJSON_free(json_str);
if (r->status_code != 200) {
free_response(r);
return OCS_NOT_PROCESSED;
}
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/json");
onion_response_set_header(res, "Content-Disposition", "application/json");
onion_response_set_length(res, r->size);
onion_response_write(res, r->body, r->size);
free_response(r);
return OCS_PROCESSED;
}
int serve_file_from_url(cJSON *json, index_t *idx, onion_request *req, onion_response *res) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
char url[8196];
snprintf(url, sizeof(url),
"%s%s/%s%s%s",
idx->desc.rewrite_url, path, name, strlen(ext) == 0 ? "" : ".", ext);
return onion_shortcut_redirect(url, req, res);
}
int serve_file_from_disk(cJSON *json, index_t *idx, onion_request *req, onion_response *res) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
const char *mime = cJSON_GetObjectItem(json, "mime")->valuestring;
char full_path[PATH_MAX];
snprintf(full_path, PATH_MAX, "%s%s/%s%s%s",
idx->desc.root, path, name, strlen(ext) == 0 ? "" : ".", ext);
return chunked_response_file(full_path, mime, 1, req, res);
}
int index_info(void *p, onion_request *req, onion_response *res) {
cJSON *json = cJSON_CreateObject();
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "application/json");
for (int i = 0; i < WebCtx.index_count; i++) {
index_t *idx = &WebCtx.indices[i];
cJSON *idx_json = cJSON_CreateObject();
cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
cJSON_AddNumberToObject(idx_json, "timestamp", (double)idx->desc.timestamp);
cJSON_AddItemToArray(arr, idx_json);
}
char *json_str = cJSON_PrintUnformatted(json);
onion_response_write0(res, json_str);
free(json_str);
cJSON_Delete(json);
return OCS_PROCESSED;
}
int file(void *p, onion_request *req, onion_response *res) {
const char *arg_uuid = onion_request_get_query(req, "1");
if (arg_uuid == NULL) {
return OCS_PROCESSED;
}
cJSON *source = elastic_get_document(arg_uuid);
const char *index_id = cJSON_GetObjectItem(source, "index")->valuestring;
index_t *idx = get_index_by_id(index_id);
if (idx == NULL) {
return OCS_NOT_PROCESSED;
}
const char *name = cJSON_GetObjectItem(source, "name")->valuestring;
const char *ext = cJSON_GetObjectItem(source, "extension")->valuestring;
char disposition[8196];
snprintf(disposition, sizeof(disposition), "inline; filename=\"%s%s%s\"",
name, strlen(ext) == 0 ? "" : ".", ext);
onion_response_set_header(res, "Content-Disposition", disposition);
if (strlen(idx->desc.rewrite_url) == 0) {
return serve_file_from_disk(source, idx, req, res);
} else {
return serve_file_from_url(source, idx, req, res);
}
}
void serve(const char *hostname, const char *port) {
onion *o = onion_new(O_POOL);
onion_set_timeout(o, 3500);
onion_set_hostname(o, hostname);
onion_set_port(o, port);
onion_url *urls = onion_root_url(o);
// Static paths
onion_url_add(urls, "", search_index);
onion_url_add(urls, "css", style);
onion_url_add(urls, "js", javascript);
onion_url_add(urls, "img/bg-bars.png", bg_bars);
onion_url_add(urls, "img/sprite-skin-flat.png", img_sprite_skin_flag);
onion_url_add(urls, "es", search);
onion_url_add(urls, "scroll", scroll);
onion_url_add(
urls,
"^t/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})/"
"([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$",
thumbnail
);
onion_url_add(urls, "^f/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$", file);
onion_url_add(urls, "i", index_info);
printf("Starting web server @ http://%s:%s\n", hostname, port);
onion_listen(o);
onion_free(o);
}

8
src/web/serve.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef SIST2_SERVE_H
#define SIST2_SERVE_H
#include "src/sist.h"
void serve(const char *hostname, const char *port);
#endif

File diff suppressed because one or more lines are too long