Initial commit (squashed)

This commit is contained in:
2019-09-27 22:56:07 -04:00
commit 564a17a8fa
75 changed files with 7518 additions and 0 deletions

327
src/io/serialize.c Normal file
View File

@@ -0,0 +1,327 @@
#include "src/ctx.h"
#include "serialize.h"
static __thread int IndexFd = -1;
typedef struct {
unsigned char uuid[16];
unsigned long ino;
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
} line_t;
void skip_meta(FILE *file) {
enum metakey key = getc(file);
while (key != '\n') {
if (IS_META_INT(key)) {
fseek(file, sizeof(int), SEEK_CUR);
} else if (IS_META_LONG(key)) {
fseek(file, sizeof(long), SEEK_CUR);
} else {
while ((getc(file))) {}
}
key = getc(file);
}
}
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "uuid", desc->uuid);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror(path);
}
char *str = cJSON_Print(json);
write(fd, str, strlen(str));
free(str);
close(fd);
cJSON_Delete(json);
}
index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size);
*(buf + info.st_size) = '\0';
close(fd);
cJSON *json = cJSON_Parse(buf);
index_descriptor_t descriptor;
descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble;
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (int)strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
cJSON_Delete(json);
free(buf);
return descriptor;
}
char *get_meta_key_text(enum metakey meta_key) {
switch (meta_key) {
case MetaContent:
return "content";
case MetaWidth:
return "width";
case MetaHeight:
return "height";
case MetaMediaDuration:
return "duration";
case MetaMediaAudioCodec:
return "audioc";
case MetaMediaVideoCodec:
return "videoc";
case MetaMediaBitrate:
return "bitrate";
case MetaArtist:
return "artist";
case MetaAlbum:
return "album";
case MetaAlbumArtist:
return "album_artist";
case MetaGenre:
return "genre";
case MetaTitle:
return "title";
case MetaFontName:
return "font_name";
default:
return NULL;
}
}
void write_document(document_t *doc) {
if (IndexFd == -1) {
char dstfile[PATH_MAX];
pid_t tid = syscall(SYS_gettid);
snprintf(dstfile, PATH_MAX, "%s_index_%d", ScanCtx.index.path, tid);
IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
if (IndexFd == -1) {
perror("open");
}
}
dyn_buffer_t buf = dyn_buffer_create();
// Ignore root directory in the file path
doc->ext = (short)(doc->ext - ScanCtx.index.desc.root_len);
doc->base = (short)(doc->base - ScanCtx.index.desc.root_len);
doc->filepath += ScanCtx.index.desc.root_len;
dyn_buffer_write(&buf, doc, sizeof(line_t));
dyn_buffer_write_str(&buf, doc->filepath);
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
dyn_buffer_write_char(&buf, meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->intval);
} else if (IS_META_LONG(meta->key)) {
dyn_buffer_write_long(&buf, meta->longval);
} else {
dyn_buffer_write_str(&buf, meta->strval);
}
meta_line_t *tmp = meta;
meta = meta->next;
free(tmp);
}
dyn_buffer_write_char(&buf, '\n');
write(IndexFd, buf.buf, buf.cur);
ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf);
}
void serializer_cleanup() {
close(IndexFd);
}
void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {
line_t line;
dyn_buffer_t buf = dyn_buffer_create();
FILE *file = fopen(path, "rb");
while (1) {
buf.cur = 0;
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
cJSON *document = cJSON_CreateObject();
cJSON_AddStringToObject(document, "index", index_id);
char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str);
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
cJSON_AddNumberToObject(document, "size", (double)line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
}
dyn_buffer_write_char(&buf, '\0');
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') {
*(buf.buf + line.ext - 1) = '\0';
} else {
*(buf.buf + line.ext) = '\0';
}
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
*(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
enum metakey key = getc(file);
while (key != '\n') {
switch (key) {
case MetaWidth:
case MetaHeight:
case MetaMediaDuration:
case MetaMediaBitrate: {
int value;
fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec: {
int value;
fread(&value, sizeof(int), 1, file);
const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
if (desc != NULL) {
cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
}
break;
}
case MetaContent:
case MetaArtist:
case MetaAlbum:
case MetaAlbumArtist:
case MetaGenre:
case MetaFontName:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
}
dyn_buffer_write_char(&buf, '\0');
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break;
}
}
key = getc(file);
}
func(document, uuid_str);
cJSON_free(document);
}
fclose(file);
}
void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb");
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
incremental_put(table, line.ino, line.mtime);
while ((getc(file))) {}
skip_meta(file);
}
fclose(file);
}
/**
* Copy items from an index that are in the copy_table. Also copies from
* the store.
*/
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table) {
FILE *file = fopen(filepath, "rb");
FILE *dst_file = fopen(dst_filepath, "ab");
line_t line;
while (1) {
fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
if (incremental_get(copy_table, line.ino)) {
fwrite(&line, sizeof(line), 1, dst_file);
size_t buf_len;
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
char c;
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
}
fwrite("\0", sizeof(c), 1, dst_file);
enum metakey key;
while (1) {
key = getc(file);
if (key == '\n') {
break;
}
fwrite(&key, sizeof(char), 1, dst_file);
if (IS_META_INT(key)) {
int val;
fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else if (IS_META_LONG(key)) {
long val;
fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else {
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
}
fwrite("\0", sizeof(c), 1, dst_file);
}
}
} else {
skip_meta(file);
}
}
fclose(file);
}

27
src/io/serialize.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef SIST2_SERIALIZE_H
#define SIST2_SERIALIZE_H
#include "src/sist.h"
#include <sys/syscall.h>
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void write_document(document_t *doc);
void read_index(const char *path, const char[UUID_STR_LEN], index_func);
void incremental_read(GHashTable *table, const char *filepath);
/**
* Must be called after write_document
*/
void serializer_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc);
index_descriptor_t read_index_descriptor(char *path);
#endif

105
src/io/store.c Normal file
View File

@@ -0,0 +1,105 @@
#include "store.h"
#include "src/ctx.h"
store_t *store_create(char *path) {
store_t *store = malloc(sizeof(struct store_t));
pthread_rwlock_init(&store->lock, NULL);
mdb_env_create(&store->env);
int open_ret = mdb_env_open(store->env,
path,
MDB_WRITEMAP | MDB_MAPASYNC,
S_IRUSR | S_IWUSR
);
if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s", mdb_strerror(open_ret));
exit(1);
}
store->size = (size_t) 1024 * 1024 * 5;
ScanCtx.stat_tn_size = store->size;
mdb_env_set_mapsize(store->env, store->size);
// Open dbi
MDB_txn *txn;
int r3 = mdb_txn_begin(store->env, NULL, 0, &txn);
int r4 = mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_commit(txn);
return store;
}
void store_destroy(store_t *store) {
pthread_rwlock_destroy(&store->lock);
mdb_close(store->env, store->dbi);
mdb_env_close(store->env);
free(store);
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
MDB_val mdb_value;
mdb_value.mv_data = buf;
mdb_value.mv_size = buf_len;
MDB_txn *txn;
pthread_rwlock_rdlock(&store->lock);
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
if (put_ret == MDB_MAP_FULL) {
mdb_txn_abort(txn);
pthread_rwlock_unlock(&store->lock);
// Cannot resize when there is a opened transaction.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += 1024 * 1024 * 5;
mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
ScanCtx.stat_tn_size = store->size;
}
mdb_txn_commit(txn);
pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) {
printf("%s\n", mdb_strerror(put_ret));
}
}
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *buf = NULL;
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
MDB_val mdb_value;
MDB_txn *txn;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
int get_ret = mdb_get(txn, store->dbi, &mdb_key, &mdb_value);
if (get_ret == MDB_NOTFOUND) {
*ret_vallen = 0;
} else {
*ret_vallen = mdb_value.mv_size;
buf = malloc(mdb_value.mv_size);
memcpy(buf, mdb_value.mv_data, mdb_value.mv_size);
}
mdb_txn_abort(txn);
return buf;
}

24
src/io/store.h Normal file
View File

@@ -0,0 +1,24 @@
#ifndef SIST2_STORE_H
#define SIST2_STORE_H
#include <pthread.h>
#include <lmdb.h>
typedef struct store_t {
MDB_dbi dbi;
MDB_env *env;
size_t size;
pthread_rwlock_t lock;
} store_t;
#include "src/sist.h"
store_t *store_create(char *path);
void store_destroy(store_t *store);
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
#endif

34
src/io/walk.c Normal file
View File

@@ -0,0 +1,34 @@
#include "walk.h"
#include "src/ctx.h"
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
memcpy(&(job->filepath), filepath, len + 1);
job->base = base;
char *p = strrchr(filepath + base, '.');
if (p != NULL) {
job->ext = (int)(p - filepath + 1);
} else {
job->ext = len;
}
memcpy(&(job->info), info, sizeof(struct stat));
return job;
}
int handle_entry(const char *filepath, const struct stat *info, const int typeflag, struct FTW *ftw) {
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job);
}
return 0;
}
int walk_directory_tree(const char *dirpath) {
return nftw(dirpath, handle_entry, 15, FTW_PHYS);
}

10
src/io/walk.h Normal file
View File

@@ -0,0 +1,10 @@
#ifndef WALK_H
#define WALK_H
#define _XOPEN_SOURCE 500
#include "src/sist.h"
int walk_directory_tree(const char *);
#endif