Compare commits

...

24 Commits

Author SHA1 Message Date
046edea0e2 Handle special characters in file paths 2020-06-10 19:45:36 -04:00
a011b7e97b Fragment size setting 2020-06-09 21:40:53 -04:00
8c1c1697e0 Fix file wordexp in some paths #59 2020-06-05 19:41:02 -04:00
018b49fa4c Fix csv_escape #58 2020-06-05 19:13:03 -04:00
27b4e6403e Re-enable path autocomplete #54 2020-06-02 19:46:58 -04:00
13fdbd9e69 Fix for ES 7.7 #54 2020-06-01 18:14:34 -04:00
5e7fdaf8dd Update issue-template.md 2020-06-01 10:45:43 -04:00
19d5c8ac9f Update issue-template.md 2020-05-29 18:19:21 -04:00
99497049a8 Merge pull request #53 from dpieski/patch-1
Update README
2020-05-29 18:16:13 -04:00
Andrew
1a3181d78b Update README
changed case of path in a link to the usage guide to fix 404 error.
2020-05-29 15:37:20 -05:00
449aa77c8f Fix for unknown mime inside archives 2020-05-25 17:36:04 -04:00
3058c55510 Memory leak fix #37 2020-05-24 15:42:42 -04:00
dedf9287b2 Fix name separation in --archive list mode 2020-05-24 14:36:59 -04:00
ab199b0c0c Remove arc_reset() function because seek() inside archive doesn't work 2020-05-24 14:18:31 -04:00
c4fbae123e Better support for media files inside archives 2020-05-24 14:10:23 -04:00
dd2397ef5c handle .tgz #44, ignore files inside archives for stats page 2020-05-24 10:10:28 -04:00
ee0f71f4d3 fix compile warning 2020-05-17 15:00:56 -04:00
0bbb96b149 Merge pull request #51 from simon987/stats
Stats page
2020-05-17 14:49:28 -04:00
78f6e16701 image 2020-05-17 12:47:45 -04:00
4625bca9a9 stats 2020-05-17 12:47:02 -04:00
f2ae653886 Revert "wip"
This reverts commit 5686bc86
2020-05-16 08:16:49 -04:00
5686bc864d wip 2020-05-13 17:37:40 -04:00
cf513b4ad8 Escape invalid UTF8 characters simon987/sist2#44, increase magic buffer size 2020-05-12 19:28:02 -04:00
013423424e UTF-8 fix attempt w/ libarchive (#44) 2020-05-10 19:52:42 -04:00
36 changed files with 2212 additions and 531 deletions

View File

@@ -9,7 +9,9 @@ assignees: ''
sist2 version:
Platform (please indicate if you're using Docker):
Platform (Linux or Docker):
Elasticsearch version:
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`

View File

@@ -30,7 +30,7 @@ add_executable(
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/cli.c src/cli.h
)
src/stats.c src/stats.h)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)

View File

@@ -21,12 +21,16 @@ sist2 (Simple incremental search tool)
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
## Getting Started
1. Have an Elasticsearch (>= 6.X.X) instance running
@@ -49,7 +53,7 @@ sist2 (Simple incremental search tool)
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](DOCS/USAGE.md)
1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
@@ -57,7 +61,7 @@ sist2 (Simple incremental search tool)
## Example usage
See [Usage guide](DOCS/USAGE.md) for more details
See [Usage guide](docs/USAGE.md) for more details
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
@@ -87,13 +91,11 @@ they were directly in the file system. Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Parsing media files with formats that require
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limitted (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### OCR

View File

@@ -40,6 +40,9 @@ Scan options
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size in MB for files inside archives (see USAGE.md). DEFAULT: 2000
Index options
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
@@ -94,6 +97,18 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
will not be considered for the disk utilisation visualization; their size will be added to
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
and so on.
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### Scan examples
@@ -122,6 +137,11 @@ documents.idx/
├── _index_139965425223424
├── _index_139965433616128
├── _index_139965442008832
├── _index_139965442008832
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── add_size.csv
└── thumbs
├── data.mdb
└── lock.mdb
@@ -137,6 +157,8 @@ database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
The `.csv` are pre-computed aggregations necessary for the stats page.
*Advanced usage*

BIN
docs/stats.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB

View File

@@ -10,6 +10,7 @@
"path": {
"type": "text",
"analyzer": "path_analyzer",
"copy_to": "suggest-path",
"fielddata": true,
"fields": {
"nGram": {
@@ -22,6 +23,10 @@
}
}
},
"suggest-path": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
},
"mime": {
"type": "keyword"
},

View File

@@ -4,7 +4,7 @@ rm -rf index.sist2/
rm src/static/js/bundle.js 2> /dev/null
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
cat src/static/js/{util,dom,search}.js >> src/static/js/bundle.js
cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
rm src/static/css/bundle*.css 2> /dev/null
cat src/static/css/*.min.css > src/static/css/bundle.css

View File

@@ -13,7 +13,7 @@ application/epub+zip, epub
application/freeloader, frl
application/futuresplash, spl
application/groupwise, vew
application/gzip, gz
application/gzip, gz|tgz
application/hta, hta
application/i-deas, unv
application/iges, iges|igs
@@ -429,3 +429,4 @@ video/x-qtc, qtc
video/x-sgi-movie, movie|mv
x-epoc/x-sisx-app,
application/x-zstd-dictionary,
application/vnd.ms-outlook, msg
1 application/arj arj
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
16 application/gzip gz gz|tgz
17 application/hta hta
18 application/i-deas unv
19 application/iges iges|igs
429 video/x-sgi-movie movie|mv
430 x-epoc/x-sisx-app
431 application/x-zstd-dictionary
432 application/vnd.ms-outlook msg

View File

@@ -2,9 +2,11 @@ files = [
"src/static/css/bundle.css",
"src/static/css/bundle_dark.css",
"src/static/js/bundle.js",
"src/static/js/search.js",
"src/static/img/sprite-skin-flat.png",
"src/static/img/sprite-skin-flat-dark.png",
"src/static/search.html",
"src/static/stats.html",
]

View File

@@ -12,6 +12,9 @@
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
#define DEFAULT_MAX_MEM_BUFFER 2000
const char* TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
@@ -180,6 +183,16 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
ScanCtx.exclude = NULL;
}
if (args->treemap_threshold_str == 0) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
@@ -195,6 +208,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
return 0;
}

View File

@@ -22,6 +22,9 @@ typedef struct scan_args {
const char *tesseract_path;
char *exclude_regex;
int fast;
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
} scan_args_t;
scan_args_t *scan_args_create();

View File

@@ -202,9 +202,8 @@ void delete_queue(int max) {
Indexer->line_head = tmp->next;
if (Indexer->line_head == NULL) {
Indexer->line_tail = NULL;
} else {
free(tmp);
}
free(tmp);
Indexer->queued -= 1;
}
}

File diff suppressed because one or more lines are too long

View File

@@ -251,10 +251,17 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
} else {
*(buf.buf + line.ext) = '\0';
}
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
char tmp[PATH_MAX * 3];
str_escape(tmp, buf.buf + line.base);
cJSON_AddStringToObject(document, "name", tmp);
if (line.base > 0) {
*(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
str_escape(tmp, buf.buf);
cJSON_AddStringToObject(document, "path", tmp);
} else {
cJSON_AddStringToObject(document, "path", "");
}

View File

@@ -3,6 +3,7 @@
#include <third-party/argparse/argparse.h>
#include <glib.h>
#include <locale.h>
#include "cli.h"
#include "io/serialize.h"
@@ -14,12 +15,14 @@
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "stats.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.2.0";
static const char *const Version = "2.4.2";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -124,6 +127,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
init_media();
// OOXML
@@ -206,6 +210,8 @@ void sist2_scan(scan_args_t *args) {
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
if (args->incremental != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -312,6 +318,7 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) {
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create();
@@ -350,6 +357,11 @@ int main(int argc, const char *argv[]) {
"which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),

View File

@@ -63,378 +63,379 @@ enum mime {
application_vnd_ms_excel=655415,
application_vnd_ms_fontobject=655416,
application_vnd_ms_opentype=655417 | 0x20000000,
application_vnd_ms_pki_certstore=655418,
application_vnd_ms_pki_pko=655419,
application_vnd_ms_pki_seccat=655420,
application_vnd_ms_powerpoint=655421,
application_vnd_ms_project=655422,
application_vnd_oasis_opendocument_base=655423,
application_vnd_oasis_opendocument_formula=655424,
application_vnd_oasis_opendocument_graphics=655425,
application_vnd_oasis_opendocument_presentation=655426,
application_vnd_oasis_opendocument_spreadsheet=655427,
application_vnd_oasis_opendocument_text=655428,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655429 | 0x04000000,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655430 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655431 | 0x04000000,
application_vnd_symbian_install=655432,
application_vnd_tcpdump_pcap=655433,
application_vnd_wap_wmlc=655434,
application_vnd_wap_wmlscriptc=655435,
application_vnd_xara=655436,
application_vocaltec_media_desc=655437,
application_vocaltec_media_file=655438,
application_warc=655439,
application_winhelp=655440,
application_wordperfect=655441,
application_wordperfect6_0=655442,
application_wordperfect6_1=655443,
application_x_123=655444,
application_x_7z_compressed=655445 | 0x10000000,
application_x_aim=655446,
application_x_apple_diskimage=655447,
application_x_arc=655448 | 0x10000000,
application_x_archive=655449,
application_x_atari_7800_rom=655450,
application_x_authorware_bin=655451,
application_x_authorware_map=655452,
application_x_authorware_seg=655453,
application_x_avira_qua=655454,
application_x_bcpio=655455,
application_x_bittorrent=655456,
application_x_bsh=655457,
application_x_bytecode_python=655458,
application_x_bzip=655459,
application_x_bzip2=655460 | 0x08000000,
application_x_cbr=655461,
application_x_cbz=655462 | 0x40000000,
application_x_cdlink=655463,
application_x_chat=655464,
application_x_chrome_extension=655465,
application_x_cocoa=655466,
application_x_conference=655467,
application_x_coredump=655468,
application_x_cpio=655469,
application_x_dbf=655470,
application_x_dbt=655471,
application_x_debian_package=655472,
application_x_deepv=655473,
application_x_director=655474,
application_x_dmp=655475,
application_x_dosdriver=655476,
application_x_dosexec=655477,
application_x_dvi=655478,
application_x_elc=655479,
application_vnd_ms_outlook=655418,
application_vnd_ms_pki_certstore=655419,
application_vnd_ms_pki_pko=655420,
application_vnd_ms_pki_seccat=655421,
application_vnd_ms_powerpoint=655422,
application_vnd_ms_project=655423,
application_vnd_oasis_opendocument_base=655424,
application_vnd_oasis_opendocument_formula=655425,
application_vnd_oasis_opendocument_graphics=655426,
application_vnd_oasis_opendocument_presentation=655427,
application_vnd_oasis_opendocument_spreadsheet=655428,
application_vnd_oasis_opendocument_text=655429,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655430 | 0x04000000,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655431 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655432 | 0x04000000,
application_vnd_symbian_install=655433,
application_vnd_tcpdump_pcap=655434,
application_vnd_wap_wmlc=655435,
application_vnd_wap_wmlscriptc=655436,
application_vnd_xara=655437,
application_vocaltec_media_desc=655438,
application_vocaltec_media_file=655439,
application_warc=655440,
application_winhelp=655441,
application_wordperfect=655442,
application_wordperfect6_0=655443,
application_wordperfect6_1=655444,
application_x_123=655445,
application_x_7z_compressed=655446 | 0x10000000,
application_x_aim=655447,
application_x_apple_diskimage=655448,
application_x_arc=655449 | 0x10000000,
application_x_archive=655450,
application_x_atari_7800_rom=655451,
application_x_authorware_bin=655452,
application_x_authorware_map=655453,
application_x_authorware_seg=655454,
application_x_avira_qua=655455,
application_x_bcpio=655456,
application_x_bittorrent=655457,
application_x_bsh=655458,
application_x_bytecode_python=655459,
application_x_bzip=655460,
application_x_bzip2=655461 | 0x08000000,
application_x_cbr=655462,
application_x_cbz=655463 | 0x40000000,
application_x_cdlink=655464,
application_x_chat=655465,
application_x_chrome_extension=655466,
application_x_cocoa=655467,
application_x_conference=655468,
application_x_coredump=655469,
application_x_cpio=655470,
application_x_dbf=655471,
application_x_dbt=655472,
application_x_debian_package=655473,
application_x_deepv=655474,
application_x_director=655475,
application_x_dmp=655476,
application_x_dosdriver=655477,
application_x_dosexec=655478,
application_x_dvi=655479,
application_x_elc=655480,
application_x_empty=1,
application_x_envoy=655481,
application_x_esrehber=655482,
application_x_excel=655483,
application_x_executable=655484,
application_x_font_gdos=655485,
application_x_font_pf2=655486,
application_x_font_pfm=655487,
application_x_font_sfn=655488,
application_x_font_ttf=655489 | 0x20000000,
application_x_fptapplication_x_dbt=655490,
application_x_freelance=655491,
application_x_gamecube_rom=655492,
application_x_gdbm=655493,
application_x_gettext_translation=655494,
application_x_git=655495,
application_x_gsp=655496,
application_x_gss=655497,
application_x_gtar=655498,
application_x_gzip=655499,
application_x_hdf=655500,
application_x_helpfile=655501,
application_x_httpd_imap=655502,
application_x_ima=655503,
application_x_innosetup=655504,
application_x_internett_signup=655505,
application_x_inventor=655506,
application_x_ip2=655507,
application_x_java_applet=655508,
application_x_java_commerce=655509,
application_x_java_image=655510,
application_x_java_jmod=655511,
application_x_java_keystore=655512,
application_x_kdelnk=655513,
application_x_koan=655514,
application_x_latex=655515,
application_x_livescreen=655516,
application_x_lotus=655517,
application_x_lz4=655518 | 0x08000000,
application_x_lz4_json=655519,
application_x_lzh=655520,
application_x_lzh_compressed=655521,
application_x_lzip=655522 | 0x08000000,
application_x_lzma=655523 | 0x08000000,
application_x_lzop=655524 | 0x08000000,
application_x_lzx=655525,
application_x_mach_binary=655526,
application_x_mach_executable=655527,
application_x_magic_cap_package_1_0=655528,
application_x_mathcad=655529,
application_x_maxis_dbpf=655530,
application_x_meme=655531,
application_x_midi=655532,
application_x_mif=655533,
application_x_mix_transfer=655534,
application_x_mobipocket_ebook=655535 | 0x02000000,
application_x_ms_compress_szdd=655536,
application_x_ms_pdb=655537,
application_x_ms_reader=655538,
application_x_msaccess=655539,
application_x_n64_rom=655540,
application_x_navi_animation=655541,
application_x_navidoc=655542,
application_x_navimap=655543,
application_x_navistyle=655544,
application_x_nes_rom=655545,
application_x_netcdf=655546,
application_x_newton_compatible_pkg=655547,
application_x_nintendo_ds_rom=655548,
application_x_object=655549,
application_x_omc=655550,
application_x_omcdatamaker=655551,
application_x_omcregerator=655552,
application_x_pagemaker=655553,
application_x_pcl=655554,
application_x_pgp_keyring=655555,
application_x_pixclscript=655556,
application_x_pkcs7_certreqresp=655557,
application_x_pkcs7_signature=655558,
application_x_project=655559,
application_x_qpro=655560,
application_x_rar=655561 | 0x10000000,
application_x_rpm=655562,
application_x_sdp=655563,
application_x_sea=655564,
application_x_seelogo=655565,
application_x_setupscript=655566,
application_x_shar=655567,
application_x_sharedlib=655568,
application_x_shockwave_flash=655569,
application_x_snappy_framed=655570,
application_x_sprite=655571,
application_x_sqlite3=655572,
application_x_stargallery_thm=655573,
application_x_stuffit=655574,
application_x_sv4cpio=655575,
application_x_sv4crc=655576,
application_x_tar=655577 | 0x10000000,
application_x_tbook=655578,
application_x_terminfo=655579,
application_x_terminfo2=655580,
application_x_tex_tfm=655581,
application_x_texinfo=655582,
application_x_ustar=655583,
application_x_visio=655584,
application_x_vnd_audioexplosion_mzz=655585,
application_x_vnd_ls_xpix=655586,
application_x_vrml=655587,
application_x_wais_source=655588,
application_x_wine_extension_ini=655589,
application_x_wintalk=655590,
application_x_world=655591,
application_x_wri=655592,
application_x_x509_ca_cert=655593,
application_x_xz=655594 | 0x08000000,
application_x_zip=655595,
application_x_zstd=655596 | 0x08000000,
application_x_zstd_dictionary=655597,
application_xml=655598,
application_zip=655599 | 0x10000000,
application_zlib=655600,
audio_basic=458993 | 0x80000000,
audio_it=458994,
audio_make=458995,
audio_mid=458996,
audio_midi=458997,
audio_mp4=458998,
audio_mpeg=458999,
audio_ogg=459000,
audio_s3m=459001,
audio_tsp_audio=459002,
audio_tsplayer=459003,
audio_vnd_qcelp=459004,
audio_voxware=459005,
audio_x_aiff=459006,
audio_x_flac=459007,
audio_x_gsm=459008,
audio_x_hx_aac_adts=459009,
audio_x_jam=459010,
audio_x_liveaudio=459011,
audio_x_m4a=459012,
audio_x_midi=459013,
audio_x_mod=459014,
audio_x_mp4a_latm=459015,
audio_x_mpeg_3=459016,
audio_x_mpequrl=459017,
audio_x_nspaudio=459018,
audio_x_pn_realaudio=459019,
audio_x_psid=459020,
audio_x_realaudio=459021,
audio_x_s3m=459022,
audio_x_twinvq=459023,
audio_x_twinvq_plugin=459024,
audio_x_voc=459025,
audio_x_wav=459026,
audio_x_xbox_executable=459027 | 0x80000000,
audio_x_xbox360_executable=459028 | 0x80000000,
audio_xm=459029,
font_otf=327958 | 0x20000000,
font_sfnt=327959 | 0x20000000,
font_woff=327960 | 0x20000000,
font_woff2=327961 | 0x20000000,
image_bmp=524570,
image_cmu_raster=524571,
image_fif=524572,
image_florian=524573,
image_g3fax=524574,
image_gif=524575,
image_heic=524576,
image_ief=524577,
image_jpeg=524578,
image_jutvision=524579,
image_naplps=524580,
image_pict=524581,
image_png=524582,
image_svg=524583 | 0x80000000,
image_svg_xml=524584 | 0x80000000,
image_tiff=524585,
image_vnd_adobe_photoshop=524586 | 0x80000000,
image_vnd_djvu=524587 | 0x80000000,
image_vnd_fpx=524588,
image_vnd_microsoft_icon=524589,
image_vnd_rn_realflash=524590,
image_vnd_rn_realpix=524591,
image_vnd_wap_wbmp=524592,
image_vnd_xiff=524593,
image_webp=524594,
image_wmf=524595,
image_x_3ds=524596,
image_x_award_bioslogo=524597,
image_x_cmu_raster=524598,
image_x_cur=524599,
image_x_dwg=524600,
image_x_eps=524601,
image_x_exr=524602,
image_x_gem=524603,
image_x_icns=524604,
image_x_icon=524605 | 0x80000000,
image_x_jg=524606,
image_x_jps=524607,
image_x_ms_bmp=524608,
image_x_niff=524609,
image_x_pcx=524610,
image_x_pict=524611,
image_x_portable_bitmap=524612,
image_x_portable_graymap=524613,
image_x_portable_pixmap=524614,
image_x_quicktime=524615,
image_x_rgb=524616,
image_x_tga=524617,
image_x_tiff=524618,
image_x_win_bitmap=524619,
image_x_xcf=524620 | 0x80000000,
image_x_xpixmap=524621 | 0x80000000,
image_x_xwindowdump=524622,
message_news=196943,
message_rfc822=196944,
model_vnd_dwf=65873,
model_vnd_gdl=65874,
model_vnd_gs_gdl=65875,
model_vrml=65876,
model_x_pov=65877,
text_PGP=590166,
text_asp=590167,
text_css=590168,
text_html=590169 | 0x01000000,
text_javascript=590170,
text_mcf=590171,
text_pascal=590172,
text_plain=590173,
text_richtext=590174,
text_rtf=590175,
text_scriplet=590176,
text_tab_separated_values=590177,
text_troff=590178,
text_uri_list=590179,
text_vnd_abc=590180,
text_vnd_fmi_flexstor=590181,
text_vnd_wap_wml=590182,
text_vnd_wap_wmlscript=590183,
text_webviewhtml=590184,
text_x_Algol68=590185,
text_x_asm=590186,
text_x_audiosoft_intra=590187,
text_x_awk=590188,
text_x_bcpl=590189,
text_x_c=590190,
text_x_c__=590191,
text_x_component=590192,
text_x_diff=590193,
text_x_fortran=590194,
text_x_java=590195,
text_x_la_asf=590196,
text_x_lisp=590197,
text_x_m=590198,
text_x_m4=590199,
text_x_makefile=590200,
text_x_ms_regedit=590201,
text_x_msdos_batch=590202,
text_x_objective_c=590203,
text_x_pascal=590204,
text_x_perl=590205,
text_x_php=590206,
text_x_po=590207,
text_x_python=590208,
text_x_ruby=590209,
text_x_sass=590210,
text_x_scss=590211,
text_x_server_parsed_html=590212,
text_x_setext=590213,
text_x_sgml=590214 | 0x01000000,
text_x_shellscript=590215,
text_x_speech=590216,
text_x_tcl=590217,
text_x_tex=590218,
text_x_uil=590219,
text_x_uuencode=590220,
text_x_vcalendar=590221,
text_x_vcard=590222,
text_xml=590223 | 0x01000000,
video_MP2T=393616,
video_animaflex=393617,
video_avi=393618,
video_avs_video=393619,
video_mp4=393620,
video_mpeg=393621,
video_quicktime=393622,
video_vdo=393623,
video_vivo=393624,
video_vnd_rn_realvideo=393625,
video_vosaic=393626,
video_webm=393627,
video_x_amt_demorun=393628,
video_x_amt_showrun=393629,
video_x_atomic3d_feature=393630,
video_x_dl=393631,
video_x_dv=393632,
video_x_fli=393633,
video_x_flv=393634,
video_x_isvideo=393635,
video_x_jng=393636 | 0x80000000,
video_x_m4v=393637,
video_x_matroska=393638,
video_x_mng=393639,
video_x_motion_jpeg=393640,
video_x_ms_asf=393641,
video_x_msvideo=393642,
video_x_qtc=393643,
video_x_sgi_movie=393644,
x_epoc_x_sisx_app=721325,
application_x_envoy=655482,
application_x_esrehber=655483,
application_x_excel=655484,
application_x_executable=655485,
application_x_font_gdos=655486,
application_x_font_pf2=655487,
application_x_font_pfm=655488,
application_x_font_sfn=655489,
application_x_font_ttf=655490 | 0x20000000,
application_x_fptapplication_x_dbt=655491,
application_x_freelance=655492,
application_x_gamecube_rom=655493,
application_x_gdbm=655494,
application_x_gettext_translation=655495,
application_x_git=655496,
application_x_gsp=655497,
application_x_gss=655498,
application_x_gtar=655499,
application_x_gzip=655500,
application_x_hdf=655501,
application_x_helpfile=655502,
application_x_httpd_imap=655503,
application_x_ima=655504,
application_x_innosetup=655505,
application_x_internett_signup=655506,
application_x_inventor=655507,
application_x_ip2=655508,
application_x_java_applet=655509,
application_x_java_commerce=655510,
application_x_java_image=655511,
application_x_java_jmod=655512,
application_x_java_keystore=655513,
application_x_kdelnk=655514,
application_x_koan=655515,
application_x_latex=655516,
application_x_livescreen=655517,
application_x_lotus=655518,
application_x_lz4=655519 | 0x08000000,
application_x_lz4_json=655520,
application_x_lzh=655521,
application_x_lzh_compressed=655522,
application_x_lzip=655523 | 0x08000000,
application_x_lzma=655524 | 0x08000000,
application_x_lzop=655525 | 0x08000000,
application_x_lzx=655526,
application_x_mach_binary=655527,
application_x_mach_executable=655528,
application_x_magic_cap_package_1_0=655529,
application_x_mathcad=655530,
application_x_maxis_dbpf=655531,
application_x_meme=655532,
application_x_midi=655533,
application_x_mif=655534,
application_x_mix_transfer=655535,
application_x_mobipocket_ebook=655536 | 0x02000000,
application_x_ms_compress_szdd=655537,
application_x_ms_pdb=655538,
application_x_ms_reader=655539,
application_x_msaccess=655540,
application_x_n64_rom=655541,
application_x_navi_animation=655542,
application_x_navidoc=655543,
application_x_navimap=655544,
application_x_navistyle=655545,
application_x_nes_rom=655546,
application_x_netcdf=655547,
application_x_newton_compatible_pkg=655548,
application_x_nintendo_ds_rom=655549,
application_x_object=655550,
application_x_omc=655551,
application_x_omcdatamaker=655552,
application_x_omcregerator=655553,
application_x_pagemaker=655554,
application_x_pcl=655555,
application_x_pgp_keyring=655556,
application_x_pixclscript=655557,
application_x_pkcs7_certreqresp=655558,
application_x_pkcs7_signature=655559,
application_x_project=655560,
application_x_qpro=655561,
application_x_rar=655562 | 0x10000000,
application_x_rpm=655563,
application_x_sdp=655564,
application_x_sea=655565,
application_x_seelogo=655566,
application_x_setupscript=655567,
application_x_shar=655568,
application_x_sharedlib=655569,
application_x_shockwave_flash=655570,
application_x_snappy_framed=655571,
application_x_sprite=655572,
application_x_sqlite3=655573,
application_x_stargallery_thm=655574,
application_x_stuffit=655575,
application_x_sv4cpio=655576,
application_x_sv4crc=655577,
application_x_tar=655578 | 0x10000000,
application_x_tbook=655579,
application_x_terminfo=655580,
application_x_terminfo2=655581,
application_x_tex_tfm=655582,
application_x_texinfo=655583,
application_x_ustar=655584,
application_x_visio=655585,
application_x_vnd_audioexplosion_mzz=655586,
application_x_vnd_ls_xpix=655587,
application_x_vrml=655588,
application_x_wais_source=655589,
application_x_wine_extension_ini=655590,
application_x_wintalk=655591,
application_x_world=655592,
application_x_wri=655593,
application_x_x509_ca_cert=655594,
application_x_xz=655595 | 0x08000000,
application_x_zip=655596,
application_x_zstd=655597 | 0x08000000,
application_x_zstd_dictionary=655598,
application_xml=655599,
application_zip=655600 | 0x10000000,
application_zlib=655601,
audio_basic=458994 | 0x80000000,
audio_it=458995,
audio_make=458996,
audio_mid=458997,
audio_midi=458998,
audio_mp4=458999,
audio_mpeg=459000,
audio_ogg=459001,
audio_s3m=459002,
audio_tsp_audio=459003,
audio_tsplayer=459004,
audio_vnd_qcelp=459005,
audio_voxware=459006,
audio_x_aiff=459007,
audio_x_flac=459008,
audio_x_gsm=459009,
audio_x_hx_aac_adts=459010,
audio_x_jam=459011,
audio_x_liveaudio=459012,
audio_x_m4a=459013,
audio_x_midi=459014,
audio_x_mod=459015,
audio_x_mp4a_latm=459016,
audio_x_mpeg_3=459017,
audio_x_mpequrl=459018,
audio_x_nspaudio=459019,
audio_x_pn_realaudio=459020,
audio_x_psid=459021,
audio_x_realaudio=459022,
audio_x_s3m=459023,
audio_x_twinvq=459024,
audio_x_twinvq_plugin=459025,
audio_x_voc=459026,
audio_x_wav=459027,
audio_x_xbox_executable=459028 | 0x80000000,
audio_x_xbox360_executable=459029 | 0x80000000,
audio_xm=459030,
font_otf=327959 | 0x20000000,
font_sfnt=327960 | 0x20000000,
font_woff=327961 | 0x20000000,
font_woff2=327962 | 0x20000000,
image_bmp=524571,
image_cmu_raster=524572,
image_fif=524573,
image_florian=524574,
image_g3fax=524575,
image_gif=524576,
image_heic=524577,
image_ief=524578,
image_jpeg=524579,
image_jutvision=524580,
image_naplps=524581,
image_pict=524582,
image_png=524583,
image_svg=524584 | 0x80000000,
image_svg_xml=524585 | 0x80000000,
image_tiff=524586,
image_vnd_adobe_photoshop=524587 | 0x80000000,
image_vnd_djvu=524588 | 0x80000000,
image_vnd_fpx=524589,
image_vnd_microsoft_icon=524590,
image_vnd_rn_realflash=524591,
image_vnd_rn_realpix=524592,
image_vnd_wap_wbmp=524593,
image_vnd_xiff=524594,
image_webp=524595,
image_wmf=524596,
image_x_3ds=524597,
image_x_award_bioslogo=524598,
image_x_cmu_raster=524599,
image_x_cur=524600,
image_x_dwg=524601,
image_x_eps=524602,
image_x_exr=524603,
image_x_gem=524604,
image_x_icns=524605,
image_x_icon=524606 | 0x80000000,
image_x_jg=524607,
image_x_jps=524608,
image_x_ms_bmp=524609,
image_x_niff=524610,
image_x_pcx=524611,
image_x_pict=524612,
image_x_portable_bitmap=524613,
image_x_portable_graymap=524614,
image_x_portable_pixmap=524615,
image_x_quicktime=524616,
image_x_rgb=524617,
image_x_tga=524618,
image_x_tiff=524619,
image_x_win_bitmap=524620,
image_x_xcf=524621 | 0x80000000,
image_x_xpixmap=524622 | 0x80000000,
image_x_xwindowdump=524623,
message_news=196944,
message_rfc822=196945,
model_vnd_dwf=65874,
model_vnd_gdl=65875,
model_vnd_gs_gdl=65876,
model_vrml=65877,
model_x_pov=65878,
text_PGP=590167,
text_asp=590168,
text_css=590169,
text_html=590170 | 0x01000000,
text_javascript=590171,
text_mcf=590172,
text_pascal=590173,
text_plain=590174,
text_richtext=590175,
text_rtf=590176,
text_scriplet=590177,
text_tab_separated_values=590178,
text_troff=590179,
text_uri_list=590180,
text_vnd_abc=590181,
text_vnd_fmi_flexstor=590182,
text_vnd_wap_wml=590183,
text_vnd_wap_wmlscript=590184,
text_webviewhtml=590185,
text_x_Algol68=590186,
text_x_asm=590187,
text_x_audiosoft_intra=590188,
text_x_awk=590189,
text_x_bcpl=590190,
text_x_c=590191,
text_x_c__=590192,
text_x_component=590193,
text_x_diff=590194,
text_x_fortran=590195,
text_x_java=590196,
text_x_la_asf=590197,
text_x_lisp=590198,
text_x_m=590199,
text_x_m4=590200,
text_x_makefile=590201,
text_x_ms_regedit=590202,
text_x_msdos_batch=590203,
text_x_objective_c=590204,
text_x_pascal=590205,
text_x_perl=590206,
text_x_php=590207,
text_x_po=590208,
text_x_python=590209,
text_x_ruby=590210,
text_x_sass=590211,
text_x_scss=590212,
text_x_server_parsed_html=590213,
text_x_setext=590214,
text_x_sgml=590215 | 0x01000000,
text_x_shellscript=590216,
text_x_speech=590217,
text_x_tcl=590218,
text_x_tex=590219,
text_x_uil=590220,
text_x_uuencode=590221,
text_x_vcalendar=590222,
text_x_vcard=590223,
text_xml=590224 | 0x01000000,
video_MP2T=393617,
video_animaflex=393618,
video_avi=393619,
video_avs_video=393620,
video_mp4=393621,
video_mpeg=393622,
video_quicktime=393623,
video_vdo=393624,
video_vivo=393625,
video_vnd_rn_realvideo=393626,
video_vosaic=393627,
video_webm=393628,
video_x_amt_demorun=393629,
video_x_amt_showrun=393630,
video_x_atomic3d_feature=393631,
video_x_dl=393632,
video_x_dv=393633,
video_x_fli=393634,
video_x_flv=393635,
video_x_isvideo=393636,
video_x_jng=393637 | 0x80000000,
video_x_m4v=393638,
video_x_matroska=393639,
video_x_mng=393640,
video_x_motion_jpeg=393641,
video_x_ms_asf=393642,
video_x_msvideo=393643,
video_x_qtc=393644,
video_x_sgi_movie=393645,
x_epoc_x_sisx_app=721326,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@@ -866,6 +867,7 @@ case video_x_qtc: return "video/x-qtc";
case video_x_sgi_movie: return "video/x-sgi-movie";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
case application_vnd_ms_outlook: return "application/vnd.ms-outlook";
default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -883,6 +885,7 @@ g_hash_table_insert(ext_table, "frl", (gpointer)application_freeloader);
g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash);
g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise);
g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip);
g_hash_table_insert(ext_table, "tgz", (gpointer)application_gzip);
g_hash_table_insert(ext_table, "hta", (gpointer)application_hta);
g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas);
g_hash_table_insert(ext_table, "iges", (gpointer)application_iges);
@@ -1385,6 +1388,7 @@ g_hash_table_insert(ext_table, "divx", (gpointer)video_x_msvideo);
g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1816,5 +1820,6 @@ g_hash_table_insert(mime_table, "video/x-qtc", (gpointer)video_x_qtc);
g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie);
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook);
return mime_table;}
#endif

View File

@@ -49,7 +49,6 @@ void parse(void *arg) {
return;
}
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
doc.base = (short) job->base;
@@ -61,7 +60,7 @@ void parse(void *arg) {
doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate(doc.uuid);
char *buf[PARSE_BUF_SIZE];
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
@@ -79,7 +78,12 @@ void parse(void *arg) {
if (doc.mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
if (!job->vfile.is_fs_file) {
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
goto abort;
}
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
if (job->vfile.is_fs_file) {
@@ -148,14 +152,13 @@ void parse(void *arg) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
}
abort:
//Parent meta
if (!uuid_is_null(job->parent)) {
char tmp[UUID_STR_LEN];
uuid_unparse(job->parent, tmp);
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent;
strcpy(meta_parent->str_val, tmp);
uuid_unparse(job->parent, meta_parent->str_val);
APPEND_META((&doc), meta_parent)
}

View File

@@ -3,7 +3,7 @@
#include "../sist.h"
#define PARSE_BUF_SIZE 4096
#define MAGIC_BUF_SIZE 4096 * 6
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);

4
src/static/css/autocomplete.min.css vendored Normal file
View File

@@ -0,0 +1,4 @@
.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
.autocomplete-suggestion.selected { background: #f0f0f0; }

View File

@@ -121,7 +121,7 @@ body {
background: #546b7a;
}
.navbar a:hover {
a:hover,.btn:hover {
color: #fff;
}
@@ -266,6 +266,7 @@ mark {
margin: 3px;
white-space: normal;
color: rgb(224, 224, 224);
overflow: hidden;
}
.irs-single, .irs-from, .irs-to {
@@ -480,3 +481,34 @@ svg {
cursor: pointer;
}
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #212121;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}
#graphs-card svg text {
fill: #eee;
}

View File

@@ -205,6 +205,7 @@ mark {
margin: 3px;
white-space: normal;
color: #000;
overflow: hidden;
}
.irs-single, .irs-from, .irs-to {
@@ -347,3 +348,30 @@ mark {
cursor: pointer;
}
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #fff;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}

3
src/static/js/auto-complete.min.js vendored Normal file

File diff suppressed because one or more lines are too long

2
src/static/js/d3.v5.min.js vendored Normal file

File diff suppressed because one or more lines are too long

2
src/static/js/dom-to-image.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -27,18 +27,12 @@ function gifOver(thumbnail, hit) {
}
function getContentHighlight(hit) {
const re = RegExp(/<mark>/g);
const sortByMathCount = (a, b) => {
return b.match(re).length - a.match(re).length;
};
if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("content")) {
return hit["highlight"]["content"].sort(sortByMathCount)[0];
return hit["highlight"]["content"][0];
} else if (hit["highlight"].hasOwnProperty("content.nGram")) {
return hit["highlight"]["content.nGram"].sort(sortByMathCount)[0];
return hit["highlight"]["content.nGram"][0];
}
}
@@ -77,6 +71,7 @@ function shouldPlayVideo(hit) {
return mime &&
mime.startsWith("video/") &&
!("parent" in hit["_source"]) &&
hit["_source"]["extension"] !== "mkv" &&
hit["_source"]["extension"] !== "avi" &&
videoc !== "hevc" &&

View File

@@ -20,16 +20,6 @@ let size_max = 10000000000000;
let date_min = null;
let date_max = null;
const CONF = new Settings();
const _defaults = {
display: "grid",
fuzzy: true,
highlight: true,
sort: "score",
searchInPath: false,
};
SORT_MODES = {
score: {
text: "Relevance",
@@ -69,36 +59,6 @@ SORT_MODES = {
},
};
function Settings() {
this.options = {};
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
};
this.load = function () {
const raw = window.localStorage.getItem("options");
if (raw === null) {
this.options = _defaults;
} else {
const j = JSON.parse(raw);
if (!j || Object.keys(_defaults).some(k => !j.hasOwnProperty(k))) {
this.options = _defaults;
} else {
this.options = j;
}
}
this._onUpdate();
};
this.save = function () {
window.localStorage.setItem("options", JSON.stringify(this.options));
this._onUpdate();
}
}
function showEsError() {
$.toast({
heading: "Elasticsearch connection error",
@@ -112,28 +72,43 @@ function showEsError() {
});
}
jQuery["jsonPost"] = function (url, data) {
return jQuery.ajax({
url: url,
type: "post",
data: JSON.stringify(data),
contentType: "application/json"
}).fail(err => {
showEsError();
console.log(err);
});
};
window.onload = () => {
$("#theme").on("click", () => {
if (!document.cookie.includes("sist")) {
document.cookie = "sist=dark";
} else {
document.cookie = "sist=; Max-Age=-99999999;";
}
window.location.reload();
})
CONF.load();
new autoComplete({
selector: '#pathBar',
minChars: 1,
delay: 400,
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
},
source: async function (term, suggest) {
if (!CONF.options.suggestPath) {
return []
}
term = term.toLowerCase();
const choices = await getPathChoices();
let matches = [];
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches.sort());
},
onSelect: function () {
searchDebounced();
}
});
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", e => {
if (e.key === "Enter") {
searchDebounced();
}
});
};
function toggleFuzzy() {
@@ -165,10 +140,7 @@ $.jsonPost("i").then(resp => {
});
function getDocumentInfo(id) {
return $.getJSON("d/" + id).fail(e => {
console.log(e);
showEsError();
})
return $.getJSON("d/" + id).fail(showEsError)
}
function handleTreeClick(tree) {
@@ -392,24 +364,24 @@ function search(after = null) {
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push([{term: {path: path}}])
filters.push({term: {path: path}})
}
let mimeTypes = getSelectedNodes(mimeTree);
if (!mimeTypes.includes("any")) {
filters.push([{terms: {"mime": mimeTypes}}]);
filters.push({terms: {"mime": mimeTypes}});
}
let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) {
filters.push([{terms: {"tag": tags}}]);
filters.push({terms: {"tag": tags}});
}
if (date_min && date_max) {
filters.push([{range: {mtime: {gte: date_min, lte: date_max}}}])
filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
} else if (date_min) {
filters.push([{range: {mtime: {gte: date_min}}}])
filters.push({range: {mtime: {gte: date_min}}})
} else if (date_max) {
filters.push([{range: {mtime: {lte: date_max}}}])
filters.push({range: {mtime: {lte: date_max}}})
}
let q = {
@@ -445,6 +417,9 @@ function search(after = null) {
q.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
fragment_size: CONF.options.fragmentSize,
number_of_fragments: 1,
order: "score",
fields: {
content: {},
// "content.nGram": {},
@@ -465,6 +440,11 @@ function search(after = null) {
lastDoc = hits[hits.length - 1];
}
hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
});
if (!after) {
preload.remove();
searchResults.appendChild(makeStatsCard(searchResult));
@@ -496,8 +476,6 @@ let searchDebounced = _.debounce(function () {
search()
}, 500);
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", searchDebounced);
//Size slider
$("#sizeSlider").ionRangeSlider({
@@ -603,7 +581,7 @@ function getNextDepth(node) {
}
},
size: 0
}
};
if (node.depth > 0) {
q.query.bool.must = {
@@ -662,7 +640,8 @@ function createPathTree(target) {
let pathTree = new InspireTree({
data: function (node, resolve, reject) {
return getNextDepth(node);
}
},
sort: "text"
});
selectedIndices.forEach(index => {
@@ -682,32 +661,19 @@ function createPathTree(target) {
pathTree.on("node.click", handlePathTreeClick(pathTree));
}
function updateSettings() {
CONF.options.display = $("#settingDisplay").val();
CONF.options.fuzzy = $("#settingFuzzy").prop("checked");
CONF.options.highlight = $("#settingHighlight").prop("checked");
CONF.options.searchInPath = $("#settingSearchInPath").prop("checked");
CONF.save();
searchDebounced();
$.toast({
heading: "Settings updated",
text: "Settings saved to browser storage",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
}
function loadSettings() {
CONF.load();
$("#settingDisplay").val(CONF.options.display);
$("#settingFuzzy").prop("checked", CONF.options.fuzzy);
$("#settingHighlight").prop("checked", CONF.options.highlight);
$("#settingSearchInPath").prop("checked", CONF.options.searchInPath);
function getPathChoices() {
return new Promise(getPaths => {
$.jsonPost("es", {
suggest: {
path: {
prefix: pathBar.value,
completion: {
field: "suggest-path",
skip_duplicates: true,
size: 10000
}
}
}
}).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"])));
})
}

View File

@@ -64,3 +64,142 @@ function lum(c) {
return 0.2126 * r + 0.7152 * g + 0.0722 * b;
}
function strUnescape(str) {
let result = "";
for (let i = 0; i < str.length; i++) {
const c = str[i];
const next = str[i+1];
if (c === ']') {
if (next === ']') {
result += c;
i += 1;
} else {
result += String.fromCharCode(parseInt(str.slice(i, i + 2), 16));
i += 2;
}
} else {
result += c;
}
}
return result;
}
const CONF = new Settings();
const _defaults = {
display: "grid",
fuzzy: true,
highlight: true,
sort: "score",
searchInPath: false,
treemapType: "cascaded",
treemapTiling: "squarify",
treemapGroupingDepth: 3,
treemapColor: "PuBuGn",
treemapSize: "large",
suggestPath: true,
fragmentSize: 100
};
function loadSettings() {
CONF.load();
$("#settingDisplay").val(CONF.options.display);
$("#settingFuzzy").prop("checked", CONF.options.fuzzy);
$("#settingHighlight").prop("checked", CONF.options.highlight);
$("#settingSearchInPath").prop("checked", CONF.options.searchInPath);
$("#settingTreemapTiling").val(CONF.options.treemapTiling);
$("#settingTreemapGroupingDepth").val(CONF.options.treemapGroupingDepth);
$("#settingTreemapColor").val(CONF.options.treemapColor);
$("#settingTreemapSize").val(CONF.options.treemapSize);
$("#settingTreemapType").val(CONF.options.treemapType);
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
$("#settingFragmentSize").val(CONF.options.fragmentSize);
}
function Settings() {
this.options = {};
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
};
this.load = function () {
const raw = window.localStorage.getItem("options");
if (raw === null) {
this.options = _defaults;
} else {
const j = JSON.parse(raw);
if (!j || Object.keys(_defaults).some(k => !j.hasOwnProperty(k))) {
this.options = _defaults;
} else {
this.options = j;
}
}
this._onUpdate();
};
this.save = function () {
window.localStorage.setItem("options", JSON.stringify(this.options));
this._onUpdate();
}
}
function updateSettings() {
CONF.options.display = $("#settingDisplay").val();
CONF.options.fuzzy = $("#settingFuzzy").prop("checked");
CONF.options.highlight = $("#settingHighlight").prop("checked");
CONF.options.searchInPath = $("#settingSearchInPath").prop("checked");
CONF.options.treemapTiling = $("#settingTreemapTiling").val();
CONF.options.treemapGroupingDepth = $("#settingTreemapGroupingDepth").val();
CONF.options.treemapColor = $("#settingTreemapColor").val();
CONF.options.treemapSize = $("#settingTreemapSize").val();
CONF.options.treemapType = $("#settingTreemapType").val();
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
CONF.options.fragmentSize = $("#settingFragmentSize").val();
CONF.save();
if (typeof searchDebounced !== "undefined") {
searchDebounced();
}
if (typeof updateStats !== "undefined") {
updateStats();
}
$.toast({
heading: "Settings updated",
text: "Settings saved to browser storage",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
}
jQuery["jsonPost"] = function (url, data) {
return jQuery.ajax({
url: url,
type: "post",
data: JSON.stringify(data),
contentType: "application/json"
}).fail(err => {
showEsError();
console.log(err);
});
};
function toggleTheme() {
if (!document.cookie.includes("sist")) {
document.cookie = "sist=dark;SameSite=Strict";
} else {
document.cookie = "sist=; Max-Age=-99999999;";
}
window.location.reload();
}

View File

@@ -11,10 +11,11 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.2.0</span>
<span class="badge badge-pill version">2.4.2</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<button style="margin-left: auto" class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<a id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
<a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container">
@@ -191,14 +192,81 @@
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slide & Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button style="float: right" class="btn btn-primary" onclick="updateSettings()">Update settings</button>
<button class="btn btn-primary ml-auto" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
@@ -223,6 +291,7 @@
<div id="searchResults"></div>
</div>
<script src="js" type="text/javascript"></script>
<script src="jslib" type="text/javascript"></script>
<script src="jssearch" type="text/javascript"></script>
</body>
</html>

800
src/static/stats.html Normal file
View File

@@ -0,0 +1,800 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>sist2 - Stats</title>
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<link href="css" rel="stylesheet" type="text/css">
</head>
<body>
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.4.2</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container pb-3">
<div class="card">
<div class="card-body">
<label for="indices">Index</label>
<select id="indices" onchange="updateStats()"></select>
</div>
</div>
<div id="treemap-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
<svg id="treemap"></svg>
</div>
<div id="graphs-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
<div class="graph">
<svg id="agg_mime_size"></svg>
</div>
<div class="graph">
<svg id="agg_mime_count"></svg>
</div>
<div class="graph">
<svg id="date_histogram"></svg>
</div>
<div class="graph">
<svg id="size_histogram"></svg>
</div>
</div>
</div>
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Settings</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingHighlight">
<label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingFuzzy">
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document
path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slide & Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button class="btn btn-primary float-right" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
</div>
<script src="jslib" type="text/javascript"></script>
<script>
let width;
let height;
let indexMap = {};
const barHeight = 20;
const ordinalColor = d3.scaleOrdinal(d3.schemeCategory10);
const formatSI = d3.format("~s");
const TILING_MODES = {
"squarify": d3.treemapSquarify,
"binary": d3.treemapBinary,
"sliceDice": d3.treemapSliceDice,
"slice": d3.treemapSlice,
"dice": d3.treemapDice,
};
const COLORS = {
"PuBuGn": d3.interpolatePuBuGn,
"PuRd": d3.interpolatePuRd,
"PuBu": d3.interpolatePuBu,
"YlOrBr": d3.interpolateYlOrBr,
"YlOrRd": d3.interpolateYlOrRd,
"YlGn": d3.interpolateYlGn,
"YlGnBu": d3.interpolateYlGnBu,
"Plasma": d3.interpolatePlasma,
"Magma": d3.interpolateMagma,
"Inferno": d3.interpolateInferno,
"Viridis": d3.interpolateViridis,
"Turbo": d3.interpolateTurbo,
};
const SIZES = {
"small": [800, 600],
"medium": [1300, 750],
"large": [1900, 900],
"x-large": [2800, 1700],
"xx-large": [3600, 2000],
};
const fillOpacity = document.cookie.includes("sist") ? 0.9 : 0.6;
const uids = {};
function uid(name) {
let id = uids[name] || 0;
uids[name] = id + 1;
return name + id;
}
const burrow = function (table, addSelfDir) {
const root = {};
table.forEach(row => {
let layer = root;
row.taxonomy.forEach(key => {
layer[key] = key in layer ? layer[key] : {};
layer = layer[key];
});
if (Object.keys(layer).length === 0) {
layer["$size$"] = row.size;
} else if (addSelfDir) {
layer["."] = {
"$size$": row.size,
};
}
});
const descend = function (obj, depth) {
return Object.keys(obj).filter(k => k !== "$size$").map(k => {
const child = {
name: k,
depth: depth,
value: 0,
children: descend(obj[k], depth + 1)
};
if ("$size$" in obj[k]) {
child.value = obj[k]["$size$"];
}
return child;
});
};
return {
name: `[${indexMap[$("#indices").val()]}]`,
children: descend(root, 1),
value: 0,
depth: 0,
}
};
function flatTreemap(data, svg) {
const root = d3.treemap()
.tile(TILING_MODES[CONF.options.treemapTiling])
.size([width, height])
.padding(1)
.round(true)(
d3.hierarchy(data)
.sum(d => d.value)
.sort((a, b) => b.value - a.value)
);
const leaf = svg.selectAll("g")
.data(root.leaves())
.join("g")
.attr("transform", d => `translate(${d.x0},${d.y0})`);
leaf.append("title")
.text(d => `${d.ancestors().reverse().map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);
leaf.append("rect")
.attr("id", d => (d.leafUid = uid("leaf")))
.attr("fill", d => {
while (d.depth > CONF.options.treemapGroupingDepth) d = d.parent;
return ordinalColor(d.data.name);
})
.attr("fill-opacity", fillOpacity)
.attr("width", d => d.x1 - d.x0)
.attr("height", d => d.y1 - d.y0);
leaf.append("clipPath")
.attr("id", d => (d.clipUid = uid("clip")))
.append("use")
.attr("href", d => `#${d.leafUid}`);
leaf.append("text")
.attr("clip-path", d => `url(#${d.clipUid})`)
.selectAll("tspan")
.data(d => {
if (d.data.name === ".") {
d = d.parent;
}
return [d.data.name, humanFileSize(d.value)]
})
.join("tspan")
.attr("x", 2)
.attr("y", (d, i, nodes) => `${i === 0 ? 1.1 : 2.3}em`)
.text(d => d);
}
function cascade(root, offset) {
const x = new Map;
const y = new Map;
return root.eachAfter(d => {
if (d.children && d.children.length !== 0) {
x.set(d, 1 + d3.max(d.children, c => c.x1 === d.x1 - offset ? x.get(c) : NaN));
y.set(d, 1 + d3.max(d.children, c => c.y1 === d.y1 - offset ? y.get(c) : NaN));
} else {
x.set(d, 0);
y.set(d, 0);
}
}).eachBefore(d => {
d.x1 -= 2 * offset * x.get(d);
d.y1 -= 2 * offset * y.get(d);
});
}
function cascadeTreemap(data, svg) {
const root = cascade(
d3.treemap()
.size([width, height])
.tile(TILING_MODES[CONF.options.treemapTiling])
.paddingOuter(3)
.paddingTop(16)
.paddingInner(1)
.round(true)(
d3.hierarchy(data)
.sum(d => d.value)
.sort((a, b) => b.value - a.value)
),
3 // treemap.paddingOuter
);
const maxDepth = Math.max(...root.descendants().map(d => d.depth));
const color = d3.scaleSequential([maxDepth, -1], COLORS[CONF.options.treemapColor]);
svg.append("filter")
.attr("id", "shadow")
.append("feDropShadow")
.attr("flood-opacity", 0.3)
.attr("dx", 0)
.attr("stdDeviation", 3);
const node = svg.selectAll("g")
.data(
d3.nest()
.key(d => d.depth).sortKeys(d3.ascending)
.entries(root.descendants())
)
.join("g")
.attr("filter", "url(#shadow)")
.selectAll("g")
.data(d => d.values)
.join("g")
.attr("transform", d => `translate(${d.x0},${d.y0})`);
node.append("title")
.text(d => `${d.ancestors().reverse().splice(1).map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);
node.append("rect")
.attr("id", d => (d.nodeUid = uid("node")))
.attr("fill", d => color(d.depth))
.attr("width", d => d.x1 - d.x0)
.attr("height", d => d.y1 - d.y0);
node.append("clipPath")
.attr("id", d => (d.clipUid = uid("clip")))
.append("use")
.attr("href", d => `#${d.nodeUid}`);
node.append("text")
.attr("fill", d => d3.hsl(color(d.depth)).l > .5 ? "#333" : "#eee")
.attr("clip-path", d => `url(#${d.clipUid})`)
.selectAll("tspan")
.data(d => [d.data.name, humanFileSize(d.value)])
.join("tspan")
.text(d => d);
node.filter(d => d.children).selectAll("tspan")
.attr("dx", 3)
.attr("y", 13);
node.filter(d => !d.children).selectAll("tspan")
.attr("x", 3)
.attr("y", (d, i, nodes) => `${i === 0 ? 1.1 : 2.3}em`);
}
function mimeBarSize(data, svg) {
const margin = {
top: 50,
right: 0,
bottom: 10,
left: Math.max(
d3.max(data.sort((a, b) => b.count - a.count).slice(0, 15), d => d.mime.length) * 6,
d3.max(data.sort((a, b) => b.size - a.size).slice(0, 15), d => d.mime.length) * 6,
)
};
data.forEach(d => {
d.name = d.mime;
d.value = Number(d.size);
});
data = data.sort((a, b) => b.value - a.value).slice(0, 15);
const width = 550;
const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleBand()
.domain(d3.range(data.length))
.rangeRound([margin.top, height - margin.bottom]);
const x = d3.scaleLinear()
.domain([0, d3.max(data, d => d.value)])
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill-opacity", fillOpacity)
.selectAll("rect")
.data(data)
.join("rect")
.attr("fill", d => ordinalColor(d.name))
.attr("x", x(0))
.attr("y", (d, i) => y(i))
.attr("width", d => x(d.value) - x(0))
.attr("height", y.bandwidth())
.append("title")
.text(d => formatSI(d.value));
svg.append("g")
.attr("transform", `translate(0,${margin.top})`)
.call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
.call(g => g.select(".domain").remove());
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("Size distribution by MIME type");
}
function mimeBarCount(data, svg) {
const margin = {
top: 50,
right: 0,
bottom: 10,
left: Math.max(
d3.max(data.sort((a, b) => b.count - a.count).slice(0, 15), d => d.mime.length) * 6,
d3.max(data.sort((a, b) => b.size - a.size).slice(0, 15), d => d.mime.length) * 6,
)
};
data.forEach(d => {
d.name = d.mime;
d.value = Number(d.count);
});
data = data.sort((a, b) => b.value - a.value).slice(0, 15);
const width = 550;
const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleBand()
.domain(d3.range(data.length))
.rangeRound([margin.top, height - margin.bottom]);
const x = d3.scaleLinear()
.domain([0, d3.max(data, d => d.value)])
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill-opacity", fillOpacity)
.selectAll("rect")
.data(data)
.join("rect")
.attr("fill", d => ordinalColor(d.name))
.attr("x", x(0))
.attr("y", (d, i) => y(i))
.attr("width", d => x(d.value) - x(0))
.attr("height", y.bandwidth())
.append("title")
.text(d => d3.format(",")(d.value));
svg.append("g")
.attr("transform", `translate(0,${margin.top})`)
.call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
.call(g => g.select(".domain").remove());
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File count distribution by MIME type");
}
function dateHistogram(data, svg) {
let bins = data.map(d => {
return {
length: Number(d.count),
x0: Number(d.bucket),
x1: Number(d.bucket) + 2629800
}
});
bins.sort((a, b) => a.length - b.length);
const margin = {
top: 50,
right: 20,
bottom: 70,
left: 40
};
const thresh = d3.quantile(bins, 0.9, d => d.length);
bins = bins.filter(d => d.length > thresh);
const width = 550;
const height = 450;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleLinear()
.domain([0, d3.max(bins, d => d.length)]).nice()
.range([height - margin.bottom, margin.top]);
const x = d3.scaleLinear()
.domain(d3.extent(bins, d => d.x0)).nice()
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill", "steelblue")
.selectAll("rect")
.data(bins)
.join("rect")
.attr("x", d => x(d.x0) + 1)
.attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
.attr("y", d => y(d.length))
.attr("height", d => y(0) - y(d.length))
.call(g => g
.append("title")
.text(d => d.length)
);
svg.append("g")
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(
d3.axisBottom(x)
.ticks(width / 30)
.tickSizeOuter(0)
.tickFormat(t => d3.timeFormat("%Y-%m-%d")(d3.utcParse("%s")(t)))
)
.call(g => g
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", ".15em")
.attr("transform", "rotate(-65)")
)
.call(g => g.append("text")
.attr("x", width - margin.right)
.attr("y", -4)
.attr("fill", "currentColor")
.attr("font-weight", "bold")
.attr("text-anchor", "end")
.text("mtime")
);
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(
d3.axisLeft(y)
.ticks(height / 40)
.tickFormat(t => formatSI(t))
)
.call(g => g.select(".domain").remove())
.call(g => g.select(".tick:last-of-type text").clone()
.attr("x", 4)
.attr("text-anchor", "start")
.attr("font-weight", "bold")
.text("File count"));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File modification time distribution");
}
function sizeHistogram(data, svg) {
let bins = data.map(d => {
return {
length: Number(d.count),
x0: Number(d.bucket),
x1: Number(d.bucket) + (5 * 1024 * 1024)
}
});
bins = bins.sort((a, b) => b.length - a.length).slice(0, 25);
const margin = {
top: 50,
right: 20,
bottom: 70,
left: 40
};
const width = 550;
const height = 450;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleLinear()
.domain([0, d3.max(bins, d => d.length)])
.range([height - margin.bottom, margin.top]);
const x = d3.scaleLinear()
.domain(d3.extent(bins, d => d.x0)).nice()
.range([margin.left, width - margin.right]);
svg.append("g")
.attr("fill", "steelblue")
.selectAll("rect")
.data(bins)
.join("rect")
.attr("x", d => x(d.x0) + 1)
.attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
.attr("y", d => y(d.length))
.attr("height", d => y(0) - y(d.length))
.call(g => g
.append("title")
.text(d => d.length)
);
svg.append("g")
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(
d3.axisBottom(x)
.ticks(width / 30)
.tickSizeOuter(0)
.tickFormat(formatSI)
)
.call(g => g
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", ".15em")
.attr("transform", "rotate(-65)")
)
.call(g => g.append("text")
.attr("x", width - margin.right)
.attr("y", -4)
.attr("fill", "currentColor")
.attr("font-weight", "bold")
.attr("text-anchor", "end")
.text("size (bytes)")
);
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(
d3.axisLeft(y)
.ticks(height / 40)
.tickFormat(t => formatSI(t))
)
.call(g => g.select(".domain").remove())
.call(g => g.select(".tick:last-of-type text").clone()
.attr("x", 4)
.attr("text-anchor", "start")
.attr("font-weight", "bold")
.text("File count"));
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File size distribution");
}
function updateStats() {
width = SIZES[CONF.options.treemapSize][0];
height = SIZES[CONF.options.treemapSize][1];
const treemapSvg = d3.select("#treemap");
const mimeSvgSize = d3.select("#agg_mime_size");
const mimeSvgCount = d3.select("#agg_mime_count");
const dateHistogramSvg = d3.select("#date_histogram");
const sizeHistogramSvg = d3.select("#size_histogram");
const indexId = $("#indices").val();
d3.csv(`/s/${indexId}/1`).then(tabularData => {
tabularData.forEach(row => {
row.taxonomy = row.path.split("/");
row.size = Number(row.size);
});
if (CONF.options.treemapType === "cascaded") {
const data = burrow(tabularData, false);
cascadeTreemap(data, treemapSvg);
} else {
const data = burrow(tabularData.sort((a, b) => b.taxonomy.length - a.taxonomy.length), true);
flatTreemap(data, treemapSvg);
}
});
d3.csv(`/s/${indexId}/2`).then(tabularData => {
mimeBarSize(tabularData.slice(), mimeSvgSize);
mimeBarCount(tabularData.slice(), mimeSvgCount);
});
d3.csv(`/s/${indexId}/3`).then(tabularData => {
sizeHistogram(tabularData, sizeHistogramSvg);
});
d3.csv(`/s/${indexId}/4`).then(tabularData => {
dateHistogram(tabularData, dateHistogramSvg);
});
treemapSvg.selectAll("*").remove();
treemapSvg.attr("viewBox", [0, 0, width, height])
.attr("xmlns", "http://www.w3.org/2000/svg")
.attr("xmlns:xlink", "http://www.w3.org/1999/xlink")
.attr("version", "1.1")
.style("overflow", "visible")
.style("font", "10px sans-serif");
}
window.onload = function () {
CONF.load();
$.jsonPost("i").then(resp => {
const select = $("#indices");
const urlIndices = (new URLSearchParams(location.search)).get("i");
resp["indices"].forEach(idx => {
indexMap[idx.id] = idx.name;
select.append($("<option>")
.attr("value", idx.id)
.append(idx.name));
if (urlIndices && urlIndices.split(",").indexOf(idx.name) !== -1) {
select.select(idx.name);
}
});
updateStats();
});
};
function fullScreen(selector) {
const card = document.getElementById(selector);
card.classList.toggle("full-screen");
}
function exportTreemap() {
domtoimage.toBlob(document.getElementById("treemap"), {width: width, height: height})
.then(function (blob) {
let a = document.createElement("a");
let url = URL.createObjectURL(blob);
a.href = url;
a.download = `${indexMap[$("#indices").val()]}_treemap.png`;
document.body.appendChild(a);
a.click();
setTimeout(function() {
document.body.removeChild(a);
window.URL.revokeObjectURL(url);
}, 0);
});
}
</script>
</body>
</html>

353
src/stats.c Normal file
View File

@@ -0,0 +1,353 @@
#include "sist.h"
#include "io/serialize.h"
#include "ctx.h"
#include <glib.h>
static GHashTable *FlatTree;
static GHashTable *BufferTable;
static GHashTable *AggMime;
static GHashTable *AggSize;
static GHashTable *AggDate;
#define SIZE_BUCKET (long)(5 * 1024 * 1024)
#define DATE_BUCKET (long)(2629800)
static long TotalSize = 0;
static long DocumentCount = 0;
typedef struct {
long size;
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
}
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1);
strcpy(path, json_path);
const char *json_mime = cJSON_GetObjectItem(document, "mime")->valuestring;
char *mime;
if (json_mime == NULL) {
mime = NULL;
} else {
mime = malloc(strlen(json_mime) + 1);
strcpy(mime, json_mime);
}
long size = (long) cJSON_GetObjectItem(document, "size")->valuedouble;
int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
// treemap
void *existing_path = g_hash_table_lookup(FlatTree, path);
if (existing_path == NULL) {
g_hash_table_insert(FlatTree, path, (gpointer) size);
} else {
g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size));
}
// mime agg
if (mime != NULL) {
agg_t *orig_agg = g_hash_table_lookup(AggMime, mime);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggMime, mime, agg);
} else {
orig_agg->size += size;
orig_agg->count += 1;
free(mime);
}
}
// size agg
long size_bucket = size - (size % SIZE_BUCKET);
agg_t *orig_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggSize, (gpointer) size_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
// date agg
long date_bucket = mtime - (mtime % DATE_BUCKET);
orig_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggDate, (gpointer) date_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
TotalSize += size;
DocumentCount += 1;
}
void read_index_into_tables(index_t *index) {
DIR *dir = opendir(index->path);
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
}
}
closedir(dir);
}
static size_t rfind(const char *str, int c) {
for (int i = (int)strlen(str); i >= 0; i--) {
if (str[i] == c) {
return i;
}
}
return -1;
}
int merge_up(double thresh) {
long min_size = (long) (thresh * (double) TotalSize);
int count = 0;
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size < min_size) {
int stop = rfind(key, '/');
if (stop == -1) {
stop = 0;
}
char *parent = malloc(stop + 1);
strncpy(parent, key, stop);
*(parent + stop) = '\0';
void *existing_parent = g_hash_table_lookup(FlatTree, parent);
if (existing_parent == NULL) {
void *existing_parent2_key;
void *existing_parent2_val;
int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key,
&existing_parent2_val);
if (!found) {
g_hash_table_insert(BufferTable, parent, value);
} else {
g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size));
free(existing_parent2_key);
}
} else {
g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size));
}
g_hash_table_iter_remove(&iter);
count += 1;
}
}
g_hash_table_iter_init(&iter, BufferTable);
while (g_hash_table_iter_next(&iter, &key, &value)) {
g_hash_table_insert(FlatTree, key, value);
g_hash_table_iter_remove(&iter);
}
int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
return count;
}
/**
* Assumes out is at at least PATH_MAX *4
*/
void csv_escape(char *dst, const char *str) {
const char *ptr = str;
char *out = dst;
if (rfind(str, ',') == -1 && rfind(str, '"') == -1) {
strcpy(dst, str);
return;
}
*out++ = '"';
char c;
while ((c = *ptr++) != 0) {
if (c == '"') {
*out++ = '"';
*out++ = '"';
} else {
*out++ = c;
}
}
*out++ = '"';
*out = '\0';
}
int open_or_exit(const char *path) {
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno)
}
return fd;
}
#define TREEMAP_CSV_HEADER "path,size"
#define MIME_AGG_CSV_HEADER "mime,size,count"
#define SIZE_AGG_CSV_HEADER "bucket,size,count"
#define DATE_AGG_CSV_HEADER "bucket,size,count"
void write_treemap_csv(double thresh, const char *out_path) {
void *key;
void *value;
long min_size = (long) (thresh * (double) TotalSize);
int fd = open_or_exit(out_path);
int ret = write(fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size >= min_size) {
char path_buf[PATH_MAX * 4];
char buf[PATH_MAX * 4 + 16];
csv_escape(path_buf, key);
size_t written = sprintf(buf, "\n%s,%ld", path_buf, (long) value);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
}
close(fd);
}
void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%s,%ld,%ld", (const char*)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%ld,%ld,%ld", (long)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
int generate_stats(index_t *index, const double threshold, const char *out_prefix) {
FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
BufferTable = g_hash_table_new(g_str_hash, g_str_equal);
AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
LOG_INFO("stats.c", "Generating stats...")
read_index_into_tables(index);
LOG_DEBUG("stats.c", "Read index into tables")
LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize)
LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount)
LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100)
while (merge_up(threshold) > 100) {}
char tmp[PATH_MAX];
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "treemap.csv");
write_treemap_csv(threshold, tmp);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "mime_agg.csv");
write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "size_agg.csv");
write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "date_agg.csv");
write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate);
g_hash_table_remove_all(FlatTree);
g_hash_table_destroy(FlatTree);
g_hash_table_destroy(BufferTable);
g_hash_table_remove_all(AggMime);
g_hash_table_destroy(AggMime);
g_hash_table_remove_all(AggSize);
g_hash_table_destroy(AggSize);
g_hash_table_remove_all(AggDate);
g_hash_table_destroy(AggDate);
return 0;
}

6
src/stats.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef SIST2_STATS_H
#define SIST2_STATS_H
int generate_stats(index_t *index, double threshold, const char* out_prefix);
#endif

View File

@@ -26,10 +26,11 @@ dyn_buffer_t url_escape(char *str) {
}
char *abspath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
char *abs = realpath(w.we_wordv[0], NULL);
char *expanded = expandpath(path);
char *abs = realpath(expanded, NULL);
free(expanded);
if (abs == NULL) {
return NULL;
}
@@ -38,16 +39,46 @@ char *abspath(const char *path) {
strcat(abs, "/");
}
wordfree(&w);
return abs;
}
char *expandpath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
void shell_escape(char *dst, const char *src) {
const char *ptr = src;
char *out = dst;
while ((*ptr)) {
char c = *ptr++;
char *expanded = malloc(strlen(w.we_wordv[0]) + 2);
strcpy(expanded, w.we_wordv[0]);
if (c == '&' || c == '\n' || c == '|' || c == ';' || c == '<' ||
c == '>' || c == '(' || c == ')' || c == '{' || c == '}') {
*out++ = '\\';
}
*out++ = c;
}
*out = 0;
}
char *expandpath(const char *path) {
char tmp[PATH_MAX * 2];
shell_escape(tmp, path);
wordexp_t w;
wordexp(tmp, &w, 0);
if (w.we_wordv == NULL) {
return NULL;
}
*tmp = '\0';
for (int i = 0; i < w.we_wordc; i++) {
strcat(tmp, w.we_wordv[i]);
if (i != w.we_wordc - 1) {
strcat(tmp, " ");
}
}
char *expanded = malloc(strlen(tmp) + 2);
strcpy(expanded, tmp);
strcat(expanded, "/");
wordfree(&w);
@@ -123,4 +154,92 @@ const char *find_file_in_paths(const char *paths[], const char *filename) {
return NULL;
}
#define ESCAPE_CHAR ']'
void str_escape(char *dst, const char *str) {
const size_t len = strlen(str);
char buf[16384];
memset(buf + len, 0, 8);
strcpy(buf, str);
char *cur = dst;
const char *ptr = buf;
const char *oldPtr = ptr;
utf8_int32_t c;
char tmp[16];
do {
ptr = (char *) utf8codepoint(ptr, &c);
*(int *) tmp = 0x00000000;
size_t code_len = (ptr - oldPtr);
memcpy(tmp, oldPtr, code_len);
oldPtr = ptr;
if (!utf8_validchr2(tmp)) {
for (int i = 0; i < code_len; i++) {
if (tmp[i] == 0) {
break;
}
cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char) tmp[i]);
}
continue;
}
if (c == ESCAPE_CHAR) {
*cur++ = ESCAPE_CHAR;
*cur++ = ESCAPE_CHAR;
continue;
}
if (((utf8_int32_t) 0xffffff80 & c) == 0) {
*(cur++) = (char) c;
} else if (((utf8_int32_t) 0xfffff800 & c) == 0) {
*(cur++) = 0xc0 | (char) (c >> 6);
*(cur++) = 0x80 | (char) (c & 0x3f);
} else if (((utf8_int32_t) 0xffff0000 & c) == 0) {
*(cur++) = 0xe0 | (char) (c >> 12);
*(cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(cur++) = 0x80 | (char) (c & 0x3f);
} else {
*(cur++) = 0xf0 | (char) (c >> 18);
*(cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
*(cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(cur++) = 0x80 | (char) (c & 0x3f);
}
} while (*ptr != '\0');
*cur = '\0';
}
void str_unescape(char *dst, const char *str) {
char *cur = dst;
const char *ptr = str;
char tmp[3];
tmp[2] = '\0';
while (*ptr != 0) {
char c = *ptr++;
if (c == ESCAPE_CHAR) {
char next = *ptr;
if (next == ESCAPE_CHAR) {
*cur++ = (char) c;
ptr += 1;
} else {
tmp[0] = *(ptr);
tmp[1] = *(ptr + 1);
*cur++ = (char) strtol(tmp, NULL, 16);
ptr += 2;
}
} else {
*cur++ = c;
}
}
*cur = '\0';
}

View File

@@ -43,4 +43,9 @@ static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode
const char *find_file_in_paths(const char **paths, const char *filename);
void str_escape(char *dst, const char *str);
void str_unescape(char *dst, const char *str);
#endif

View File

@@ -59,12 +59,71 @@ void search_index(struct mg_connection *nc) {
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void javascript(struct mg_connection *nc) {
void stats(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html");
mg_send(nc, stats_html, sizeof(stats_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 4) {
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
index_t *index = get_index_by_id(arg_uuid);
if (index == NULL) {
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
const char *file;
switch (atoi(hm->uri.p + 3 + UUID_STR_LEN)) {
case 1:
file = "treemap.csv";
break;
case 2:
file = "mime_agg.csv";
break;
case 3:
file = "size_agg.csv";
break;
case 4:
file = "date_agg.csv";
break;
default:
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char disposition[8196];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file);
char full_path[PATH_MAX];
strcpy(full_path, index->path);
strcat(full_path, file);
mg_http_serve_file(nc, hm, full_path, mg_mk_str("text/csv"), mg_mk_str(disposition));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void javascript_lib(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
mg_send(nc, bundle_js, sizeof(bundle_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void javascript_search(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript");
mg_send(nc, search_js, sizeof(search_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int client_requested_dark_theme(struct http_message *hm) {
struct mg_str *cookie_header = mg_get_http_header(hm, "cookie");
if (cookie_header == NULL) {
@@ -174,12 +233,19 @@ int serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
char name_unescaped[PATH_MAX * 3];
str_unescape(name_unescaped, name);
char path_unescaped[PATH_MAX * 3];
str_unescape(path_unescaped, path);
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
char url[8196];
snprintf(url, sizeof(url),
"%s%s/%s%s%s",
idx->desc.rewrite_url, path, name, strlen(ext) == 0 ? "" : ".", ext);
idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
dyn_buffer_t encoded = url_escape(url);
mg_http_send_redirect(
@@ -198,10 +264,16 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
const char *mime = cJSON_GetObjectItem(json, "mime")->valuestring;
char name_unescaped[PATH_MAX * 3];
str_unescape(name_unescaped, name);
char path_unescaped[PATH_MAX * 3];
str_unescape(path_unescaped, path);
char full_path[PATH_MAX];
snprintf(full_path, PATH_MAX, "%s%s%s%s%s%s",
idx->desc.root, path, strlen(path) == 0 ? "" : "/",
name, strlen(ext) == 0 ? "" : ".", ext);
idx->desc.root, path_unescaped, strlen(path_unescaped) == 0 ? "" : "/",
name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
@@ -374,8 +446,12 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
search_index(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) {
style(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/js")))) {
javascript(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/stats")))) {
stats(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jslib")))) {
javascript_lib(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jssearch")))) {
javascript_search(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) {
img_sprite_skin_flat(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) {
@@ -388,6 +464,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
file(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) {
thumbnail(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) {
stats_files(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
document_info(nc, hm, &path);
} else {

File diff suppressed because one or more lines are too long