import sqlite3
import orjson as json
import os
import string
from hashlib import md5
import random
from tqdm import tqdm

schema = """
CREATE TABLE thumbnail (
  id TEXT NOT NULL CHECK (
    length(id) = 32
  ), 
  num INTEGER NOT NULL, 
  data BLOB NOT NULL, 
  PRIMARY KEY(id, num)
) WITHOUT ROWID;
CREATE TABLE version (
  id INTEGER PRIMARY KEY AUTOINCREMENT, 
  date TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP)
);
CREATE TABLE document (
  id TEXT PRIMARY KEY NOT NULL CHECK (
    length(id) = 32
  ), 
  marked INTEGER NOT NULL DEFAULT (1), 
  version INTEGER NOT NULL REFERENCES version(id), 
  mtime INTEGER NOT NULL, 
  size INTEGER NOT NULL, 
  json_data TEXT NOT NULL CHECK (
    json_valid(json_data)
  )
);
CREATE TABLE delete_list (
  id TEXT PRIMARY KEY CHECK (
    length(id) = 32
  )
) WITHOUT ROWID;
CREATE TABLE tag (
  id TEXT NOT NULL, 
  tag TEXT NOT NULL, 
  PRIMARY KEY (id, tag)
);
CREATE TABLE document_sidecar (
  id TEXT PRIMARY KEY NOT NULL, json_data TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE descriptor (
  id TEXT NOT NULL, version_major INTEGER NOT NULL, 
  version_minor INTEGER NOT NULL, version_patch INTEGER NOT NULL, 
  root TEXT NOT NULL, name TEXT NOT NULL, 
  rewrite_url TEXT, timestamp INTEGER NOT NULL
);
CREATE TABLE stats_treemap (
  path TEXT NOT NULL, size INTEGER NOT NULL
);
CREATE TABLE stats_size_agg (
  bucket INTEGER NOT NULL, count INTEGER NOT NULL
);
CREATE TABLE stats_date_agg (
  bucket INTEGER NOT NULL, count INTEGER NOT NULL
);
CREATE TABLE stats_mime_agg (
  mime TEXT NOT NULL, size INTEGER NOT NULL, 
  count INTEGER NOT NULL
);
CREATE TABLE embedding (
  id TEXT REFERENCES document(id), 
  model_id INTEGER NOT NULL references model(id), 
  start INTEGER NOT NULL, 
  end INTEGER, 
  embedding BLOB NOT NULL, 
  PRIMARY KEY (id, model_id, start)
);
CREATE TABLE model (
  id INTEGER PRIMARY KEY, 
  name TEXT NOT NULL UNIQUE CHECK (
    length(name) < 16
  ), 
  url TEXT, 
  path TEXT NOT NULL UNIQUE, 
  size INTEGER NOT NULL, 
  type TEXT NOT NULL CHECK (
    type IN ('flat', 'nested')
  )
);
"""

content = "".join(random.choices(string.ascii_letters, k=500))


def gen_document():
    return [
        md5(random.randbytes(8)).hexdigest(),
        json.dumps({
            "content": content,
            "mime": "image/jpeg",
            "extension": "jpeg",
            "name": "test",
            "path": "",
        })
    ]


if __name__ == "__main__":
    DB_NAME = "big_index.sist2"
    SIZE = 30_000_000

    os.remove(DB_NAME)
    db = sqlite3.connect(DB_NAME)
    db.executescript(schema)

    db.executescript("""
    PRAGMA journal_mode = OFF;
    PRAGMA synchronous = 0;
    """)

    for _ in tqdm(range(SIZE), total=SIZE):
        db.execute(
            "INSERT INTO document (id, version, mtime, size, json_data) VALUES (?, 1, 1000000, 10000, ?)",
            gen_document()
        )

    # 1. Enable rowid from document
    # 2. CREATE TABLE marked (
    #     id INTEGER PRIMARY KEY,
    #     marked int
    #    );
    # 3. Set FK for document_sidecar, embedding, tag, thumbnail
    # 4. Toggle FK if debug

    db.commit()