Merge pull request #25 from simon987/docker-wip

Dockerize
Commit 853e38e46b by simon987, 2020-01-22 16:04:51 -05:00 (committed via GitHub)
21 changed files with 248 additions and 248 deletions

.gitignore (vendored): 2 changes

@ -4,9 +4,9 @@
 __pycache__/
 captchas/
 _stats.json
-config.py
 oddb.log
 praw.ini
 env/
 worker.json
 search_blacklist.txt
+*.iml

Dockerfile (new file): 11 lines

@ -0,0 +1,11 @@
FROM python:3.7
WORKDIR /app
ADD requirements.txt /app/requirements.txt
RUN pip install -r requirements.txt
ENTRYPOINT ["python", "app.py"]
COPY . /app
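For a quick check of the image outside docker-compose, it can be built and run directly. The tag and host port below are illustrative, and the container still expects Elasticsearch, PostgreSQL and Redis to be reachable, which is what docker-compose.yml provides:

```bash
# Build the image from the repository root (the tag name is an example)
docker build -t od-database .

# Run it; app.py listens on port 80 inside the container
docker run --rm -p 5020:80 od-database
```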

View File

@ -1,7 +1,5 @@
 # OD-Database
-[![Build Status](https://ci.simon987.net/buildStatus/icon?job=od-database_qa)](https://ci.simon987.net/job/od-database_qa/)
 OD-Database is a web-crawling project that aims to index a very large number of file links and their basic metadata from open directories (misconfigured Apache/Nginx/FTP servers, or more often, mirrors of various public services).
 Each crawler instance fetches tasks from the central server and pushes the result once completed. A single instance can crawl hundreds of websites at the same time (Both FTP and HTTP(S)) and the central server is capable of ingesting thousands of new documents per second.
@ -14,82 +12,22 @@ The data is indexed into elasticsearch and made available via the web frontend (
### Contributing
Suggestions/concerns/PRs are welcome

-## Installation
-Assuming you have Python 3 and git installed:
+## Installation (Docker)
```bash
-sudo apt install libssl-dev libcurl4-openssl-dev
git clone https://github.com/simon987/od-database
cd od-database
-git submodule update --init --recursive
-sudo pip3 install -r requirements.txt
+docker-compose up
```
-Create `/config.py` and fill out the parameters. Sample config:
-```python
-# Leave default values for no CAPTCHAs
-CAPTCHA_LOGIN = False
-CAPTCHA_SUBMIT = False
-CAPTCHA_SEARCH = False
-CAPTCHA_EVERY = 10
-# Flask secret key for sessions
-FLASK_SECRET = ""
-RESULTS_PER_PAGE = (25, 50, 100, 250, 500, 1000)
-# Allow ftp websites in /submit
-SUBMIT_FTP = False
-# Allow http(s) websites in /submit
-SUBMIT_HTTP = True
-# Number of re-crawl tasks to keep in the queue
-RECRAWL_POOL_SIZE = 10000
-# task_tracker API url
-TT_API = "http://localhost:3010"
-# task_tracker crawl project id
-TT_CRAWL_PROJECT = 3
-# task_tracker indexing project id
-TT_INDEX_PROJECT = 9
-# Number of threads to use for ES indexing
-INDEXER_THREADS = 4
-# ws_bucket API url
-WSB_API = "http://localhost:3020"
-# ws_bucket secret
-WSB_SECRET = "default_secret"
-# ws_bucket data directory
-WSB_PATH = "/mnt/data/github.com/simon987/ws_bucket/data"
-# od-database PostgreSQL connection string
-DB_CONN_STR = "dbname=od-database user=od-database password=xxx"
-```
+## Architecture
+![diag](high_level_diagram.png)

## Running the crawl server
The python crawler that was a part of this project is discontinued,
[the go implementation](https://github.com/terorie/od-database-crawler) is currently in use.

-## Running the web server (debug)
-```bash
-cd od-database
-python3 app.py
-```
-## Running the web server with Nginx (production)
-* Install dependencies:
-```bash
-sudo apt install build-essential python-dev redis-server uwsgi-plugin-python3
-```
-* Configure nginx (on Debian 9: `/etc/nginx/sites-enabled/default`):
-```nginx
-server {
-    ...
-    include uwsgi_params;
-    location / {
-        uwsgi_pass 127.0.0.1:3031;
-    }
-    ...
-}
-```
-* Configure Elasticsearch
+### Configure Elasticsearch
```
PUT _template/default
{
@ -102,9 +40,3 @@ PUT _template/default
    "routing_partition_size" : 5
  }
}
-```
-* Start uwsgi:
-```bash
-uwsgi od-database.ini
-```
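The `_template/default` body shown above has to be applied before the first index is created, for example with curl. The file name `template.json` is illustrative; also note that the compose file does not publish port 9200 to the host, so you may need to add a port mapping or paste the request into the Kibana console at localhost:5021 instead:

```bash
# Apply the index template shown above (template.json is an example file name)
curl -X PUT "http://localhost:9200/_template/default" \
     -H "Content-Type: application/json" \
     -d @template.json
```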

app.py: 3 changes

@ -9,9 +9,8 @@ app = Flask(__name__)
app.secret_key = config.FLASK_SECRET
template_filters.setup_template_filters(app)
views.setup_views(app)
api.setup_api(app)

if __name__ == '__main__':
-    app.run("0.0.0.0", port=12345, threaded=True)
+    app.run("0.0.0.0", port=80, threaded=True)

View File

@ -26,14 +26,13 @@ logger.addHandler(file_handler)
logger.addHandler(StreamHandler(sys.stdout))

taskManager = TaskManager()
-searchEngine = ElasticSearchEngine("od-database")
+searchEngine = ElasticSearchEngine(config.ES_URL, config.ES_INDEX)
searchEngine.start_stats_scheduler()
db = Database(config.DB_CONN_STR)
-redis = r.Redis()
+redis = r.Redis(host=config.REDIS_HOST, port=config.REDIS_PORT)


def require_role(role: str):
    if db.get_user_role(session.get("username", None)) != role:
        abort(403)

config.py (new file): 29 lines

@ -0,0 +1,29 @@
from os import environ
CAPTCHA_LOGIN = bool(environ.get("CAPTCHA_LOGIN", False))
CAPTCHA_SUBMIT = bool(environ.get("CAPTCHA_SUBMIT", False))
CAPTCHA_SEARCH = bool(environ.get("CAPTCHA_SEARCH", False))
CAPTCHA_EVERY = int(environ.get("CAPTCHA_EVERY", 10))
FLASK_SECRET = environ.get("FLASK_SECRET", "A very secret secret")
RESULTS_PER_PAGE = (12, 25, 50, 100, 250, 500, 1000)
SUBMIT_FTP = bool(environ.get("SUBMIT_FTP", False))
SUBMIT_HTTP = bool(environ.get("SUBMIT_HTTP", True))
TT_API = environ.get("TT_API", "http://localhost:3010")
TT_CRAWL_PROJECT = int(environ.get("TT_CRAWL_PROJECT", 3))
TT_INDEX_PROJECT = int(environ.get("TT_INDEX_PROJECT", 9))
WSB_API = environ.get("WSB_API", "http://localhost:3020")
WSB_SECRET = environ.get("WSB_SECRET", "default_secret")
ES_URL = environ.get("ES_URL", "http://localhost:9200")
ES_INDEX = environ.get("ES_INDEX", "od-database")
REDIS_HOST = environ.get("REDIS_HOST", "localhost")
REDIS_PORT = int(environ.get("REDIS_PORT", 6379))
DB_CONN_STR = environ.get("DB_CONN_STR", "dbname=od_database user=od_database password=od_database")
RECRAWL_POOL_SIZE = int(environ.get("RECRAWL_POOL_SIZE", 10000))
INDEXER_THREADS = int(environ.get("INDEXER_THREADS", 3))
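One caveat with this pattern: `bool(environ.get(..., False))` is truthy for any non-empty string, including `"False"`, so values such as `SUBMIT_FTP=False` in docker-compose.yml do not actually turn a flag off. A small parser along these lines would make the intent explicit (a sketch only; `env_bool` is not part of the committed file):

```python
from os import environ


def env_bool(name, default=False):
    """Interpret common textual booleans from the environment ("true"/"false", "1"/"0", "yes"/"no")."""
    val = environ.get(name)
    if val is None:
        return default
    return val.strip().lower() in ("1", "true", "yes", "on")


# e.g. CAPTCHA_LOGIN = env_bool("CAPTCHA_LOGIN")
```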

docker-compose.yml (new file): 101 lines

@ -0,0 +1,101 @@
version: "2.1"
services:
  oddb:
    image: simon987/od-database
    ports:
      - 5020:80
    environment:
      - "CAPTCHA_LOGIN=True"
      - "CAPTCHA_SUBMIT=True"
      - "CAPTCHA_SEARCH=True"
      - "CAPTCHA_EVERY=10"
      - "FLASK_SECRET=changeme"
      - "SUBMIT_FTP=False"
      - "SUBMIT_HTTP=True"
      - "TT_API=http://tt:3010"
      - "TT_CRAWL_PROJECT=1"
      - "TT_INDEX_PROJECT=2"
      - "WSB_API=http://wsb:3020"
      - "WSB_SECRET=changeme"
      - "REDIS_HOST=oddb_redis"
      - "ES_URL=es:9200"
      - "DB_CONN_STR=postgres://od_database:changeme@oddb_db/od_database?sslmode=disable"
      - "RECRAWL_POOL_SIZE=10000"
      - "INDEXER_THREADS=2"
    depends_on:
      wsb:
        condition: service_started
      tt:
        condition: service_started
      oddb_db:
        condition: service_healthy
      es:
        condition: service_healthy
    restart: always
  oddb_db:
    image: postgres
    volumes:
      - ./oddb_pg_data:/var/lib/postgresql/data
    environment:
      - "POSTGRES_USER=od_database"
      - "POSTGRES_PASSWORD=changeme"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U od_database"]
      interval: 5s
      timeout: 5s
      retries: 5
  oddb_redis:
    image: redis
  wsb:
    image: simon987/wsb_bucket
    volumes:
      - ./wsb_data:/data
    environment:
      - "WS_BUCKET_SECRET=changeme"
    ports:
      - 3020:3020
  tt_db:
    image: postgres
    volumes:
      - ./tt_pg_data:/var/lib/postgresql/data
    environment:
      POSTGRES_USER: task_tracker
      POSTGRES_PASSWORD: changeme
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U task_tracker"]
      interval: 5s
      timeout: 5s
      retries: 5
  tt:
    image: simon987/task_tracker
    volumes:
      - ./tt_pg_data:/var/lib/postgresql/data
      - ./tt_config.yml:/root/config.yml
    ports:
      - 3010:80
    depends_on:
      tt_db:
        condition: service_healthy
  es:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.4.2
    environment:
      - discovery.type=single-node
      - "ES_JAVA_OPTS=-Xms1G -Xmx10G"
    volumes:
      - ./es_data:/usr/share/elasticsearch/data
    healthcheck:
      test: ["CMD-SHELL", "curl --silent --fail localhost:9200/_cluster/health || exit 1"]
      interval: 5s
      timeout: 5s
      retries: 5
  # (Optional)
  kibana:
    image: docker.elastic.co/kibana/kibana:7.4.2
    environment:
      - ELASTICSEARCH_HOSTS=http://es:9200
      - xpack.monitoring.collection.enabled=true
    ports:
      - 5021:5601
    depends_on:
      es:
        condition: service_healthy
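A typical bring-up with this file could look like the following; the service names and published ports come from the compose file above, the rest is illustrative:

```bash
# Start the whole stack in the background and check that the health checks pass
docker-compose up -d
docker-compose ps

# Follow the web app's logs; the frontend is published on http://localhost:5020
docker-compose logs -f oddb

# Kibana (optional service) is available on http://localhost:5021
```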

View File

@ -28,7 +28,7 @@ for file in os.listdir(dldir):
print("Export started, connecting to databases...") print("Export started, connecting to databases...")
db = Database(config.DB_CONN_STR) db = Database(config.DB_CONN_STR)
es = ElasticSearchEngine("od-database") es = ElasticSearchEngine(config.ES_URL, config.ES_INDEX)
docs_with_url = db.join_website_url(es.stream_all_docs()) docs_with_url = db.join_website_url(es.stream_all_docs())

jenkins/Jenkinsfile (vendored, deleted): 50 lines

@ -1,50 +0,0 @@
def remote = [:]
remote.name = 'remote'
remote.host = env.DEPLOY_HOST
remote.user = env.DEPLOY_USER
remote.identityFile = '/var/lib/jenkins/.ssh/id_rsa'
remote.knownHosts = '/var/lib/jenkins/.ssh/known_hosts'

pipeline {
    agent any
    stages {
        stage('Build') {
            steps {
                sh './jenkins/build.sh'
            }
        }
        stage('Deploy') {
            steps {
                sh 'echo $ODDB_CONFIG > config.py'
                sshCommand remote: remote, command: "cd od-database && rm -rf env fold_to_ascii search static task_tracker_drone templates ws_bucket_client *.py deploy.sh"
                sshPut remote: remote, from: 'requirements.txt', into: 'od-database'
                sshPut remote: remote, from: 'fold_to_ascii', into: 'od-database'
                sshPut remote: remote, from: 'search', into: 'od-database'
                sshPut remote: remote, from: 'static', into: 'od-database'
                sshPut remote: remote, from: 'task_tracker_drone', into: 'od-database'
                sshPut remote: remote, from: 'templates', into: 'od-database'
                sshPut remote: remote, from: 'ws_bucket_client', into: 'od-database'
                sshPut remote: remote, from: '__init__.py', into: 'od-database'
                sshPut remote: remote, from: 'api.py', into: 'od-database'
                sshPut remote: remote, from: 'app.py', into: 'od-database'
                sshPut remote: remote, from: 'captcha.py', into: 'od-database'
                sshPut remote: remote, from: 'common.py', into: 'od-database'
                sshPut remote: remote, from: 'database.py', into: 'od-database'
                sshPut remote: remote, from: 'export.py', into: 'od-database'
                sshPut remote: remote, from: 'init_script.sql', into: 'od-database'
                sshPut remote: remote, from: 'od_util.py', into: 'od-database'
                sshPut remote: remote, from: 'reddit_bot.py', into: 'od-database'
                sshPut remote: remote, from: 'tasks.py', into: 'od-database'
                sshPut remote: remote, from: 'template_filters.py', into: 'od-database'
                sshPut remote: remote, from: 'uwsgi.py', into: 'od-database'
                sshPut remote: remote, from: 'views.py', into: 'od-database'
                sshPut remote: remote, from: 'config.py', into: 'od-database'
                sshPut remote: remote, from: 'mass_import.py', into: 'od-database'
                sshPut remote: remote, from: 'do_recrawl.py', into: 'od-database'
                sshPut remote: remote, from: 'od-database.ini', into: 'od-database'
                sshPut remote: remote, from: 'jenkins/deploy.sh', into: 'od-database'
                sshCommand remote: remote, command: 'chmod +x od-database/deploy.sh && ./od-database/deploy.sh'
            }
        }
    }
}

View File

@ -1,4 +0,0 @@
#!/bin/bash
git submodule init
git submodule update --remote --recursive

View File

@ -1,32 +0,0 @@
#!/bin/bash
export ODDBROOT="od-database"
virtualenv ${ODDBROOT}/env -p python3.7
source ${ODDBROOT}/env/bin/activate
pip install -r ${ODDBROOT}/requirements.txt
screen -S oddb_web -X quit
killall -9 uwsgi
sleep 5
echo "starting oddb_web"
screen -S oddb_web -d -m bash -c "cd ${ODDBROOT} && source env/bin/activate && uwsgi od-database.ini 2> stderr.txt"
sleep 1
screen -list
echo "Installing crontabs"
absolute_dir=$(cd ${ODDBROOT} && pwd)
# Re-crawl dirs
command="bash -c \"cd '${absolute_dir}' && source env/bin/activate && python do_recrawl.py >> recrawl_logs.txt\""
job="*/10 * * * * $command"
echo "$job"
cat <(fgrep -i -v "$command" <(crontab -l)) <(echo "$job") | crontab -
# Cleanup captchas
command="bash -c \"cd '${absolute_dir}' && rm captchas/*.png\""
job="*/60 * * * * $command"
echo "$job"
cat <(fgrep -i -v "$command" <(crontab -l)) <(echo "$job") | crontab -

View File

View File

@ -1,10 +0,0 @@
[uwsgi]
uwsgi-socket = 127.0.0.1:3031
wsgi-file = uwsgi.py
processes = 2
threads = 16
stats = 127.0.0.1:9191
callable=app
virtualenv=./env
disable-logging=True

View File

@ -18,7 +18,6 @@ lxml
pillow
Wand
numpy
-matplotlib
uwsgi
redis
psycopg2-binary

View File

@ -2,9 +2,8 @@ import os
import time
from urllib.parse import urljoin

-import ujson
import elasticsearch
+import ujson
from apscheduler.schedulers.background import BackgroundScheduler
from elasticsearch import helpers
@ -20,32 +19,7 @@ class IndexingError(Exception):
    pass


-class SearchEngine:
-    def __init__(self):
-        pass

-    def import_json(self, in_str: str, website_id: int):
-        raise NotImplementedError

-    def search(self, query, page, per_page, sort_order, extension, size_min, size_max, match_all, fields, date_min,
-               date_max) -> {}:
-        raise NotImplementedError

-    def reset(self):
-        raise NotImplementedError

-    def ping(self):
-        raise NotImplementedError

-    def get_stats(self, website_id: int, subdir: str = None):
-        raise NotImplementedError

-    def refresh(self):
-        raise NotImplementedError


-class ElasticSearchEngine(SearchEngine):
+class ElasticSearchEngine:
    SORT_ORDERS = {
        "score": ["_score"],
        "size_asc": [{"size": {"order": "asc"}}],
@ -55,10 +29,11 @@ class ElasticSearchEngine(SearchEngine):
"none": [] "none": []
} }
def __init__(self, index_name): def __init__(self, url, index_name):
super().__init__() super().__init__()
self.index_name = index_name self.index_name = index_name
self.es = elasticsearch.Elasticsearch() logger.info("Connecting to ES @ %s" % url)
self.es = elasticsearch.Elasticsearch(hosts=[url])
self.filter = SearchFilter() self.filter = SearchFilter()
if not self.es.indices.exists(self.index_name): if not self.es.indices.exists(self.index_name):
@ -73,28 +48,31 @@ class ElasticSearchEngine(SearchEngine):
logger.info("Elasticsearch first time setup") logger.info("Elasticsearch first time setup")
if self.es.indices.exists(self.index_name): if self.es.indices.exists(self.index_name):
self.es.indices.delete(index=self.index_name) self.es.indices.delete(index=self.index_name)
self.es.indices.create(index=self.index_name) self.es.indices.create(index=self.index_name, body={
self.es.indices.close(index=self.index_name) "settings": {
"index": {
# Index settings "number_of_shards": 50,
self.es.indices.put_settings(body={ "number_of_replicas": 0,
"analysis": { "refresh_interval": "30s",
"tokenizer": { "codec": "best_compression"
"my_nGram_tokenizer": { },
"type": "nGram", "min_gram": 3, "max_gram": 3 "analysis": {
"analyzer": {
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": ["lowercase", "asciifolding"]
}
},
"tokenizer": {
"my_nGram_tokenizer": {
"type": "nGram", "min_gram": 3, "max_gram": 3
}
} }
} }
}}, index=self.index_name) }
self.es.indices.put_settings(body={ })
"analysis": {
"analyzer": {
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": ["lowercase", "asciifolding"]
}
}
}}, index=self.index_name)
# Index Mappings
self.es.indices.put_mapping(body={ self.es.indices.put_mapping(body={
"properties": { "properties": {
"path": {"analyzer": "standard", "type": "text"}, "path": {"analyzer": "standard", "type": "text"},
@ -110,12 +88,6 @@ class ElasticSearchEngine(SearchEngine):
        self.es.indices.open(index=self.index_name)

-    def reset(self):
-        self.init()

-    def ping(self):
-        return self.es.ping()

    def delete_docs(self, website_id):
        while True:
@ -332,7 +304,8 @@ class ElasticSearchEngine(SearchEngine):
            yield urljoin(base_url, "/") + src["path"] + ("/" if src["path"] != "" else "") + src["name"] + \
                  ("." if src["ext"] != "" else "") + src["ext"]

-    def get_global_stats(self):
+    @staticmethod
+    def get_global_stats():

        if os.path.exists("_stats.json"):
            with open("_stats.json", "r") as f:
@ -489,7 +462,7 @@ class ElasticSearchEngine(SearchEngine):
"query": { "query": {
"match_all": {} "match_all": {}
} }
}, scroll="1m", client=self.es, index=self.index_name, request_timeout=60) }, scroll="30s", client=self.es, index=self.index_name, request_timeout=30)
def refresh(self): def refresh(self):
self.es.indices.refresh(self.index_name) self.es.indices.refresh(self.index_name)

View File

@ -3,9 +3,11 @@ import logging
import os
import time
from multiprocessing.pool import ThreadPool
+from tempfile import NamedTemporaryFile
from threading import Thread
from uuid import uuid4

+import requests
import urllib3

import config
@ -60,13 +62,13 @@ class IndexingTask:
class TaskManager:

    def __init__(self):
-        self.search = ElasticSearchEngine("od-database")
+        self.search = ElasticSearchEngine(config.ES_URL, config.ES_INDEX)
        self.db = database.Database(config.DB_CONN_STR)
        self.tracker = TaskTrackerApi(config.TT_API)

        self.worker = Worker.from_file(self.tracker)
        if not self.worker:
-            self.worker = self.tracker.make_worker("oddb_master")
+            self.worker = self.tracker.make_worker("$oddb_master")
            self.worker.dump_to_file()
            self.worker.request_access(config.TT_CRAWL_PROJECT, False, True)
            self.worker.request_access(config.TT_INDEX_PROJECT, True, False)
@ -91,8 +93,9 @@ class TaskManager:
        try:
            recipe = task.json_recipe()
            logger.debug("Got indexing task: " + str(recipe))
-            filename = os.path.join(config.WSB_PATH,
-                                    format_file_name(recipe["website_id"], recipe["upload_token"]))
+            filename = download_file(config.WSB_API + "/slot?token=" + recipe["upload_token"])
            self._complete_task(filename, Task(recipe["website_id"], recipe["url"]))
        except Exception as e:
            self.worker.release_task(task_id=task.id, result=1, verification=0)
@ -167,3 +170,18 @@ class TaskManager:
def format_file_name(website_id, token):
    return "%d_%s.NDJSON" % (website_id, token,)


+def download_file(url):
+    r = requests.get(url, stream=True,)
+    if r.status_code != 200:
+        raise ValueError("HTTP error %d: %s" % (r.status_code, url))

+    tmp = NamedTemporaryFile(delete=False)
+    for chunk in r.iter_content(chunk_size=4096):
+        if chunk:
+            tmp.write(chunk)
+    tmp.close()

+    return tmp.name
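`download_file` streams the response into a `NamedTemporaryFile` created with `delete=False` and returns its path, so the caller owns the cleanup. A minimal usage sketch (the URL, token and processing step are placeholders, not part of the diff):

```python
import os

path = download_file("http://wsb:3020/slot?token=example-token")  # placeholder URL/token
try:
    with open(path, "rb") as f:
        for line in f:   # e.g. iterate over the NDJSON crawl result
            pass         # placeholder for the actual indexing step
finally:
    os.remove(path)      # not deleted automatically because of delete=False
```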

View File

@ -26,12 +26,8 @@
<form action="/search" id="sfrm"> <form action="/search" id="sfrm">
<div class="form-row"> <div class="form-row">
<div class="col-md-11"> <input class="form-control" style="max-width: calc(100% - 80px);" name="q" id="q" placeholder="Query">
<input class="form-control" name="q" id="q" placeholder="Query"> <input class="btn btn-primary btn-shadow" type="submit" value="Search" style="margin-left: 3px">
</div>
<div class="col-md-1">
<input class="btn btn-primary btn-shadow" type="submit" value="Search">
</div>
</div> </div>
{% if show_captcha %} {% if show_captcha %}
{{ captcha.get_code()|safe }} {{ captcha.get_code()|safe }}

templates/search.html: mode changed from Executable file to Normal file (0 content changes)

tt_config.yml (new file): 24 lines

@ -0,0 +1,24 @@
server:
  address: "0.0.0.0:3010"

database:
  conn_str: "postgres://task_tracker:changeme@tt_db/task_tracker?sslmode=disable"
  log_levels: ["error", "info", "warn"]

git:
  webhook_hash: "sha256"
  webhook_sig_header: "X-Gogs-Signature"

log:
  level: "trace"

session:
  cookie_name: "tt"
  expiration: "48h"

monitoring:
  snapshot_interval: "120s"
  history_length: "1800h"

maintenance:
  reset_timed_out_tasks_interval: "10m"

uwsgi.ini (new file): 9 lines

@ -0,0 +1,9 @@
[uwsgi]
module = main
callable = app
enable-threads = true
processes = 4
threads = 16
disable-logging = True
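The Dockerfile currently starts the Flask development server (`python app.py`); if the container were switched to uWSGI instead, this file could be used directly, assuming a module named `main` exposes the `app` callable as configured above:

```bash
# Serve the app over HTTP on port 80 using the settings from uwsgi.ini
uwsgi --ini uwsgi.ini --http 0.0.0.0:80
```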

View File

@ -3,19 +3,24 @@ import os
from multiprocessing.pool import Pool
from urllib.parse import urlparse

+from flask import render_template, redirect, request, flash, abort, Response, session
+from flask_caching import Cache

import captcha
import config
import od_util
from common import db, taskManager, searchEngine, logger, require_role
from database import Website
-from flask import render_template, redirect, request, flash, abort, Response, session
-from flask_caching import Cache
from search.search import InvalidQueryException
from tasks import Task
def setup_views(app):
-    cache = Cache(app, config={'CACHE_TYPE': 'simple'})
+    cache = Cache(app, config={
+        "CACHE_TYPE": "redis",
+        "CACHE_REDIS_HOST": config.REDIS_HOST,
+        "CACHE_REDIS_PORT": config.REDIS_PORT,
+    })
@app.route("/dl") @app.route("/dl")
@cache.cached(120) @cache.cached(120)
@ -207,7 +212,8 @@ def setup_views(app):
flash("Query failed, this could mean that the search server is overloaded or is not reachable. " flash("Query failed, this could mean that the search server is overloaded or is not reachable. "
"Please try again later", "danger") "Please try again later", "danger")
results = hits["hits"]["total"]["value"] if not isinstance(hits["hits"]["total"], int) else hits["hits"]["total"] if hits else -1 results = hits["hits"]["total"]["value"] if not isinstance(hits["hits"]["total"], int) else \
hits["hits"]["total"] if hits else -1
took = hits["took"] if hits else -1 took = hits["took"] if hits else -1
forwarded_for = request.headers["X-Forwarded-For"] if "X-Forwarded-For" in request.headers else None forwarded_for = request.headers["X-Forwarded-For"] if "X-Forwarded-For" in request.headers else None