mirror of https://github.com/simon987/opendirectories-bot.git
synced 2025-04-20 02:36:45 +00:00

Fixed bugs, enhanced parser
parent f3dc1445e4, commit 23775ec126

43 crawler.py
@@ -2,19 +2,41 @@ import requests
 from parser import NginxParser, ApacheParser
 from reports import ReportSaver, ReportBuilder
 
 headers = {
     'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
     "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
 }
 
 
 class Crawler:
 
-    def __init__(self, url):
-        self.parser = NginxParser()
+    def __init__(self, url, test_url):
         self.files = []
         self.base_url = url
 
+        if test_url:
+            # Test url
+            r = requests.get(self.base_url, timeout=30)
+
+            self.parser = self.guess_parser(r.text, r.headers)()
+
+            print("Using " + self.parser.__class__.__name__ + " as parser")
+
+        else:
+            self.parser = None
+
+    @staticmethod
+    def guess_parser(text, headers):
+
+        server = headers["Server"] if "Server" in headers else ""
+
+        # try nginx
+        parser = NginxParser()
+        if parser.page_is_valid(text):
+            return NginxParser
+
+        # Try apache
+        parser = ApacheParser()
+        if parser.page_is_valid(text):
+            return ApacheParser
+
+        return None
+
     def crawl(self, address=None):
 
         if address is None:
@@ -53,6 +75,7 @@ class Crawler:
         f.write(report_saver.to_link_list())
 
 
-c = Crawler("http://dl.upload8.in/files/Serial/Altered%20Carbon/")
-c.crawl()
-c.store_report("000002")
+if __name__ == "__main__":
+    c = Crawler("https://repo.zenk-security.com/", True)
+    c.crawl()
+    c.store_report("000007")
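For orientation, a minimal usage sketch of the reworked entry point (the first URL and report id are the ones from the diff; the second call mirrors the new unit tests, which construct the crawler with test_url=False and no network access):

```python
from crawler import Crawler

# test_url=True probes the URL and lets guess_parser() pick NginxParser or
# ApacheParser, whichever accepts the returned page.
c = Crawler("https://repo.zenk-security.com/", True)
c.crawl()
c.store_report("000007")

# test_url=False skips the HTTP probe and leaves c.parser as None,
# so callers (e.g. the tests) can call guess_parser() themselves.
offline = Crawler("http://some.website/", False)
print(offline.parser)  # None
```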
182 parser.py
@@ -1,14 +1,43 @@
-from bs4 import BeautifulSoup
-from urllib.parse import urljoin
 import os
 import re
+from urllib.parse import urljoin
 
 import humanfriendly
+from bs4 import BeautifulSoup
 
 
 class PageParser:
 
+    def __init__(self):
+        self.col_start = None
+        self.col_end = None
+        self.size_unknown = True
+
     def get_links(self, text: str, base_url: str):
         raise NotImplementedError()
 
+    @staticmethod
+    def get_size_columns(cols):
+
+        for i in range(len(cols)):
+
+            if i == len(cols) - 1:
+                try:
+                    humanfriendly.parse_size(cols[i])
+                    return tuple([i, i])
+                except humanfriendly.InvalidSize:
+                    return None
+
+            try:
+                humanfriendly.parse_size(cols[i] + cols[i + 1])
+                return tuple([i, i + 1])
+            except humanfriendly.InvalidSize:
+                try:
+                    humanfriendly.parse_size(cols[i])
+                    return tuple([i, i])
+                except humanfriendly.InvalidSize:
+                    continue
+
     @staticmethod
     def get_parser_type(headers):
         """Get appropriate parser type for a a server based on its header"""
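A quick illustration of what the new PageParser.get_size_columns helper does with whitespace-split listing columns; the sample inputs below are made up, and the results noted in the comments are the expected ones:

```python
from parser import PageParser

# Hypothetical columns, as produced by re.split("\s+", ...) on a listing row
print(PageParser.get_size_columns(["2017-12-04", "16:32", "3.5M"]))      # (2, 2)
print(PageParser.get_size_columns(["2017-01-17", "18:52", "6.8", "M"]))  # (2, 3): size split over two columns
print(PageParser.get_size_columns(["2017-12-04", "16:33", "-"]))         # None: no parseable size
```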
@@ -26,26 +55,82 @@ class PageParser:
 
     @staticmethod
     def file_type(link):
-        return "d" if link.endswith("/") else "f"
+        if link.endswith("/") or link.startswith("?"):
+            return "d"
+        return "f"
+
+
+    @staticmethod
+    def clean_page(text):
+        text = text.replace("<A", "<a")
+        text = text.replace("</A", "</a")
+        # text = text.replace("&amp;", "&")
+        text = text.replace("<hr>", "")
+
+        return text
+
+    def get_size(self, cols):
+
+        # Figure out which column(s) is the size one
+        size_cols = self.get_size_columns(cols)
+        if size_cols is not None:
+            col_start, col_end = size_cols
+            self.size_unknown = False
+
+            size_human = cols[col_start] if col_start == col_end else cols[col_start] + cols[col_end]
+
+            try:
+                size = humanfriendly.parse_size(size_human)
+            except humanfriendly.InvalidSize:
+                size = 0
+        else:
+            size = 0
+
+        return size
 
 
 class NginxParser(PageParser):
     def get_links(self, text, base_url: str):
 
         links = dict()
-        soup = BeautifulSoup(text, "html.parser")
 
         # Handle weird character formats and tag names
-        text = text.replace("<A", "<a")
-        text = text.replace("</A", "</a")
-        text = text.replace("&amp;", "&")
+        text = self.clean_page(text)
+
+        soup = BeautifulSoup(text, "html.parser")
 
         for link in soup.find("pre").find_all("a"):
 
-            if link.text != "../":
+            parsed_link = self.parse_link(link, text, base_url)
+            if parsed_link is not None:
+                links[parsed_link[0]] = parsed_link[1]
+
+        return links
+
+    def page_is_valid(self, text):
+        # Handle weird character formats and tag names
+        text = self.clean_page(text)
+
+        soup = BeautifulSoup(text, "html.parser")
+
+        if soup.find("pre") is None:
+            return False
+
+        # try to parse a single link
+        for link in soup.find("pre").find_all("a"):
+            if PageParser.should_save_link(link.text):
+                if self.parse_link(link, text, "") is None:
+                    return False
+
+        return True
+
+    def parse_link(self, link, text, base_url):
+
+        try:
+            if PageParser.should_save_link(link.text):
                 target = link.get("href")
                 full_link = urljoin(base_url, target)
-                file_type = PageParser.file_type(full_link)
+                file_type = PageParser.file_type(target)
 
                 if file_type == "f":
                     extension = os.path.splitext(full_link)[1].strip(".")
@@ -53,46 +138,30 @@ class NginxParser(PageParser):
                     # Parse size
                     target_index = text.find("</a", text.find(target))
                     date_and_size = text[target_index:text.find("<a", target_index)]
-                    size = humanfriendly.parse_size(re.split("\s+", date_and_size)[3])
 
-                    links[link.text] = dict(link=full_link, size=size, ext=extension, type=file_type)
+                    cols = re.split("\s+", date_and_size)
+                    size = self.get_size(cols)
+
+                    return target, dict(link=full_link, size=size, ext=extension, type=file_type)
                 else:
-                    links[link.text] = dict(link=full_link, type=file_type)
+                    return target, dict(link=full_link, type=file_type)
+        except Exception as e:
+            print("Couldn't parse link " + link.get("href") + str(e))
+            raise e
 
-        return links
+        return None
 
 
 class ApacheParser(PageParser):
 
-    def __init__(self):
-        self.col_start = None
-        self.col_end = None
-        self.size_unknown = True
-
-    def get_size_columns(self, cols):
-
-        for i in range(len(cols) - 1):
-            try:
-                humanfriendly.parse_size(cols[i] + cols[i + 1])
-                return tuple([i, i + 1])
-            except humanfriendly.InvalidSize:
-                try:
-                    humanfriendly.parse_size(cols[i])
-                    return tuple([i, i])
-                except humanfriendly.InvalidSize:
-                    continue
-
     def get_links(self, text, base_url: str):
 
         links = dict()
-        soup = BeautifulSoup(text, "html.parser")
 
         # Handle weird character formats and tag names
-        text = text.replace("<A", "<a")
-        text = text.replace("</A", "</a")
-        text = text.replace("&amp;", "&")
+        text = self.clean_page(text)
+
+        soup = BeautifulSoup(text, "html.parser")
 
         if soup.find("table"):
 
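A rough sketch of how the refactored NginxParser methods fit together, on a tiny made-up nginx autoindex page (the HTML, the URL and the expected output are illustrative only):

```python
from parser import NginxParser

page = """<html><body><pre><a href="../">../</a>
<a href="movie.mp4">movie.mp4</a>    04-Dec-2017 16:32    9.3M
</pre></body></html>"""

parser = NginxParser()
if parser.page_is_valid(page):  # needs a <pre> block whose links parse cleanly
    links = parser.get_links(page, "http://example.com/files/")
    # Expected shape, keyed by the raw href target:
    # {"movie.mp4": {"link": "http://example.com/files/movie.mp4",
    #                "size": 9300000, "ext": "mp4", "type": "f"}}
    print(links)
```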
@@ -109,20 +178,20 @@ class ApacheParser(PageParser):
                 if PageParser.should_save_link(link.text):
 
                     target = link.get("href")
+                    file_type = PageParser.file_type(target)
                     full_link = urljoin(base_url, target)
-                    file_type = PageParser.file_type(full_link)
 
                     if file_type == "f":
                         extension = os.path.splitext(full_link)[1].strip(".")
 
                         cols = row.find_all("td")
                         for i in range(len(cols)):
-                            cols[i] = cols[i].string if cols[i].string is not None else ""
+                            cols[i] = cols[i].string if cols[i].string is not None else "-"
                         size = self.get_size(cols)
 
-                        links[link.text] = dict(link=full_link, size=size, ext=extension, type=file_type)
+                        links[target] = dict(link=full_link, size=size, ext=extension, type=file_type)
                     else:
-                        links[link.text] = dict(link=full_link, type=file_type)
+                        links[target] = dict(link=full_link, type=file_type)
         else:
 
             for link in soup.find_all("a"):
@@ -131,36 +200,33 @@ class ApacheParser(PageParser):
 
                     target = link.get("href")
                     full_link = urljoin(base_url, target)
-                    file_type = PageParser.file_type(full_link)
+                    file_type = PageParser.file_type(target)
 
                     if file_type == "f":
                         extension = os.path.splitext(full_link)[1].strip(".")
 
                         target_index = text.find("</a", text.find(target))
-                        date_and_size = text[target_index:text.find("<a", target_index)]
+                        date_and_size = text[target_index:text.find("<a", target_index)]  # in some cases we,re looking for </pre instead
+                        date_and_size = text[target_index:text.find("</pre", target_index)] if text.find("<a", target_index) == -1 else date_and_size
 
                         cols = re.split("\s+", date_and_size)
                         size = self.get_size(cols)
 
-                        links[link.text] = dict(link=full_link, size=size, ext=extension, type=file_type)
+                        links[target] = dict(link=full_link, size=size, ext=extension, type=file_type)
                     else:
-                        links[link.text] = dict(link=full_link, type=file_type)
+                        links[target] = dict(link=full_link, type=file_type)
 
         return links
 
-    def get_size(self, cols):
-        if self.col_start is None:
-            # Figure out which column(s) is the size one
-            size_cols = self.get_size_columns(cols)
-            if size_cols is not None:
-                self.col_start, self.col_end = size_cols
-                self.size_unknown = False
-
-        if self.size_unknown:
-            size = 0
-        else:
-            size_human = cols[self.col_start] if self.col_start == self.col_end else cols[self.col_start] + cols[self.col_end]
-            size = humanfriendly.parse_size(size_human)
-        return size
+    def page_is_valid(self, text):
+        try:
+            links = self.get_links(text, "")
+            print(links)
+            return True
+        except Exception as e:
+            print("This is not recognised Apache open directory: " + str(e))
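The spec changes below exercise these code paths; roughly, they can be driven by hand like this, using the test_apache4.html fixture added in this commit (run from the spec directory, as the tests are):

```python
from parser import ApacheParser

parser = ApacheParser()

with open("test_apache4.html", "r") as f:
    text = f.read()

links = parser.get_links(text, "http://jenserserver.no-ip.biz/movieserver/serien/bigbangtheorie/S3/")

# Keys are now the raw href targets; size cells that cannot be parsed
# degrade to 0 instead of raising.
print(links["The.Big.Bang.Theory.S03E06.Football.fuer.Nerds.German.WS.DVDRip.XviD-DELiCiOUS.avi"]["size"])  # 175000000
```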
@@ -1,4 +1,58 @@
 import os
+import json
+
+
+class CrawTask:
+
+    def __init__(self, url, post_id, title):
+        self.url = url
+        self.post_id = post_id
+        self.post_title = title
+
+
+class TaskQueue:
+
+    def __init__(self, file):
+        self.file = file
+
+        self.tasks = []
+
+        if os.path.isfile(self.file):
+
+            with open(self.file, "r") as f:
+                json_tasks = json.load(f)
+
+                for task in json_tasks:
+                    self.tasks.append(CrawTask(task["url"], task["post_id"], task["post_title"]))
+
+    def push(self, task):
+        self.tasks.append(task)
+        self.update_file()
+
+    def pop(self):
+        if len(self.tasks) > 0:
+            t = self.tasks.pop()
+            self.update_file()
+        else:
+            t = None
+
+        return t
+
+    def update_file(self):
+        with open(self.file, "w") as f:
+            json.dump(self.tasks, f, default=dumper)
+
+    def is_queued(self, post_id):
+
+        for task in self.tasks:
+            if task.post_id == post_id:
+                return True
+
+        return False
+
+
+def dumper(obj):
+    return obj.__dict__
 
 
 class RedditBot:
@@ -11,8 +65,7 @@ class RedditBot:
             self.crawled = []
         else:
             with open(log_file, "r") as f:
-                self.crawled = f.read().split("\n")
-                self.crawled = list(filter(None, self.crawled))
+                self.crawled = list(filter(None, f.read().split("\n")))
 
     def log_crawl(self, post_id):
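A short usage sketch of the new persistent task queue, mirroring the TaskQueueTest cases added further down (the queue file name is illustrative):

```python
from reddit_bot import TaskQueue, CrawTask

tq = TaskQueue("task_queue.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
print(tq.is_queued("postid"))   # True

# Every push/pop rewrites the JSON file, so a second instance over the
# same file sees the pending task.
tq2 = TaskQueue("task_queue.txt")
task = tq2.pop()
print(task.url, task.post_id)   # http://awebsite.com/ postid
```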
@@ -91,6 +91,7 @@ class ReportSaver:
         out["ext_sizes"] = self.builder.get_ext_sizes()
         out["ext_sizes_formatted"] = self.builder.get_ext_sizes_formatted()
         out["report_time"] = str(self.builder.report_time)
+        out["total_count"] = len(self.builder.files)
 
         return json.dumps(out)
 
@@ -103,6 +104,7 @@ class ReportSaver:
         out["ext_count"] = self.builder.get_ext_counts()
         out["ext_sizes"] = self.builder.get_ext_sizes()
         out["report_time"] = str(self.builder.report_time)
+        out["total_count"] = len(self.builder.files)
 
         return json.dumps(out)
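For reference, the serialized report now carries a total_count field next to the existing ones; a rough sketch of the payload shape with made-up values (the chart code changed further down falls back on total_count when total_size is 0):

```python
import json

report = {
    "ext_count": {"mp4": 12, "jpg": 3},       # existing
    "ext_sizes": {"mp4": 9300000, "jpg": 0},  # existing
    "report_time": "2018-02-12 00:00:00",     # existing
    "total_count": 15,                        # new: len(self.builder.files)
}
print(json.dumps(report))
```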
@@ -0,0 +1,32 @@
+from unittest import TestCase
+
+from parser import ApacheParser, NginxParser
+from crawler import Crawler
+
+
+class CrawlerTest(TestCase):
+
+    def test_guess_parser1(self):
+
+        with open("test_apache1.html", "r") as f:
+            text = f.read()
+
+        c = Crawler("http://some.website/", False)
+
+        self.assertEqual(c.guess_parser(text, {}), ApacheParser)
+
+    def test_guess_parser2(self):
+        with open("test_nginx1.html", "r") as f:
+            text = f.read()
+
+        c = Crawler("http://some.website", False)
+
+        self.assertEqual(c.guess_parser(text, {}), NginxParser)
+
+    def test_guess_parser3(self):
+        with open("test_invalid.html", "r") as f:
+            text = f.read()
+
+        c = Crawler("http://some.website", False)
+
+        self.assertEqual(c.guess_parser(text, {}), None)
@@ -18,7 +18,7 @@ class NginxParserTest(TestCase):
     def setUp(self):
         self.parser = NginxParser()
 
-        root_page_file = open("test_nginx_root.html", "r")
+        root_page_file = open("test_nginx1.html", "r")
         self.root_page = root_page_file.read()
         root_page_file.close()
 
@@ -57,7 +57,7 @@ class ApacheParserTest(TestCase):
     def setUp(self):
         self.parser = ApacheParser()
 
-        root_page_file = open("test_apache_root.html", "r")
+        root_page_file = open("test_apache1.html", "r")
         self.root_page = root_page_file.read()
         root_page_file.close()
 
@@ -76,7 +76,7 @@ class ApacheParserTest(TestCase):
         result = self.parser.get_links(self.root_page, "https://keisari.net/videos/")
 
         self.assertEqual(result["happyday.mp4"]["size"], 772000)
-        self.assertEqual(result["alex_räjähtää.mp4"]["size"], 715000)
+        self.assertEqual(result["alex_r%c3%a4j%c3%a4ht%c3%a4%c3%a4.mp4"]["size"], 715000)
 
     def test_link_type(self):
         result = self.parser.get_links(self.root_page, "https://keisari.net/videos/")
@@ -109,16 +109,67 @@ class ApacheParserTest2(TestCase):
     def test_link_size(self):
         result = self.parser.get_links(self.root_page, self.base_url)
 
-        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z.‘æ020˜b.u‚æ‚Ý‚ª‚¦‚éƒTƒCƒ„l“`àIŒå‹ó‚̃‹[ƒcv.wmv"]["size"], 179721000)
-        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z.‘æ225˜b.u‹‚¢‚ºƒ`ƒrƒbƒRII‚P‚W†‘å‹êíIHv.wmv"]["size"], 347507000)
+        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z.‘æ011˜b.u‰F’ˆˆê‚Ì‹íŽmƒTƒCƒ„l‚ß‚´‚ß‚éIv.wmv"]["size"], 232185000)
+        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z.‘æ019˜b.ud—͂ƂÌí‚¢Iƒoƒuƒ‹ƒXŒN‚ð‚‚©‚Ü‚¦‚ëv.wmv"]["size"], 185385000)
 
     def test_link_type(self):
         result = self.parser.get_links(self.root_page, self.base_url)
 
-        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z.‘æ225˜b.u‹‚¢‚ºƒ`ƒrƒbƒRII‚P‚W†‘å‹êíIHv.wmv"]["type"], "f")
-        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z jpg/"]["type"], "d")
+        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z.‘æ011˜b.u‰F’ˆˆê‚Ì‹íŽmƒTƒCƒ„l‚ß‚´‚ß‚éIv.wmv"]["type"], "f")
+        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z%20jpg/"]["type"], "d")
 
     def test_link_extension(self):
         result = self.parser.get_links(self.root_page, self.base_url)
 
-        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z.‘æ225˜b.u‹‚¢‚ºƒ`ƒrƒbƒRII‚P‚W†‘å‹êíIHv.wmv"]["ext"], "wmv")
+        self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{[ƒ‹Z.‘æ011˜b.u‰F’ˆˆê‚Ì‹íŽmƒTƒCƒ„l‚ß‚´‚ß‚éIv.wmv"]["ext"], "wmv")
+
+
+class ApacheParserTest3(TestCase):
+
+    def setUp(self):
+        self.parser = ApacheParser()
+
+        root_page_file = open("test_apache3.html", "r")
+        self.root_page = root_page_file.read()
+        self.base_url = "http://files.duspectacle.com/mp3/Jardinets/"
+        root_page_file.close()
+
+    def test_link_count(self):
+
+        result = self.parser.get_links(self.root_page, self.base_url)
+
+        self.assertEqual(len(result), 21)
+
+    def test_link_size(self):
+        result = self.parser.get_links(self.root_page, self.base_url)
+
+        self.assertEqual(result["15%20Woodkid%20-%20Iron%20(Remix%20By%20Gucci%20Vump).mp3"]["size"], 9300000)
+        self.assertEqual(result["16%20Yellow%20Ostrich%20-%20WHALE.mp3"]["size"], 7100000)
+
+    def test_link_type(self):
+        result = self.parser.get_links(self.root_page, self.base_url)
+
+        self.assertEqual(result["15%20Woodkid%20-%20Iron%20(Remix%20By%20Gucci%20Vump).mp3"]["type"], "f")
+        self.assertEqual(result["01%20Jean%20Rochefort%20-%20Winnie%20et%20ses%20amis%20(introduction)/"]["type"], "d")
+
+    def test_link_extension(self):
+        result = self.parser.get_links(self.root_page, self.base_url)
+
+        self.assertEqual(result["15%20Woodkid%20-%20Iron%20(Remix%20By%20Gucci%20Vump).mp3"]["ext"], "mp3")
+
+
+class ApacheParserTest4(TestCase):
+
+    def setUp(self):
+        self.parser = ApacheParser()
+
+        root_page_file = open("test_apache4.html", "r")
+        self.root_page = root_page_file.read()
+        self.base_url = "http://jenserserver.no-ip.biz/movieserver/serien/bigbangtheorie/S3/"
+        root_page_file.close()
+
+    def test_link_size(self):
+        result = self.parser.get_links(self.root_page, self.base_url)
+
+        self.assertEqual(result["The.Big.Bang.Theory.S03E06.Football.fuer.Nerds.German.WS.DVDRip.XviD-DELiCiOUS.avi"]["size"], 175000000)
+        self.assertEqual(result["The.Big.Bang.Theory.S03E03.Sex.oder.Pralinen.German.WS.DVDRip.XviD-DELiCiOUS.avi"]["size"], 0)
@@ -1,5 +1,5 @@
 from unittest import TestCase
-from reddit_bot import RedditBot
+from reddit_bot import RedditBot, TaskQueue, CrawTask
 import os
 
 
@@ -33,3 +33,62 @@ class RedditBotTest(TestCase):
         self.assertTrue(bot.has_crawled("000000"))
+
+
+class TaskQueueTest(TestCase):
+
+    def tearDown(self):
+        if os.path.isfile("task_queue_test.txt"):
+            os.remove("task_queue_test.txt")
+
+    def test_push_pop_test(self):
+
+        if os.path.isfile("task_queue_test.txt"):
+            os.remove("task_queue_test.txt")
+
+        tq = TaskQueue("task_queue_test.txt")
+        tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
+
+        task1 = tq.pop()
+
+        self.assertEqual(tq.pop(), None)
+        self.assertEqual(task1.url, "http://awebsite.com/")
+        self.assertEqual(task1.post_id, "postid")
+
+    def test_persistence(self):
+
+        if os.path.isfile("task_queue_test.txt"):
+            os.remove("task_queue_test.txt")
+
+        tq = TaskQueue("task_queue_test.txt")
+        tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
+
+        tq2 = TaskQueue("task_queue_test.txt")
+        task = tq2.pop()
+
+        self.assertEqual(task.url, "http://awebsite.com/")
+        self.assertEqual(task.post_id, "postid")
+
+    def test_multiple_tasks(self):
+        if os.path.isfile("task_queue_test.txt"):
+            os.remove("task_queue_test.txt")
+
+        tq = TaskQueue("task_queue_test.txt")
+
+        tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
+        tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
+        tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
+
+        self.assertIsNotNone(tq.pop())
+        self.assertIsNotNone(tq.pop())
+        self.assertIsNotNone(tq.pop())
+        self.assertIsNone(tq.pop())
+
+    def test_is_queued(self):
+        if os.path.isfile("task_queue_test.txt"):
+            os.remove("task_queue_test.txt")
+
+        tq = TaskQueue("task_queue_test.txt")
+
+        tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
+
+        self.assertTrue(tq.is_queued("postid"))
+        self.assertFalse(tq.is_queued("123456"))
32 spec/test_apache3.html (new file)
@@ -0,0 +1,32 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of /mp3/Jardinets</title>
</head>
<body>
<h1>Index of /mp3/Jardinets</h1>
<pre><img src="/__ovh_icons/blank.gif" alt="Icon "> <a href="?C=N;O=D">Name</a> <a href="?C=M;O=A">Last modified</a> <a href="?C=S;O=A">Size</a> <a href="?C=D;O=A">Description</a><hr><img src="/__ovh_icons/back.gif" alt="[PARENTDIR]"> <a href="/mp3/">Parent Directory</a> -
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="01%20Jean%20Rochefort%20-%20Winnie%20et%20ses%20amis%20(introduction)/">01 Jean Rochefort - ..></a> 2017-12-04 16:33 -
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="02%20Krisma%20-%20Amore.mp3">02 Krisma - Amore.mp3</a> 2017-12-04 16:32 11M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="03%20Bernard%20Estardy%20-%20Cha%20Tatch%20Ka.mp3">03 Bernard Estardy -..></a> 2017-12-04 16:32 3.5M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="04%20Jamie%20Woon%20-%20Street.mp3">04 Jamie Woon - Stre..></a> 2017-12-04 16:32 5.0M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="05%20DyE%20-%20Fantasy.mp3">05 DyE - Fantasy.mp3</a> 2017-12-04 16:33 6.9M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="06%20Games%20-%20Planet%20Party.mp3">06 Games - Planet Pa..></a> 2017-12-04 16:33 5.6M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="07%20Yeasayer%20-%20Swallowing%20the%20Decibels.mp3">07 Yeasayer - Swallo..></a> 2017-12-04 16:33 11M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="08%20Pacific!%20-%20Venus%20Rising.mp3">08 Pacific! - Venus ..></a> 2017-12-04 16:32 5.7M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="09%20Jacky%20Chalard%20-%20Super%20Man%20-%20Super%20Cool%20(LP%20Version).mp3">09 Jacky Chalard - S..></a> 2017-12-04 16:33 11M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="10%20Piry%20-%20Heroi%20Moderno.mp3">10 Piry - Heroi Mode..></a> 2017-12-04 16:32 4.1M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="11%20Bahamas%20-%20Bahamas.mp3">11 Bahamas - Bahamas..></a> 2017-12-04 16:32 7.9M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="12%20Aeroplane%20-%20Fish%20In%20The%20Sky.mp3">12 Aeroplane - Fish ..></a> 2017-12-04 16:32 7.6M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="13%20Discodeine%20-%20Synchronize%20(feat%20Jarvis%20Cocker%20-%20radio%20edit).mp3">13 Discodeine - Sync..></a> 2017-12-04 16:33 6.8M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="14%20Lykke%20Li%20-%20I%20Follow%20Rivers%20(the%20Magician%20Remix).mp3">14 Lykke Li - I Foll..></a> 2017-12-04 16:33 7.3M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="15%20Woodkid%20-%20Iron%20(Remix%20By%20Gucci%20Vump).mp3">15 Woodkid - Iron (R..></a> 2017-12-04 16:33 9.3M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="16%20Yellow%20Ostrich%20-%20WHALE.mp3">16 Yellow Ostrich - ..></a> 2017-12-04 16:33 7.1M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="17%20Connan%20Mockasin%20-%20Unicorn%20in%20Uniform.mp3">17 Connan Mockasin -..></a> 2017-12-04 16:32 6.3M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="18%20Bruce%20Haack%20-%20Maybe%20This%20Song.mp3">18 Bruce Haack - May..></a> 2017-12-04 16:33 5.4M
<img src="/__ovh_icons/image2.gif" alt="[IMG]"> <a href="cover-small.jpg">cover-small.jpg</a> 2017-12-04 16:32 97K
<img src="/__ovh_icons/image2.gif" alt="[IMG]"> <a href="cover.jpg">cover.jpg</a> 2017-12-04 16:33 466K
<img src="/__ovh_icons/text.gif" alt="[TXT]"> <a href="playlist.txt">playlist.txt</a> 2017-12-04 16:33 955
<hr></pre>
</body></html>
38 spec/test_apache4.html (new file)
@@ -0,0 +1,38 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of /movieserver/serien/bigbangtheorie/S3</title>
</head>
<body>
<h1>Index of /movieserver/serien/bigbangtheorie/S3</h1>
<table>
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>
<tr><th colspan="5"><hr></th></tr>
<tr><td valign="top"><img src="/icons/back.gif" alt="[PARENTDIR]"></td><td><a href="/movieserver/serien/bigbangtheorie/">Parent Directory</a></td><td> </td><td align="right"> - </td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E01.Der.Nordpol.Plan.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E01.Der.Nordpol.Plan.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2017-01-17 18:52 </td><td align="right">6.8M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E02.Die.Grillenwette.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E02.Die.Grillenwette.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:14 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E03.Sex.oder.Pralinen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E03.Sex.oder.Pralinen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2017-01-17 19:38 </td><td align="right"> 0 </td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E04.Fuer.ihn.oder.mit.ihm.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E04.Fuer.ihn.oder.mit.ihm.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:16 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E05.Der.Mann.der.seine.Omi.liebte.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E05.Der.Mann.der.seine.Omi.liebte.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:17 </td><td align="right">174M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E06.Football.fuer.Nerds.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E06.Football.fuer.Nerds.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:17 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E07.Der.Gitarrist.auf.der.Couch.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E07.Der.Gitarrist.auf.der.Couch.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:18 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E08.Das.Suppentattoo.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E08.Das.Suppentattoo.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:18 </td><td align="right">174M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E09.Die.Racheformel.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E09.Die.Racheformel.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:20 </td><td align="right">174M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E10.Das.Gorilla.Projekt.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E10.Das.Gorilla.Projekt.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:20 </td><td align="right">174M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E11.Maedels.an.der.Bar.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E11.Maedels.an.der.Bar.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:21 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E12.Howards.Phasen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E12.Howards.Phasen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:21 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E13.Terror.in.der.Oestadt.der.Rosen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E13.Terror.in.der.Oestadt.der.Rosen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:22 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E14.Fast.wie.Einstein.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E14.Fast.wie.Einstein.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:23 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E15.Freiflug.nach.Genf.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E15.Freiflug.nach.Genf.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:24 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E16.Sheldon.pro.se.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E16.Sheldon.pro.se.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:24 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E17.Die.Herren.des.Rings.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E17.Die.Herren.des.Rings.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:25 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E18.Die.dunkle.Seite.des.Mondes.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E18.Die.dunkle.Seite.des.Mondes.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:25 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E19.Das.L.Wort.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E19.Das.L.Wort.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:27 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E20.Spaghetti.mit.Wuerstchen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E20.Spaghetti.mit.Wuerstchen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:27 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E21.Vierer.ohne.Sheldon.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E21.Vierer.ohne.Sheldon.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:28 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E22.Die.Wahrheit.ueber.den.Fahrstuhl.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E22.Die.Wahrheit.ueber.den.Fahrstuhl.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:28 </td><td align="right">175M</td><td> </td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E23.Nie.mehr.dumme.Typen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E23.Nie.mehr.dumme.Typen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:29 </td><td align="right">174M</td><td> </td></tr>
<tr><th colspan="5"><hr></th></tr>
</table>
<address>Apache/2.4.10 (Debian) Server at jenserserver.no-ip.biz Port 80</address>
</body></html>
@@ -23,7 +23,7 @@ function drawCharts(rData) {
 
     for(var ext in rData["ext_sizes"]) {
         //Ignore file sizes below 0.5%
-        if (rData["ext_sizes"][ext] < 0.005 * rData["total_size"]) {
+        if (!isRelevant(rData, ext)) {
 
             otherSize += rData["ext_sizes"][ext];
             otherCount += rData["ext_count"][ext];
@@ -40,6 +40,7 @@ function drawCharts(rData) {
         colors.push(getRandomColor());
         labels.push("other x" + otherCount + " (" + humanFileSize(otherSize) + ")");
         dataSetSize.push(otherSize);
+        dataSetCount.push(otherCount);
     }
 
     var ctx = document.getElementById('typesChart').getContext('2d');
@@ -64,6 +65,23 @@ function drawCharts(rData) {
     });
 }
 
+
+function isRelevant(rData, ext) {
+
+    console.log("Checking + " + ext);
+    console.log("total + " + rData["total_size"]);
+    console.log("size + " + rData["ext_count"][ext]);
+    console.log("min + " + 0.03 * rData["total_count"]);
+
+    if(rData["total_size"] === 0) {
+        return rData["ext_count"][ext] > 0.03 * rData["total_count"]
+    } else {
+        return rData["ext_sizes"][ext] > 0.005 * rData["total_size"]
+    }
+
+
+}
+
 /**
  * https://stackoverflow.com/questions/1484506
  */
@@ -80,6 +98,11 @@ function getRandomColor() {
  * https://stackoverflow.com/questions/10420352
  */
 function humanFileSize(bytes) {
+
+    if(bytes === 0) {
+        return "? B"
+    }
+
     var thresh = 1000;
     if(Math.abs(bytes) < thresh) {
         return bytes + ' B';