Fixed bugs, enhanced parser

simon 2018-02-05 22:05:07 -05:00
parent f3dc1445e4
commit 23775ec126
12 changed files with 459 additions and 80 deletions


@@ -2,19 +2,41 @@ import requests
from parser import NginxParser, ApacheParser
from reports import ReportSaver, ReportBuilder
headers = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}
class Crawler:
def __init__(self, url):
self.parser = NginxParser()
def __init__(self, url, test_url):
self.files = []
self.base_url = url
if test_url:
# Test url
r = requests.get(self.base_url, timeout=30)
self.parser = self.guess_parser(r.text, r.headers)()
print("Using " + self.parser.__class__.__name__ + " as parser")
else:
self.parser = None
@staticmethod
def guess_parser(text, headers):
server = headers["Server"] if "Server" in headers else ""
# try nginx
parser = NginxParser()
if parser.page_is_valid(text):
return NginxParser
# Try apache
parser = ApacheParser()
if parser.page_is_valid(text):
return ApacheParser
return None
def crawl(self, address=None):
if address is None:
@@ -53,6 +75,7 @@ class Crawler:
f.write(report_saver.to_link_list())
c = Crawler("http://dl.upload8.in/files/Serial/Altered%20Carbon/")
c.crawl()
c.store_report("000002")
if __name__ == "__main__":
c = Crawler("https://repo.zenk-security.com/", True)
c.crawl()
c.store_report("000007")

parser.py

@@ -1,14 +1,43 @@
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os
import re
from urllib.parse import urljoin
import humanfriendly
from bs4 import BeautifulSoup
class PageParser:
def __init__(self):
self.col_start = None
self.col_end = None
self.size_unknown = True
def get_links(self, text: str, base_url: str):
raise NotImplementedError()
@staticmethod
def get_size_columns(cols):
for i in range(len(cols)):
if i == len(cols) - 1:
try:
humanfriendly.parse_size(cols[i])
return tuple([i, i])
except humanfriendly.InvalidSize:
return None
try:
humanfriendly.parse_size(cols[i] + cols[i + 1])
return tuple([i, i + 1])
except humanfriendly.InvalidSize:
try:
humanfriendly.parse_size(cols[i])
return tuple([i, i])
except humanfriendly.InvalidSize:
continue
@staticmethod
def get_parser_type(headers):
"""Get appropriate parser type for a a server based on its header"""
@@ -26,26 +55,82 @@ class PageParser:
@staticmethod
def file_type(link):
return "d" if link.endswith("/") else "f"
if link.endswith("/") or link.startswith("?"):
return "d"
return "f"
@staticmethod
def clean_page(text):
text = text.replace("<A", "<a")
text = text.replace("</A", "</a")
# text = text.replace("&", "&amp;")
text = text.replace("<hr>", "")
return text
def get_size(self, cols):
# Figure out which column(s) is the size one
size_cols = self.get_size_columns(cols)
if size_cols is not None:
col_start, col_end = size_cols
self.size_unknown = False
size_human = cols[col_start] if col_start == col_end else cols[col_start] + cols[col_end]
try:
size = humanfriendly.parse_size(size_human)
except humanfriendly.InvalidSize:
size = 0
else:
size = 0
return size
class NginxParser(PageParser):
def get_links(self, text, base_url: str):
links = dict()
soup = BeautifulSoup(text, "html.parser")
# Handle weird character formats and tag names
text = text.replace("<A", "<a")
text = text.replace("</A", "</a")
text = text.replace("&", "&amp;")
text = self.clean_page(text)
soup = BeautifulSoup(text, "html.parser")
for link in soup.find("pre").find_all("a"):
if link.text != "../":
parsed_link = self.parse_link(link, text, base_url)
if parsed_link is not None:
links[parsed_link[0]] = parsed_link[1]
return links
def page_is_valid(self, text):
# Handle weird character formats and tag names
text = self.clean_page(text)
soup = BeautifulSoup(text, "html.parser")
if soup.find("pre") is None:
return False
# try to parse a single link
for link in soup.find("pre").find_all("a"):
if PageParser.should_save_link(link.text):
if self.parse_link(link, text, "") is None:
return False
return True
def parse_link(self, link, text, base_url):
try:
if PageParser.should_save_link(link.text):
target = link.get("href")
full_link = urljoin(base_url, target)
file_type = PageParser.file_type(full_link)
file_type = PageParser.file_type(target)
if file_type == "f":
extension = os.path.splitext(full_link)[1].strip(".")
@@ -53,46 +138,30 @@ class NginxParser(PageParser):
# Parse size
target_index = text.find("</a", text.find(target))
date_and_size = text[target_index:text.find("<a", target_index)]
size = humanfriendly.parse_size(re.split("\s+", date_and_size)[3])
links[link.text] = dict(link=full_link, size=size, ext=extension, type=file_type)
cols = re.split("\s+", date_and_size)
size = self.get_size(cols)
return target, dict(link=full_link, size=size, ext=extension, type=file_type)
else:
links[link.text] = dict(link=full_link, type=file_type)
return target, dict(link=full_link, type=file_type)
except Exception as e:
print("Couldn't parse link " + link.get("href") + str(e))
raise e
return links
return None
class ApacheParser(PageParser):
def __init__(self):
self.col_start = None
self.col_end = None
self.size_unknown = True
def get_size_columns(self, cols):
for i in range(len(cols) - 1):
try:
humanfriendly.parse_size(cols[i] + cols[i + 1])
return tuple([i, i + 1])
except humanfriendly.InvalidSize:
try:
humanfriendly.parse_size(cols[i])
return tuple([i, i])
except humanfriendly.InvalidSize:
continue
def get_links(self, text, base_url: str):
links = dict()
soup = BeautifulSoup(text, "html.parser")
# Handle weird character formats and tag names
text = text.replace("<A", "<a")
text = text.replace("</A", "</a")
text = text.replace("&", "&amp;")
text = self.clean_page(text)
soup = BeautifulSoup(text, "html.parser")
if soup.find("table"):
@@ -109,20 +178,20 @@ class ApacheParser(PageParser):
if PageParser.should_save_link(link.text):
target = link.get("href")
file_type = PageParser.file_type(target)
full_link = urljoin(base_url, target)
file_type = PageParser.file_type(full_link)
if file_type == "f":
extension = os.path.splitext(full_link)[1].strip(".")
cols = row.find_all("td")
for i in range(len(cols)):
cols[i] = cols[i].string if cols[i].string is not None else ""
cols[i] = cols[i].string if cols[i].string is not None else "-"
size = self.get_size(cols)
links[link.text] = dict(link=full_link, size=size, ext=extension, type=file_type)
links[target] = dict(link=full_link, size=size, ext=extension, type=file_type)
else:
links[link.text] = dict(link=full_link, type=file_type)
links[target] = dict(link=full_link, type=file_type)
else:
for link in soup.find_all("a"):
@@ -131,36 +200,33 @@ class ApacheParser(PageParser):
target = link.get("href")
full_link = urljoin(base_url, target)
file_type = PageParser.file_type(full_link)
file_type = PageParser.file_type(target)
if file_type == "f":
extension = os.path.splitext(full_link)[1].strip(".")
target_index = text.find("</a", text.find(target))
date_and_size = text[target_index:text.find("<a", target_index)]
date_and_size = text[target_index:text.find("<a", target_index)] # in some cases we,re looking for </pre instead
date_and_size = text[target_index:text.find("</pre", target_index)] if text.find("<a", target_index) == -1 else date_and_size
cols = re.split("\s+", date_and_size)
size = self.get_size(cols)
links[link.text] = dict(link=full_link, size=size, ext=extension, type=file_type)
links[target] = dict(link=full_link, size=size, ext=extension, type=file_type)
else:
links[link.text] = dict(link=full_link, type=file_type)
links[target] = dict(link=full_link, type=file_type)
return links
def get_size(self, cols):
if self.col_start is None:
# Figure out which column(s) is the size one
size_cols = self.get_size_columns(cols)
if size_cols is not None:
self.col_start, self.col_end = size_cols
self.size_unknown = False
def page_is_valid(self, text):
try:
links = self.get_links(text, "")
print(links)
return True
except Exception as e:
print("This is not recognised Apache open directory: " + str(e))
if self.size_unknown:
size = 0
else:
size_human = cols[self.col_start] if self.col_start == self.col_end else cols[self.col_start] + cols[self.col_end]
size = humanfriendly.parse_size(size_human)
return size


@@ -1,4 +1,58 @@
import os
import json
class CrawTask:
def __init__(self, url, post_id, title):
self.url = url
self.post_id = post_id
self.post_title = title
class TaskQueue:
def __init__(self, file):
self.file = file
self.tasks = []
if os.path.isfile(self.file):
with open(self.file, "r") as f:
json_tasks = json.load(f)
for task in json_tasks:
self.tasks.append(CrawTask(task["url"], task["post_id"], task["post_title"]))
def push(self, task):
self.tasks.append(task)
self.update_file()
def pop(self):
if len(self.tasks) > 0:
t = self.tasks.pop()
self.update_file()
else:
t = None
return t
def update_file(self):
with open(self.file, "w") as f:
json.dump(self.tasks, f, default=dumper)
def is_queued(self, post_id):
for task in self.tasks:
if task.post_id == post_id:
return True
return False
def dumper(obj):
return obj.__dict__
class RedditBot:
@@ -11,8 +65,7 @@ class RedditBot:
self.crawled = []
else:
with open(log_file, "r") as f:
self.crawled = f.read().split("\n")
self.crawled = list(filter(None, self.crawled))
self.crawled = list(filter(None, f.read().split("\n")))
def log_crawl(self, post_id):


@@ -91,6 +91,7 @@ class ReportSaver:
out["ext_sizes"] = self.builder.get_ext_sizes()
out["ext_sizes_formatted"] = self.builder.get_ext_sizes_formatted()
out["report_time"] = str(self.builder.report_time)
out["total_count"] = len(self.builder.files)
return json.dumps(out)
@@ -103,6 +104,7 @@ class ReportSaver:
out["ext_count"] = self.builder.get_ext_counts()
out["ext_sizes"] = self.builder.get_ext_sizes()
out["report_time"] = str(self.builder.report_time)
out["total_count"] = len(self.builder.files)
return json.dumps(out)


@@ -0,0 +1,32 @@
from unittest import TestCase
from parser import ApacheParser, NginxParser
from crawler import Crawler
class CrawlerTest(TestCase):
def test_guess_parser1(self):
with open("test_apache1.html", "r") as f:
text = f.read()
c = Crawler("http://some.website/", False)
self.assertEqual(c.guess_parser(text, {}), ApacheParser)
def test_guess_parser2(self):
with open("test_nginx1.html", "r") as f:
text = f.read()
c = Crawler("http://some.website", False)
self.assertEqual(c.guess_parser(text, {}), NginxParser)
def test_guess_parser3(self):
with open("test_invalid.html", "r") as f:
text = f.read()
c = Crawler("http://some.website", False)
self.assertEqual(c.guess_parser(text, {}), None)


@@ -18,7 +18,7 @@ class NginxParserTest(TestCase):
def setUp(self):
self.parser = NginxParser()
root_page_file = open("test_nginx_root.html", "r")
root_page_file = open("test_nginx1.html", "r")
self.root_page = root_page_file.read()
root_page_file.close()
@@ -57,7 +57,7 @@ class ApacheParserTest(TestCase):
def setUp(self):
self.parser = ApacheParser()
root_page_file = open("test_apache_root.html", "r")
root_page_file = open("test_apache1.html", "r")
self.root_page = root_page_file.read()
root_page_file.close()
@@ -76,7 +76,7 @@ class ApacheParserTest(TestCase):
result = self.parser.get_links(self.root_page, "https://keisari.net/videos/")
self.assertEqual(result["happyday.mp4"]["size"], 772000)
self.assertEqual(result["alex_räjähtää.mp4"]["size"], 715000)
self.assertEqual(result["alex_r%c3%a4j%c3%a4ht%c3%a4%c3%a4.mp4"]["size"], 715000)
def test_link_type(self):
result = self.parser.get_links(self.root_page, "https://keisari.net/videos/")
@@ -109,16 +109,67 @@ class ApacheParserTest2(TestCase):
def test_link_size(self):
result = self.parser.get_links(self.root_page, self.base_url)
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z.æ020˜b.uæݪ¦éƒTƒCƒ„l“`àIŒåó̃[ƒcv.wmv"]["size"], 179721000)
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z.‘æ225˜b.u­¢ºƒ`ƒrƒbƒRIIPWåêíIHv.wmv"]["size"], 347507000)
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z.æ011˜b.u‰FˆˆêÌ­íŽmƒTƒCƒ„lß´ßéIv.wmv"]["size"], 232185000)
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z.‘æ019˜b.ud—ÍÆÌí¢IƒoƒuƒƒXŒNð©ܦëv.wmv"]["size"], 185385000)
def test_link_type(self):
result = self.parser.get_links(self.root_page, self.base_url)
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z.‘æ225˜b.u­¢ºƒ`ƒrƒbƒRIIPWåêíIHv.wmv"]["type"], "f")
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z jpg/"]["type"], "d")
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z.‘æ011˜b.u‰FˆˆêÌ­íŽmƒTƒCƒ„lß´ßéIv.wmv"]["type"], "f")
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z%20jpg/"]["type"], "d")
def test_link_extension(self):
result = self.parser.get_links(self.root_page, self.base_url)
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z.æ225˜b.u­¢ºƒ`ƒrƒbƒRIIPWåêíIHv.wmv"]["ext"], "wmv")
self.assertEqual(result["ƒhƒ‰ƒSƒ“ƒ{Z.æ011˜b.u‰FˆˆêÌ­íŽmƒTƒCƒ„lß´ßéIv.wmv"]["ext"], "wmv")
class ApacheParserTest3(TestCase):
def setUp(self):
self.parser = ApacheParser()
root_page_file = open("test_apache3.html", "r")
self.root_page = root_page_file.read()
self.base_url = "http://files.duspectacle.com/mp3/Jardinets/"
root_page_file.close()
def test_link_count(self):
result = self.parser.get_links(self.root_page, self.base_url)
self.assertEqual(len(result), 21)
def test_link_size(self):
result = self.parser.get_links(self.root_page, self.base_url)
self.assertEqual(result["15%20Woodkid%20-%20Iron%20(Remix%20By%20Gucci%20Vump).mp3"]["size"], 9300000)
self.assertEqual(result["16%20Yellow%20Ostrich%20-%20WHALE.mp3"]["size"], 7100000)
def test_link_type(self):
result = self.parser.get_links(self.root_page, self.base_url)
self.assertEqual(result["15%20Woodkid%20-%20Iron%20(Remix%20By%20Gucci%20Vump).mp3"]["type"], "f")
self.assertEqual(result["01%20Jean%20Rochefort%20-%20Winnie%20et%20ses%20amis%20(introduction)/"]["type"], "d")
def test_link_extension(self):
result = self.parser.get_links(self.root_page, self.base_url)
self.assertEqual(result["15%20Woodkid%20-%20Iron%20(Remix%20By%20Gucci%20Vump).mp3"]["ext"], "mp3")
class ApacheParserTest4(TestCase):
def setUp(self):
self.parser = ApacheParser()
root_page_file = open("test_apache4.html", "r")
self.root_page = root_page_file.read()
self.base_url = "http://jenserserver.no-ip.biz/movieserver/serien/bigbangtheorie/S3/"
root_page_file.close()
def test_link_size(self):
result = self.parser.get_links(self.root_page, self.base_url)
self.assertEqual(result["The.Big.Bang.Theory.S03E06.Football.fuer.Nerds.German.WS.DVDRip.XviD-DELiCiOUS.avi"]["size"], 175000000)
self.assertEqual(result["The.Big.Bang.Theory.S03E03.Sex.oder.Pralinen.German.WS.DVDRip.XviD-DELiCiOUS.avi"]["size"], 0)


@@ -1,5 +1,5 @@
from unittest import TestCase
from reddit_bot import RedditBot
from reddit_bot import RedditBot, TaskQueue, CrawTask
import os
@@ -33,3 +33,62 @@ class RedditBotTest(TestCase):
self.assertTrue(bot.has_crawled("000000"))
class TaskQueueTest(TestCase):
def tearDown(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
def test_push_pop_test(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
tq = TaskQueue("task_queue_test.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
task1 = tq.pop()
self.assertEqual(tq.pop(), None)
self.assertEqual(task1.url, "http://awebsite.com/")
self.assertEqual(task1.post_id, "postid")
def test_persistence(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
tq = TaskQueue("task_queue_test.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
tq2 = TaskQueue("task_queue_test.txt")
task = tq2.pop()
self.assertEqual(task.url, "http://awebsite.com/")
self.assertEqual(task.post_id, "postid")
def test_multiple_tasks(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
tq = TaskQueue("task_queue_test.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
self.assertIsNotNone(tq.pop())
self.assertIsNotNone(tq.pop())
self.assertIsNotNone(tq.pop())
self.assertIsNone(tq.pop())
def test_is_queued(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
tq = TaskQueue("task_queue_test.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
self.assertTrue(tq.is_queued("postid"))
self.assertFalse(tq.is_queued("123456"))

spec/test_apache3.html

@@ -0,0 +1,32 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of /mp3/Jardinets</title>
</head>
<body>
<h1>Index of /mp3/Jardinets</h1>
<pre><img src="/__ovh_icons/blank.gif" alt="Icon "> <a href="?C=N;O=D">Name</a> <a href="?C=M;O=A">Last modified</a> <a href="?C=S;O=A">Size</a> <a href="?C=D;O=A">Description</a><hr><img src="/__ovh_icons/back.gif" alt="[PARENTDIR]"> <a href="/mp3/">Parent Directory</a> -
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="01%20Jean%20Rochefort%20-%20Winnie%20et%20ses%20amis%20(introduction)/">01 Jean Rochefort - ..&gt;</a> 2017-12-04 16:33 -
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="02%20Krisma%20-%20Amore.mp3">02 Krisma - Amore.mp3</a> 2017-12-04 16:32 11M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="03%20Bernard%20Estardy%20-%20Cha%20Tatch%20Ka.mp3">03 Bernard Estardy -..&gt;</a> 2017-12-04 16:32 3.5M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="04%20Jamie%20Woon%20-%20Street.mp3">04 Jamie Woon - Stre..&gt;</a> 2017-12-04 16:32 5.0M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="05%20DyE%20-%20Fantasy.mp3">05 DyE - Fantasy.mp3</a> 2017-12-04 16:33 6.9M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="06%20Games%20-%20Planet%20Party.mp3">06 Games - Planet Pa..&gt;</a> 2017-12-04 16:33 5.6M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="07%20Yeasayer%20-%20Swallowing%20the%20Decibels.mp3">07 Yeasayer - Swallo..&gt;</a> 2017-12-04 16:33 11M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="08%20Pacific!%20-%20Venus%20Rising.mp3">08 Pacific! - Venus ..&gt;</a> 2017-12-04 16:32 5.7M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="09%20Jacky%20Chalard%20-%20Super%20Man%20-%20Super%20Cool%20(LP%20Version).mp3">09 Jacky Chalard - S..&gt;</a> 2017-12-04 16:33 11M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="10%20Piry%20-%20Heroi%20Moderno.mp3">10 Piry - Heroi Mode..&gt;</a> 2017-12-04 16:32 4.1M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="11%20Bahamas%20-%20Bahamas.mp3">11 Bahamas - Bahamas..&gt;</a> 2017-12-04 16:32 7.9M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="12%20Aeroplane%20-%20Fish%20In%20The%20Sky.mp3">12 Aeroplane - Fish ..&gt;</a> 2017-12-04 16:32 7.6M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="13%20Discodeine%20-%20Synchronize%20(feat%20Jarvis%20Cocker%20-%20radio%20edit).mp3">13 Discodeine - Sync..&gt;</a> 2017-12-04 16:33 6.8M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="14%20Lykke%20Li%20-%20I%20Follow%20Rivers%20(the%20Magician%20Remix).mp3">14 Lykke Li - I Foll..&gt;</a> 2017-12-04 16:33 7.3M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="15%20Woodkid%20-%20Iron%20(Remix%20By%20Gucci%20Vump).mp3">15 Woodkid - Iron (R..&gt;</a> 2017-12-04 16:33 9.3M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="16%20Yellow%20Ostrich%20-%20WHALE.mp3">16 Yellow Ostrich - ..&gt;</a> 2017-12-04 16:33 7.1M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="17%20Connan%20Mockasin%20-%20Unicorn%20in%20Uniform.mp3">17 Connan Mockasin -..&gt;</a> 2017-12-04 16:32 6.3M
<img src="/__ovh_icons/sound2.gif" alt="[SND]"> <a href="18%20Bruce%20Haack%20-%20Maybe%20This%20Song.mp3">18 Bruce Haack - May..&gt;</a> 2017-12-04 16:33 5.4M
<img src="/__ovh_icons/image2.gif" alt="[IMG]"> <a href="cover-small.jpg">cover-small.jpg</a> 2017-12-04 16:32 97K
<img src="/__ovh_icons/image2.gif" alt="[IMG]"> <a href="cover.jpg">cover.jpg</a> 2017-12-04 16:33 466K
<img src="/__ovh_icons/text.gif" alt="[TXT]"> <a href="playlist.txt">playlist.txt</a> 2017-12-04 16:33 955
<hr></pre>
</body></html>

spec/test_apache4.html

@@ -0,0 +1,38 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of /movieserver/serien/bigbangtheorie/S3</title>
</head>
<body>
<h1>Index of /movieserver/serien/bigbangtheorie/S3</h1>
<table>
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>
<tr><th colspan="5"><hr></th></tr>
<tr><td valign="top"><img src="/icons/back.gif" alt="[PARENTDIR]"></td><td><a href="/movieserver/serien/bigbangtheorie/">Parent Directory</a></td><td>&nbsp;</td><td align="right"> - </td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E01.Der.Nordpol.Plan.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E01.Der.Nordpol.Plan.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2017-01-17 18:52 </td><td align="right">6.8M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E02.Die.Grillenwette.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E02.Die.Grillenwette.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:14 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E03.Sex.oder.Pralinen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E03.Sex.oder.Pralinen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2017-01-17 19:38 </td><td align="right"> 0 </td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E04.Fuer.ihn.oder.mit.ihm.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E04.Fuer.ihn.oder.mit.ihm.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:16 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E05.Der.Mann.der.seine.Omi.liebte.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E05.Der.Mann.der.seine.Omi.liebte.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:17 </td><td align="right">174M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E06.Football.fuer.Nerds.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E06.Football.fuer.Nerds.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:17 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E07.Der.Gitarrist.auf.der.Couch.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E07.Der.Gitarrist.auf.der.Couch.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:18 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E08.Das.Suppentattoo.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E08.Das.Suppentattoo.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:18 </td><td align="right">174M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E09.Die.Racheformel.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E09.Die.Racheformel.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:20 </td><td align="right">174M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E10.Das.Gorilla.Projekt.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E10.Das.Gorilla.Projekt.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:20 </td><td align="right">174M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E11.Maedels.an.der.Bar.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E11.Maedels.an.der.Bar.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:21 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E12.Howards.Phasen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E12.Howards.Phasen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:21 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E13.Terror.in.der.Oestadt.der.Rosen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E13.Terror.in.der.Oestadt.der.Rosen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:22 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E14.Fast.wie.Einstein.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E14.Fast.wie.Einstein.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:23 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E15.Freiflug.nach.Genf.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E15.Freiflug.nach.Genf.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:24 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E16.Sheldon.pro.se.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E16.Sheldon.pro.se.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:24 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E17.Die.Herren.des.Rings.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E17.Die.Herren.des.Rings.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:25 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E18.Die.dunkle.Seite.des.Mondes.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E18.Die.dunkle.Seite.des.Mondes.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:25 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E19.Das.L.Wort.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E19.Das.L.Wort.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:27 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E20.Spaghetti.mit.Wuerstchen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E20.Spaghetti.mit.Wuerstchen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:27 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E21.Vierer.ohne.Sheldon.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E21.Vierer.ohne.Sheldon.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:28 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E22.Die.Wahrheit.ueber.den.Fahrstuhl.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E22.Die.Wahrheit.ueber.den.Fahrstuhl.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:28 </td><td align="right">175M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/movie.gif" alt="[VID]"></td><td><a href="The.Big.Bang.Theory.S03E23.Nie.mehr.dumme.Typen.German.WS.DVDRip.XviD-DELiCiOUS.avi">The.Big.Bang.Theory.S03E23.Nie.mehr.dumme.Typen.German.WS.DVDRip.XviD-DELiCiOUS.avi</a></td><td align="right">2014-05-16 17:29 </td><td align="right">174M</td><td>&nbsp;</td></tr>
<tr><th colspan="5"><hr></th></tr>
</table>
<address>Apache/2.4.10 (Debian) Server at jenserserver.no-ip.biz Port 80</address>
</body></html>


@@ -23,7 +23,7 @@ function drawCharts(rData) {
for(var ext in rData["ext_sizes"]) {
//Ignore file sizes below 0.5%
if (rData["ext_sizes"][ext] < 0.005 * rData["total_size"]) {
if (!isRelevant(rData, ext)) {
otherSize += rData["ext_sizes"][ext];
otherCount += rData["ext_count"][ext];
@@ -40,6 +40,7 @@ function drawCharts(rData) {
colors.push(getRandomColor());
labels.push("other x" + otherCount + " (" + humanFileSize(otherSize) + ")");
dataSetSize.push(otherSize);
dataSetCount.push(otherCount);
}
var ctx = document.getElementById('typesChart').getContext('2d');
@@ -64,6 +65,23 @@ function drawCharts(rData) {
});
}
function isRelevant(rData, ext) {
console.log("Checking + " + ext);
console.log("total + " + rData["total_size"]);
console.log("size + " + rData["ext_count"][ext]);
console.log("min + " + 0.03 * rData["total_count"]);
if(rData["total_size"] === 0) {
return rData["ext_count"][ext] > 0.03 * rData["total_count"]
} else {
return rData["ext_sizes"][ext] > 0.005 * rData["total_size"]
}
}
/**
* https://stackoverflow.com/questions/1484506
*/
@@ -80,6 +98,11 @@ function getRandomColor() {
* https://stackoverflow.com/questions/10420352
*/
function humanFileSize(bytes) {
if(bytes === 0) {
return "? B"
}
var thresh = 1000;
if(Math.abs(bytes) < thresh) {
return bytes + ' B';