Initial commit

simon 2018-02-27 16:31:54 -05:00
commit cd09d2b791
45 changed files with 190867 additions and 0 deletions

20327
1001freefonts.com/links.txt Normal file

File diff suppressed because it is too large

95
1001freefonts.com/run.py Normal file

@ -0,0 +1,95 @@
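# Collects "/d/" download hrefs from 1001freefonts.com's per-letter listing pages
# into links.txt, then streams each linked archive into fonts/ with a
# multiprocessing pool of 25 workers.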
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1",
"Referer": "https://www.1001freefonts.com/"
}
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30, headers=headers)
except Exception as e:
print("!", end="", flush=True)
continue
def get_dl_links(url):
print(url)
r = request_timeout(url)
soup = BeautifulSoup(r.text, "html.parser")
for a in soup.findAll("a"):
href = a.get("href")
if href is not None and href.find("/d/") != -1:
with open("links.txt", "a") as f:
f.write(href + "\n")
def get_fonts():
letters = list("abcdefghijklmnopqrstuvwxyz")
letters.append("num")
all_page_links = []
for letter in letters:
print(letter)
r = request_timeout("https://www.1001freefonts.com/" + letter + "fonts.php")
soup = BeautifulSoup(r.text, "html.parser")
page_max = soup.find("div", attrs={"class": "pagingLabelWrapper"})
page_max = page_max.text.split(" ")[-1]
page_max = int(page_max)
print(page_max)
for i in range(1, page_max+1):
all_page_links.append("https://www.1001freefonts.com/" + letter + "fonts" + str(i) + ".php")
pool = multiprocessing.Pool(processes=25)
pool.map(get_dl_links, all_page_links)
def download_font(url):
file_path = "fonts" + url[url.rfind("/"):]
if os.path.exists(file_path):
return
print(file_path)
r = requests.get(url, stream=True, headers=headers)
if r.status_code != 200:
print(r.status_code)
return
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
def download_all():
pool = multiprocessing.Pool(processes=25)
with open("links.txt", "r") as f:
pool.map(download_font, f.read().splitlines())
# get_fonts()
download_all()

9
README.md Normal file

@ -0,0 +1,9 @@
## Scripts for downloading content from a bunch of websites
### Setup:
```sudo pip3 install requests bs4 python-guerrillamail pdfkit youtube-dl```
### About
Feel free to contribute or suggest new websites using the Issues feature.
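### Usage
Each site directory is self-contained; run its entry script (`run.py`, `ripper.py`, or a crawler/downloader pair) from inside that directory, since paths like `links.txt` and `fonts/` are relative. For example:
```cd dafont.com && python3 run.py```
Most of the font scripts expect a `fonts/` directory to already exist next to them.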


13866
abstractfonts.com/fonts.txt Normal file

File diff suppressed because it is too large


122
abstractfonts.com/run.py Normal file

@ -0,0 +1,122 @@
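# Scrapes font IDs from abstractfonts.com's alphabetical index into fonts.txt,
# then downloads each font via /download/<id>, switching to the next proxy in
# proxies.txt whenever a request fails or no attachment is returned.
# Completed IDs are tracked in downloaded.txt.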
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
proxy_index = 0
proxies = {
"http": ""
}
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1"
}
def already_downloaded(font_id):
    # Return False on the first run, before downloaded.txt exists.
    if not os.path.exists("downloaded.txt"):
        return False
    with open("downloaded.txt", "r") as f:
        return font_id in f.read().splitlines()
def flag_downloaded(font_id):
with open("downloaded.txt", "a") as f:
f.write(font_id + "\n")
def get_new_proxy():
global proxy_index
with open("proxies.txt", "r") as f:
line = f.read().splitlines()[proxy_index]
proxies["http"] = line
print("Switched to proxy " + line)
proxy_index += 1
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30)
except Exception as e:
print("!", end="", flush=True)
continue
def get_dl_links(url):
print(url)
r = request_timeout(url)
soup = BeautifulSoup(r.text, "html.parser")
for a in soup.findAll("a"):
if a.get("data-font-id") is not None:
with open("fonts.txt", "a") as f:
f.write(a.get("data-font-id") + "\n")
def get_fonts():
letters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
letters.append("Numbers")
all_page_links = []
for letter in letters:
all_page_links.append("http://www.abstractfonts.com/alpha/" + letter)
pool = multiprocessing.Pool(processes=25)
pool.map(get_dl_links, all_page_links)
def download_font(font_id):
if already_downloaded(font_id):
return
while True:
try:
r = requests.get("http://www.abstractfonts.com/download/" + font_id, stream=True, proxies=proxies, headers=headers, timeout=5)
if r.status_code == 404:
print(str(r.status_code) + " - http://www.abstractfonts.com/download/" + font_id)
get_new_proxy()
return
if "Content-Disposition" not in r.headers:
print(r.text)
get_new_proxy()
return
file_path = "fonts/" + r.headers["Content-Disposition"][r.headers["Content-Disposition"].rfind("\"", 0, -2) + 1:-1]
if os.path.exists(file_path):
return
print(file_path)
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
flag_downloaded(font_id)
break
except:
get_new_proxy()
continue
return
# get_fonts()
get_new_proxy()
pool = multiprocessing.Pool(processes=100)
with open("fonts.txt", "r") as f1:
pool.map(download_font, f1.read().splitlines())

0
craftsy.com/courses.txt Normal file

170
craftsy.com/ripper.py Normal file

@ -0,0 +1,170 @@
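# Downloads Craftsy Unlimited courses: logs in via api.craftsy.com, then for each
# playlist ID listed in courses.txt fetches the class materials and episode MP4s
# into a folder named after the course.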
import requests
import pathlib
import os
import json
headers_login = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "application/json, text/plain, */*",
"Content-Type": "application/json",
"Referer": "https://unlimited.craftsy.com/login",
"X-Requested-By": "Craftsy"
}
headers = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "application/json, text/plain, */*"
}
def login(email, password):
r1 = requests.get("https://unlimited.craftsy.com/login", headers=headers_login)
payload = json.dumps({"email": email, "password": password})
r2 = requests.post("https://api.craftsy.com/login/", data=payload, headers=headers_login, cookies=r1.cookies)
print(r2.text)
return r2
def get_course_info(r_login, course_id):
while True:
try:
r = requests.get("https://api.craftsy.com/m/playlists/" + course_id, headers=headers_login,
cookies=r_login.cookies, timeout=5)
break
except:
print("!", end="", flush=True)
continue
course_info = json.loads(r.text)
return course_info
def get_materials(r_login, course_id):
materials = []
while True:
try:
r = requests.get("https://api.craftsy.com/m/playlists/" + course_id + "/materials", headers=headers_login,
cookies=r_login.cookies, timeout=5)
break
except:
print("!", end="", flush=True)
continue
try:
material_info = json.loads(r.text)
for material in material_info:
materials.append((material["materialName"], material["materialPath"]))
except:
print("Err mat!", end="", flush=True)
return materials
def get_episodes(course_info):
episodes = []
course_name = course_info["name"]
print(course_name)
for episode in course_info["episodes"]:
episodes.append((course_name, episode["name"], episode["episodeId"]))
return episodes
def download_episode(episode, r_login):
while True:
try:
r = requests.get("https://api.craftsy.com/m/videos/secure/episodes/" + str(episode[2]), headers=headers,
cookies=r_login.cookies, timeout=5)
break
except Exception as e:
print("!", end="", flush=True)
continue
episode_info = []
try:
episode_info = json.loads(r.text)
except:
print("Err episode!", end="", flush=True)
for source in episode_info:
if source["format"] == "mp4":
path = episode[0]
print(path + os.sep + str(episode[1]) + ".mp4")
pathlib.Path(path).mkdir(parents=True, exist_ok=True)
if os.path.exists(path + os.sep + str(episode[2]) + " - " + episode[1].replace("/", "") + ".mp4"):
print("Skipping...")
continue
while True:
try:
response = requests.get(source["url"], stream=True, timeout=5)
with open(path + os.sep + str(episode[2]) + " - " + episode[1].replace("/", "") + ".mp4", 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
break
except Exception as e:
print("!", end="", flush=True)
continue
def download_material(r_login, material, course_info):
path = course_info["name"]
print(path + os.sep + material[0] + os.path.splitext(material[1])[1])
pathlib.Path(path).mkdir(parents=True, exist_ok=True)
if os.path.exists(path + os.sep + material[0] + os.path.splitext(material[1])[1]):
print("Skipping...")
return
while True:
try:
response = requests.get(material[1], stream=True, timeout=5, cookies=r_login.cookies)
with open(path + os.sep + material[0] + os.path.splitext(material[1])[1], 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
break
except:
print("!", end="", flush=True)
continue
rLogin = login("", "")
for course in open("courses.txt").read().splitlines():
print(course)
course_info = get_course_info(rLogin, course)
for material in get_materials(rLogin, course):
download_material(rLogin, material, course_info)
print(material)
for episode in get_episodes(course_info):
download_episode(episode, rLogin)
print(episode)

34106
dafont.com/links.txt Normal file

File diff suppressed because it is too large

100
dafont.com/run.py Normal file

@ -0,0 +1,100 @@
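# Walks dafont.com's alphabetical listings, saving every "//dl" download href to
# links.txt, then fetches each archive into fonts/<name>.zip with a pool of 25
# workers.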
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1",
"Referer": "https://www.dafont.com/"
}
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30, headers=headers)
except Exception as e:
print("!", end="", flush=True)
continue
def get_dl_links(url):
print(url)
r = request_timeout(url)
soup = BeautifulSoup(r.text, "html.parser")
for a in soup.findAll("a"):
href = a.get("href")
if href is not None and href.startswith("//dl"):
with open("links.txt", "a") as f:
f.write("https://www.dafont.com" + href + "\n")
def get_fonts():
letters = list("abcdefghijklmnopqrstuvwxyz")
letters.append("%23")
page_links = []
all_page_links = []
for letter in letters:
print(letter)
r = request_timeout("https://www.dafont.com/alpha.php?lettre=" + letter)
soup = BeautifulSoup(r.text, "html.parser")
for a in soup.findAll("a"):
if a.get("href") is not None and a.get("href").find("&page=") != -1:
page_links.append("https://" + a.get("href"))
page_max = page_links[-2]
page_max = int(page_max[page_max.rfind("=") + 1:])
print(page_max)
for i in range(1, page_max+1):
all_page_links.append("https://www.dafont.com/alpha.php?lettre=" + letter + "&page=" + str(i))
pool = multiprocessing.Pool(processes=25)
pool.map(get_dl_links, all_page_links)
def download_font(url):
file_path = "fonts/" + url[url.rfind("/")+4:] + ".zip"
if os.path.exists(file_path):
return
print(file_path)
r = requests.get(url, stream=True, headers=headers)
if r.status_code != 200:
print(r.status_code)
return
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
def download_all():
pool = multiprocessing.Pool(processes=25)
with open("links.txt", "r") as f:
pool.map(download_font, f.read().splitlines())
# get_fonts()
download_all()

65
fontfabric.com/run.py Normal file

@ -0,0 +1,65 @@
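# Crawls the three "free" category pages on fontfabric.com, follows each font
# page, and downloads the archive referenced by its onclick/window.location
# button into fonts/.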
import requests
from bs4 import BeautifulSoup
import multiprocessing
fonts = []
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30)
except Exception as e:
print("!", end="", flush=True)
continue
def get_fonts():
for page in range(1, 4):
r = request_timeout("http://www.fontfabric.com/category/free/page/" + str(page))
soup = BeautifulSoup(r.text, "html.parser")
for link in soup.find("div", attrs={"class": "recent-leads fix"}).findAll("a"):
href = link.get("href")
if href is not None and href not in fonts and href.find("#") == -1 and href.find("category/") == -1:
fonts.append(link.get("href"))
print(len(fonts))
def download_font(url):
r = request_timeout(url)
soup = BeautifulSoup(r.text, "html.parser")
for a in soup.findAll("a"):
onclick = a.get("onclick")
if onclick is not None and onclick.startswith("window.location"):
dl_link = "http://www.fontfabric.com" + onclick[onclick.find("'")+1:onclick.rfind("'")]
file_path = "fonts" + dl_link[dl_link.rfind("/"):]
r_dl = requests.get(dl_link, stream=True, cookies=r.cookies)
if r_dl.status_code != 200:
print(r_dl.status_code)
return
print(file_path)
with open(file_path, 'wb') as f:
for chunk in r_dl.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
def download_all():
pool = multiprocessing.Pool(processes=25)
pool.map(download_font, fonts)
get_fonts()
download_all()

7458
fontfreak.com/fonts.txt Normal file

File diff suppressed because it is too large

72
fontfreak.com/run.py Normal file

@ -0,0 +1,72 @@
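# Scrapes "click here to download" links from fontfreak.com's paginated letter
# indexes into fonts.txt, then resolves each font page's "DOWNLOAD FONT" link
# and saves the file into fonts/.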
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
fonts = []
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30)
except Exception as e:
print("!", end="", flush=True)
continue
def get_fonts():
letters = list("abcdefghijklmnopqrstuvwxyz")
letters.append("no")
pool = multiprocessing.Pool(processes=25)
pool.map(get_dl_links, letters)
def get_dl_links(letter):
for page in range(1, 11):
r = request_timeout("http://www.fontfreak.com/fonts-" + letter + str(page) + ".htm")
soup = BeautifulSoup(r.text, "html.parser")
for a in soup.findAll("a"):
if a.text is not None and a.text == "click here to download":
with open("fonts.txt", "a") as f:
f.write("http://www.fontfreak.com/" + a.get("href") + "\n")
def download_font(url):
r = request_timeout(url)
soup = BeautifulSoup(r.text, "html.parser")
dl_link = soup.find("a", attrs={"title": "DOWNLOAD FONT"})
if dl_link is not None:
dl_url = "http://www.fontfreak.com/" + dl_link.get("href")
file_path = "fonts/" + dl_url[dl_url.rfind("/")+1:]
if os.path.exists(file_path):
return
r = requests.get(dl_url, stream=True)
print(file_path)
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
else:
print("no dl" + url)
get_fonts()
pool = multiprocessing.Pool(processes=25)
with open("fonts.txt", "r") as f:
pool.map(download_font, f.read().splitlines())

37570
fontmeme.com/fonts.txt Normal file

File diff suppressed because it is too large

0
fontmeme.com/proxies.txt Normal file

115
fontmeme.com/run.py Normal file

@ -0,0 +1,115 @@
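# Gathers font page URLs from fontmeme.com into fonts.txt, then downloads each
# zip via the page's fonts/download link. A download that does not start with a
# ZIP ("PK") header is treated as the download-limit response, deleted, and the
# script switches to the next proxy in proxies.txt.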
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
proxy_index = 0
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1"
}
proxies = {
'https': '',
}
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30)
except Exception as e:
print("!", end="", flush=True)
continue
def get_fonts():
for i in range(3758):
print(i)
r = request_timeout("https://fontmeme.com/fonts/page/" + str(i))
soup = BeautifulSoup(r.text, "html.parser")
for div in soup.findAll("div"):
if div.get("id") is not None and div.get("id") == "ptitle":
for child in div.children:
if child.get("href") is not None:
with open("fonts.txt", "a") as f:
f.write(child.get("href") + '\n')
def get_new_proxy():
global proxy_index
with open("proxies.txt", "r") as f:
line = f.read().splitlines()[proxy_index]
proxies["https"] = line
print("Switched to proxy " + line)
proxy_index += 1
def download_font(font_url):
file_path = "fonts/" + font_url[font_url[:-1].rfind("/")+1:-6] + ".zip"
if os.path.exists(file_path):
return
r1 = request_timeout(font_url)
dl_link_index = r1.text.find("https://fontmeme.com/fonts/download/")
if dl_link_index != -1:
dl_link = r1.text[dl_link_index: r1.text.find("'", dl_link_index)]
headers["Referer"] = font_url
try:
r = requests.get(dl_link, stream=True, headers=headers, proxies=proxies, cookies=r1.cookies, timeout=10)
except:
get_new_proxy()
return
if r.status_code != 200:
print(r.status_code)
return
reached_limit = False
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
with open(file_path, "rb") as f:
if f.read().find(b"PK") != 0:
reached_limit = True
if reached_limit:
os.remove(file_path)
print("You have reached the maximum permitted downloads")
get_new_proxy()
def download_all():
pool = multiprocessing.Pool(processes=100)
with open("fonts.txt", "r") as f:
pool.map(download_font, f.read().splitlines())
# get_fonts()
# get_new_proxy()
download_all()

27390
fontspace.com/fonts.txt Normal file

File diff suppressed because it is too large

111
fontspace.com/run.py Normal file

@ -0,0 +1,111 @@
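# Collects download hrefs ("box-button transparent" anchors) from fontspace.com's
# paginated lists into fonts.txt, then downloads each file into fonts/ reusing a
# session cookie fetched up front.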
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
from urllib.parse import urljoin
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1",
"Referer": "http://www.fontspace.com"
}
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30)
except Exception as e:
print("!", end="", flush=True)
continue
def get_dl_links(page_url):
print(page_url)
r_page = request_timeout(page_url)
soup_page = BeautifulSoup(r_page.text, "html.parser")
for dl_link in soup_page.findAll("a", attrs={"class": "box-button transparent"}):
with open("fonts.txt", "a") as f:
f.write(dl_link.get("href") + "\n")
def get_fonts():
lists = list("abcdefghijklmnopqrstuvwxyz")
lists.append("letter")
page_links = []
for page in lists:
print(page)
r = request_timeout("http://www.fontspace.com/list/" + page)
soup = BeautifulSoup(r.text, "html.parser")
for a in soup.findAll("a"):
if a.get("href") is not None and a.get("href").find("?p=") != -1:
page_links.append(a.get("href"))
page_max = page_links[-2]
page_max = int(page_max[page_max.rfind("=") + 1:])
print(page_max)
for i in range(1, page_max):
page_links.append("http://www.fontspace.com/list/" + page + "?p=" + str(i))
pool = multiprocessing.Pool(processes=25)
pool.map(get_dl_links, page_links)
def download_font(dl_url):
full_url = urljoin("http://www.fontspace.com", dl_url)
file_path = "fonts" + full_url[full_url.rfind("/"):]
if os.path.exists(file_path):
return
print(file_path)
r = requests.get(full_url, stream=True, headers=headers, cookies=cookies)
if r.status_code != 200:
print(r.status_code)
return
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
def get_cookie():
r = request_timeout("http://www.fontspace.com/list/a?text=&p=2")
return r.cookies
def download_all(cookies):
pool = multiprocessing.Pool(processes=25)
with open("fonts.txt", "r") as f:
pool.map(download_font, f.read().splitlines())
# get_fonts()
cookies = get_cookie()
download_all(cookies)

47830
fontstruct.com/fonts.txt Normal file

File diff suppressed because it is too large

142
fontstruct.com/run.py Normal file

@ -0,0 +1,142 @@
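# Signs in to fontstruct.com (credentials at the top), walks the gallery pages
# collecting FontStruction IDs into fonts.txt, and downloads each font archive
# with the authenticated session cookies. As committed, only the download stage
# runs (get_fonts is commented out).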
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
username = ""
password = ""
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1",
"Referer": "https://fontstruct.com/",
"Connection": "keep-alive"
}
font_ids = []
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30, headers=headers)
except Exception as e:
print("!", end="", flush=True)
continue
def login():
r1 = request_timeout("https://fontstruct.com/login")
print(r1.cookies)
login_headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1",
"Referer": "https://fontstruct.com/login",
"Connection": "keep-alive"
}
payload = {"_username": username, "_password": password, "_csrf_token": "", "_submit": "Sign+In"}
r = requests.post("https://fontstruct.com/login_check", headers=login_headers, data=payload, cookies=r1.cookies)
print(r.cookies)
print(len(r.text))
print(r.headers)
return r.history[0]
def get_font_ids(page_url):
print(page_url)
r = request_timeout(page_url)
soup = BeautifulSoup(r.text, "html.parser")
for a in soup.findAll("a"):
href = a.get("href")
if href is not None and href.startswith("/fontstructions") and href.find("/license/") == -1 and\
href.find("/vote_breakdown/") == -1:
font_id = href[href.find("show/")+5:href.rfind("/")]
if font_id not in font_ids:
font_ids.append(font_id)
with open("fonts.txt", "a") as f:
f.write(font_id + "\n")
def get_fonts():
page_urls = []
for page_num in range(1, 1428):
page_urls.append("https://fontstruct.com/gallery?filters=all&page=" + str(page_num))
pool = multiprocessing.Pool(processes=25)
pool.map(get_font_ids, page_urls)
def download_font(font_id):
dl_headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1",
"Referer": "https://fontstruct.com/fontstructions/download/" + font_id,
"Connection": "keep-alive"
}
dl_url = "https://fontstruct.com/font_archives/download/" + font_id
while True:
r = requests.get(dl_url, stream=True, headers=dl_headers, cookies=cookies)
if r.status_code == 403:
return
if r.status_code == 500:
continue
if "Content-Disposition" not in r.headers:
print(r.text)
return
file_path = "fonts/" + r.headers["Content-Disposition"][r.headers["Content-Disposition"].rfind("'") + 1:]
if os.path.exists(file_path):
return
print(file_path)
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
return
def download_all():
pool = multiprocessing.Pool(processes=25)
with open("fonts.txt", "r") as f:
pool.map(download_font, f.read().splitlines())
cookies = login().cookies
# get_fonts()
download_all()

BIN
lynda.com/courses.txt Normal file

Binary file not shown.


@ -0,0 +1,73 @@
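# Builds courses.txt for lynda.com: discovers category IDs from the homepage,
# pages through each category's AJAX course listing, and records
# category-id\0course-name\0course-id\0course-link per line.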
import requests
import bs4
import json
URL = "https://lynda.com"
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30)
except:
print("!", end="", flush=True)
continue
def get_categories():
categories = []
r = requests.get(URL)
soup = bs4.BeautifulSoup(r.text, "html.parser")
for i in soup.find_all("i"):
if i.get("class") is not None and len(i.get("class")) > 1 and "cat-" in i.get("class")[1]:
category_id = i.get("class")[1][4:]
category_name = i.get("title")[:i.get("title").find("-") - 1]
categories.append((category_name, category_id))
return categories
def get_courses(category):
last_len = 0
courses = []
page = 0
while True:
page += 1
r = request_timeout("https://lynda.com/ajax/category/" + category[1] + "/courses?page=" + str(page))
soup = bs4.BeautifulSoup(json.loads(r.text)["html"], "html.parser")
for link in soup.find_all("a"):
if link.get("href") is not None:
if link.find("h3") is not None:
course_link = link.get("href")
course_name = link.find("h3").string
course_id = course_link.split("/")[-1:][0][:-7]
courses.append((course_name, course_id, course_link))
print("Page " + str(page) + " (" + str(len(courses)) + ")")
if last_len == len(courses):
break
last_len = len(courses)
return courses
file = open("courses.txt", "w")
for category in get_categories():
print(category)
for course in get_courses(category):
print(course[0])
file.write(category[1] + "\0" + course[0] + "\0" + course[1] + "\0" + course[2] + "\n")
file.flush()
file.close()

11
lynda.com/tmp.py Normal file

@ -0,0 +1,11 @@
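# Helper: print the course URLs from courses.txt whose names contain any of the
# search terms below.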
terms = ["data science", "big data", "hadoop", "python", "data mining", "text mining", "deep learning", "blender",
"unity", "zbrush", "substance"]
for line in open("courses.txt"):
category, name, course_id, url = line.split("\0")
for term in terms:
if term in name.lower():
print(url[:-1])


@ -0,0 +1,79 @@
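# Builds links.txt for models-resource.com: walks every console and game listing
# and records each model's path (console/game/model), its preview-image URL, and
# its download URL, NUL-separated, one per line.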
import requests
import bs4
import os
headers = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.5",
}
URL = "https://www.models-resource.com"
def get_consoles():
consoles = []
response = requests.get(URL)
soup = bs4.BeautifulSoup(response.text, "html.parser")
for console in soup.find(id="leftnav-consoles"):
if type(console) == bs4.element.Tag and console.get("href") is not None:
consoles.append((console.text, URL + console.get("href")))
return consoles
def get_games(console, letter):
games = []
print(console[0] + " - " + letter)
print(console[1] + letter + ".html")
response = requests.get(console[1] + letter + ".html")
soup = bs4.BeautifulSoup(response.text, "html.parser")
for link in soup.find_all("a"):
for child in link.findChildren():
if child.get("class") is not None and child.get("class") == ['gameiconcontainer']:
game_name = child.find("div").find("span").string
games.append((game_name, URL + link.get("href")))
return games
def get_models(game):
models = []
response = requests.get(game[1])
soup = bs4.BeautifulSoup(response.text, "html.parser")
for link in soup.find_all("a"):
for div in link.find_all("div"):
if div.get("class") == ["iconcontainer"]:
model_url = div.find("div", attrs={"class": "iconbody"}).find("img").get("src").replace("sheet_icons",
"big_icons")
model_id = model_url.split("/")[4][:-4]
model_download = "https://www.models-resource.com/download/" + model_id + "/"
model_name = div.find("div").find("span").string
models.append((model_name, URL + model_url, model_download))
return models
file = open("links.txt", "w")
for console in get_consoles():
for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
for game in get_games(console, letter):
for model in get_models(game):
file.write(console[0] + os.sep + game[0] + os.sep + model[0] + os.sep + "\0" + model[1] + "\0" +
model[2] + "\n")
file.close()


@ -0,0 +1,39 @@
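# Reads links.txt (written by the crawler above) and, for each model, saves
# preview.png plus the downloaded archive under models/<console>/<game>/<model>/,
# retrying indefinitely on network errors.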
import pathlib
import requests
import os
file = open("links.txt", "r")
for line in file.read().splitlines():
path, preview, link = line.split("\0")
if os.path.isfile("models/" + path + "preview.png"):
continue
print("models/" + path)
pathlib.Path("models/" + path).mkdir(parents=True, exist_ok=True)
while True:
try:
response = requests.get(preview, stream=True, timeout=5)
with open("models/" + path + "preview.png", 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
response2 = requests.get(link, stream=True, timeout=5)
file_extension = os.path.splitext(response2.headers["Content-Disposition"])[1][:-2]
with open("models/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
for chunk in response2.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
break
except:
print("!", end="", flush=True)
continue

Binary file not shown.

1
poliigon.com/counter Normal file

@ -0,0 +1 @@
1

163
poliigon.com/ripper.py Normal file

@ -0,0 +1,163 @@
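# Registers a throwaway Poliigon account with a guerrillamail address, confirms
# the verification email, logs in, and then downloads /multiple_download/<n>/1K
# bundles, tracking <n> in the "counter" file. When a download fails (quota hit),
# the counter is rolled back and the loop starts over with a fresh account.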
import random
import string
import requests
import time
from guerrillamail import GuerrillaMailSession
proxies = {
# 'https': 'https://##.##.##.##:##',
}
headers = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Referer": "https://www.poliigon.com/register",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}
def read_email(email):
s = GuerrillaMailSession()
s.set_email_address(email)
print(s.get_session_state())
for email in s.get_email_list():
if email.subject == "Poliigon: Email Verification":
print("Got email")
body = s.get_email(s.get_email_list()[0].guid).body
link = body[body.index("https://www.poliigon.com"):body.index("https://www.poliigon.com") + 71]
return link
def download_file(url, cookies):
r = requests.get(url, stream=True, headers=headers, proxies=proxies, cookies=cookies)
if "X-Sendfile" in r.headers:
local_filename = r.headers["X-Sendfile"].split('/')[-1]
print(local_filename + "...")
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
return False
else:
print("Error")
return True
def rand_string():
return ''.join(random.choice(string.ascii_lowercase) for _ in range(12))
def get_next_num():
counter_file = open("counter")
counter = int(counter_file.read())
counter_file.close()
counter_file = open("counter", 'w')
counter_file.write(str(counter + 1))
return counter
def decrement():
counter_file = open("counter")
counter = int(counter_file.read())
counter_file.close()
counter_file = open("counter", 'w')
counter_file.write(str(counter - 1))
def login(email, password):
r_login_token = requests.get("https://www.poliigon.com/login", headers=headers, proxies=proxies)
token = r_login_token.text[r_login_token.text.index("<input name=\"_token\" type=\"hidden\" value=\"") + 42:
r_login_token.text.index("<input name=\"_token\" type=\"hidden\" value=\"") + 82]
# Login
payload = {"_token": token, "email": email, "password": password}
r_login = requests.post("https://www.poliigon.com/login", headers=headers, proxies=proxies, data=payload,
cookies=r_login_token.cookies)
return r_login
def create_account_and_login():
email = rand_string() + "@sharklasers.com"
print("email is " + email)
f_name = rand_string()
l_name = rand_string()
password = rand_string()
print("Password is " + password)
# Get Cookie
r = requests.get("https://www.poliigon.com/register", headers=headers, proxies=proxies)
session_cookie = r.cookies['laravel_session']
print("Got cookie: " + session_cookie)
body = r.text
# Get token
token = body[body.index("<input name=\"_token\" type=\"hidden\" value=\"") + 42:
body.index("<input name=\"_token\" type=\"hidden\" value=\"")+82]
print("Got token: " + token + " " + str(len(token)))
# Register
payload = {"_token": token, "first_name": f_name, "last_name": l_name, "email": email,
"email_confirmation": email, "password": password, "password_confirmation": password}
r2 = requests.post("https://www.poliigon.com/register", headers=headers, data=payload,
cookies=r.cookies, proxies=proxies)
# verify
r3 = requests.get("https://www.poliigon.com/verify", headers=headers, proxies=proxies, cookies=r.cookies)
if r2.text != "Error in exception handler.":
print("Sucessful register")
time.sleep(35)
counter = 5
while counter > 0:
counter -= 1
link = read_email(email)
if link is None:
time.sleep(5)
else:
break
if "https" in link:
# Verify email
print("Verifying " + link)
print(requests.get(link, headers=headers, proxies=proxies, cookies=r.cookies))
# Email verified, now login
return login(email, password)
else:
print(r2.text)
while True:
rLogin = create_account_and_login()
error = False
while not error:
error = download_file("https://www.poliigon.com/multiple_download/" + str(get_next_num()) + "/1K",
rLogin.cookies)
if error:
decrement()

112
software.intel.com/run.py Normal file

@ -0,0 +1,112 @@
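# Archives Intel AI Academy content: library articles as PDFs (pdfkit), videos
# via youtube-dl, and student "kit" pages plus their .zip attachments. As
# committed, only the kit download path is active; the article/video loops are
# commented out.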
import requests
from bs4 import BeautifulSoup
import os
import pdfkit
from urllib.parse import urljoin
import youtube_dl
articles = []
videos = []
kits = []
def get_articles():
for page in range(0, 10):
r = requests.get("https://software.intel.com/en-us/ai-academy/library?page=" + str(page))
soup = BeautifulSoup(r.text, "html.parser")
for link in soup.find_all("a"):
if link.get("href") is not None and link.get("href").startswith("/en-us/articles/"):
if link.string is not None:
articles.append((link.get("href"), link.string))
if link.get("href") is not None and link.get("href").startswith("/en-us/videos/"):
if link.string is not None:
videos.append((link.get("href"), link.string))
print(str(len(articles)) + " articles")
print(str(len(videos)) + " videos")
def get_kits():
r = requests.get("https://software.intel.com/en-us/ai-academy/students/kits")
soup = BeautifulSoup(r.text, "html.parser")
for link in soup.find_all("a"):
if link.string is not None and link.string == "Get Started":
kits.append(link.get("href"))
def download_article(article):
if not os.path.exists("articles"):
os.mkdir("articles")
if not os.path.isfile("articles/" + article[1] + ".pdf"):
pdfkit.from_url(urljoin("https://software.intel.com/", article[0]), "articles/" + article[1] + ".pdf")
def download_video(video):
if not os.path.exists("videos"):
os.mkdir("videos")
options = {"outtmpl": "videos/%(title)s.%(ext)s"}
ytd = youtube_dl.YoutubeDL(options)
ytd.download([urljoin("https://software.intel.com/", video[0])])
def download_file(url, destination):
while True:
try:
response = requests.get(url, stream=True, timeout=10)
if not os.path.exists(destination) and response.status_code == 200:
with open(destination, 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
break
except:
print("!")
def download_kit(kit_url):
if not os.path.exists("kits"):
os.mkdir("kits")
kit_url = urljoin("https://software.intel.com/", kit_url)
r = requests.get(kit_url)
soup = BeautifulSoup(r.text, "html.parser")
kit_title = soup.find("title").string
if not os.path.exists("kits/" + kit_title):
os.mkdir("kits/" + kit_title)
pdfkit.from_url(kit_url, "kits/" + kit_title + "/kit.pdf")
for link in soup.find_all("a"):
target = link.get("href")
if target is not None and target.endswith(".zip"):
download_file(urljoin("https://software.intel.com/", target), "kits/" + kit_title + "/" + os.path.split(target)[1])
# get_articles()
get_kits()
for k in kits:
download_kit(k)
#
# for a in articles:
# download_article(a)
#
# for v in videos:
# download_video(v)


@ -0,0 +1,34 @@
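# Reads links.txt (path\0download URL per line) and saves each archive under
# sounds/<console>/<game>/<sound>/, naming the file after the sound and taking
# the extension from the Content-Disposition header.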
import pathlib
import requests
import os
file = open("links.txt", "r")
for line in file.read().splitlines():
path, link = line.split("\0")
pathlib.Path("sounds/" + path.strip()).mkdir(parents=True, exist_ok=True)
# if os.path.exists("sounds/" + path + "/" + path.split("/")[-2:-1][0] + ".zip") or \
# os.path.exists("sounds/" + path + "/" + path.split("/")[-2:-1][0] + ".mp3"):
# continue
print("sounds/" + path)
while True:
# try:
response = requests.get(link, stream=True, timeout=5)
file_extension = os.path.splitext(response.headers["Content-Disposition"])[1][:-2]
with open("sounds/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
break
# except:
# print("!", end="", flush=True)
# continue

Binary file not shown.


@ -0,0 +1,79 @@
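# Builds links.txt for sounds-resource.com: walks every console and game listing
# and records each sound's path (console/game/sound) and its download URL,
# NUL-separated, one per line.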
import requests
import bs4
import os
headers = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.5",
}
URL = "https://www.sounds-resource.com"
def get_consoles():
consoles = []
response = requests.get(URL)
soup = bs4.BeautifulSoup(response.text, "html.parser")
for console in soup.find(id="leftnav-consoles"):
if type(console) == bs4.element.Tag and console.get("href") is not None:
consoles.append((console.text, URL + console.get("href")))
return consoles
def get_games(console, letter):
games = []
print(console[0] + " - " + letter)
print(console[1] + letter + ".html")
response = requests.get(console[1] + letter + ".html")
soup = bs4.BeautifulSoup(response.text, "html.parser")
for link in soup.find_all("a"):
for child in link.findChildren():
if child.get("class") is not None and child.get("class") == ['gameiconcontainer']:
game_name = child.find("div").find("span").string
games.append((game_name, URL + link.get("href")))
return games
def get_sounds(game):
sounds = []
response = requests.get(game[1])
soup = bs4.BeautifulSoup(response.text, "html.parser")
for row in soup.find_all("tr"):
if row.get("class") is not None and "altrow" in row.get("class")[0]:
for child in row.children:
if child is not None and isinstance(child, bs4.Tag) and child.get("style") == "padding-left: 10px;":
sound_name = child.string
sound_url = child.find("a").get("href")
sound_dl = "https://www.sounds-resource.com/download/" + sound_url.split("/")[-2:-1][0] + "/"
sounds.append((sound_name, sound_dl))
return sounds
file = open("links.txt", "w")
for console in get_consoles():
for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
for game in get_games(console, letter):
for sound in get_sounds(game):
file.write(console[0] + os.sep + game[0] + os.sep + sound[0] + os.sep + "\0" + sound[1] + "\n")
file.close()


@ -0,0 +1,183 @@
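# Mirrors spritedatabase.net: for every system and game, resolves each sprite
# page's real file link (external Google Drive/Mediafire links are appended to a
# "links" file instead) and saves it under <system>/<game>/, guessing the file
# extension from the Content-Type header. Already-complete game folders are skipped.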
import requests
from bs4 import BeautifulSoup
import re
import os
import mimetypes
headers = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}
def get_systems():
systems = []
response = requests.get("http://spritedatabase.net/", headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
links = soup.find_all("a")
for link in links:
if "system" in link.get('href'):
systems.append((link.text.strip(), "http://spritedatabase.net/" + link.get('href')))
return systems
def get_games(system):
games = []
response = requests.get(system[1], headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
links = soup.find_all("a")
for link in links:
if link.get('href') is not None and "game/" in link.get('href'):
games.append((link.text.strip().replace("/", ""), "http://spritedatabase.net/" + link.get('href')))
return games
def get_sprites(game):
print(game[0])
sprites = []
while True:
try:
response = requests.get(game[1], headers=headers, timeout=5)
break
except:
print("!", end="", flush=True)
continue
soup = BeautifulSoup(response.text, 'html.parser')
links = soup.find_all("a")
for link in links:
if link.get('href') is not None and "file/" in link.get('href'):
print(".", end="", flush=True)
# Skip 'Latest files' thing
if link.parent.get("class") is None:
continue
file_name = link.find(text=True)
file_name = file_name.replace("zip", "")
file_name = file_name.replace("mp3", "")
file_name = file_name.replace("png", "")
file_name = file_name.replace("gif", "")
file_name = file_name.replace("ogg", "")
file_name = re.sub('[^A-Za-z0-9 ]+', '', file_name)
file_name = file_name.strip()
sprites.append((file_name, "http://spritedatabase.net/" + link.get('href')))
print("")
return sprites
def get_download_link(link):
while True:
try:
response = requests.get(link, headers=headers, timeout=5)
break
except:
print("!", end="", flush=True)
continue
soup = BeautifulSoup(response.text, 'html.parser')
images = soup.find_all("img")
for image in images:
if image.get("style") is not None and "border: 1px solid" in image.get("style"):
download_link = image.get("src")
if "layout/format" in download_link:
for div in soup.find_all("div"):
if div.get("class") is not None and str(div.get("class")) == "['dlcapsule']":
link = div.find("a").get("href")
if "files/" in link:
return "http://spritedatabase.net/" + link
else:
return link
else:
return "http://spritedatabase.net/" + download_link
def download_all(folder, sprite):
if not os.path.isdir(folder):
os.mkdir(folder)
link = get_download_link(sprite[1])
if link is None:
print("ERROR: " + sprite[1])
return
if "drive.google" in link or "mediafire" in link:
print("I can't download external link. Link: " + link)
open("links", "a").write(link + "\n")
else:
print(folder + os.sep + sprite[0])
while True:
try:
response = requests.get(link, stream=True, headers=headers, timeout=5)
extension = ""
if response.headers["Content-Type"] is not None:
extension = mimetypes.guess_extension(response.headers["Content-Type"])
if extension is None:
extension = ""
if not os.path.exists(folder + os.sep + sprite[0] + extension) and response.status_code == 200:
with open(folder + os.sep + sprite[0] + extension, 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
break
except:
print("!")
mimetypes.init()
for system in get_systems():
if not os.path.exists(system[0]):
os.mkdir(system[0])
for game in get_games(system):
sprites = get_sprites(game)
if os.path.exists(system[0] + os.sep + game[0]):
print(str(len(os.listdir(system[0] + os.sep + game[0]))) + "/" + str(len(sprites)))
if os.path.exists(system[0] + os.sep + game[0]) and len(os.listdir(system[0] + os.sep + game[0])) >= len(sprites):
print("Skipping existing folder with " + str(len(os.listdir(system[0] + os.sep + game[0]))) + "/" + str(len(sprites)) + " existing sprites")
continue
for sprite in sprites:
download_all(str(system[0] + os.sep + game[0]), sprite)

37
sproutvideo.com/run.py Normal file

@ -0,0 +1,37 @@
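# Given a password-protected SproutVideo embed URL and its password (plus an
# optional --user email), posts the password form and prints the HD/SD download
# links found in the returned player page (tmp.html below is a sample response).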
import requests
from bs4 import BeautifulSoup
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("password")
parser.add_argument("url")
parser.add_argument("--user")
args = parser.parse_args()
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Upgrade-Insecure-Requests": "1",
"Referer": args.url
}
payload = {"email": args.user if args.user is not None else "", "password": args.password,
"host": "unknown", "url": "unknown", "queryParams": ""}
print(payload)
r = requests.post(args.url.replace("embed", "video_password"), headers=headers, data=payload)
print(r.cookies)
soup = BeautifulSoup(r.text, "html.parser")
try:
print(soup.find("a", attrs={"class": "hd-download"}).get("href"))
print(soup.find("a", attrs={"class": "sd-download"}).get("href"))
except AttributeError:
print("Wrong password/username")

209
sproutvideo.com/tmp.html Normal file

@ -0,0 +1,209 @@
<!DOCTYPE html>
<html>
<head>
<!--[if IE]><script type="text/javascript">document.documentMode<9&&(document.location.href=document.location.href+(/\?/.test(document.location.href)?"&forceIE8=true":"?forceIE8=true"));</script><![endif]-->
<meta name="ROBOTS" content="NOINDEX, NOFOLLOW">
<link rel="prefetch" href="https://images.sproutvideo.com/d9a5d2f848be6e5f49bb7f1b09e93f80/89152ce323a8ed764ae122614e78c922/poster_frames/frame_0000.jpg" as="image">
<link rel="preconnect" href="//d1ajyp3swh7ygp.cloudfront.com">
<link rel="preconnect" href="//hls.videos.sproutvideo.com">
<link href='https://fonts.googleapis.com/css?family=Open+Sans' rel='stylesheet' type='text/css'>
<link href='//d1ajyp3swh7ygp.cloudfront.net/hls_player-0d10c5b6.css' rel='stylesheet' type='text/css'>
<!--[if IE ]>
<style type="text/css">
.player-subtitle-cue {
font-size: 2em;
}
.player-select:before {
display: none;
}
</style>
<![endif]-->
<script type="text/javascript">var dat = 'eyJzZXNzaW9uSUQiOiI2MWYxNTQ3Yi1mY2VkLTQ3MzEtODVlNC1kYWE1Y2MxMDdmNWIiLCJob3N0IjoidW5rbm93biIsImhhc19oZCI6dHJ1ZSwiaGFzX3NkIjp0cnVlLCJmdWxsSG9zdCI6InZpZGVvcy5zcHJvdXR2aWRlby5jb20iLCJ1cmwiOiJ1bmtub3duIiwiZHVyYXRpb24iOjY5NjAsInZpZGVvVWlkIjoiYTQ5YmRkYjExMzFjZTNjYTJjIiwidXNlclVpZCI6IjdlOWJkZWIyMWIxZWU3Y2RmMCIsInByaXZhY3lUb2tlbiI6IjBiNmE5NTA2NDZiZGI4M2YiLCJ1aWQiOiI5NTI0NWQxMi05N2RhLTQwZDktOTFhYy1kZWM2YzdkMmQ0MjgiLCJ1c2VyQWdlbnQiOiJNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQ7IHJ2OjUyLjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvNTIuMCIsInBvc3Ryb2xsIjpmYWxzZSwic3VidGl0bGVzIjpmYWxzZSwiYXV0b3BsYXkiOnRydWUsImxvb3AiOmZhbHNlLCJub0JpZ1BsYXkiOmZhbHNlLCJxdWFsaXR5Ijoic2QiLCJmbGFzaFBsYXllciI6Imh0dHBzOi8vYy5zcHJvdXR2aWRlby5jb20vcGxheWVyLTAuNC40LjIyLnN3ZiIsInRyYXNwYXJlbnQiOmZhbHNlLCJ0IjpudWxsLCJjb2xvcnMiOiIiLCJzM191c2VyX2hhc2giOiJkOWE1ZDJmODQ4YmU2ZTVmNDliYjdmMWIwOWU5M2Y4MCIsInMzX3ZpZGVvX2hhc2giOiI4OTE1MmNlMzIzYThlZDc2NGFlMTIyNjE0ZTc4YzkyMiIsImhscyI6dHJ1ZSwidGl0bGUiOiJDbGllbnQgV2VsY29tZXMgRGVjZW1iZXIgMjAxNSBTbWFsbGVyLm1wNCIsIndpZHRoIjoxMjgwLCJoZWlnaHQiOjcyMCwidm9sdW1lIjoxLCJjYyI6bnVsbCwic2lnbmF0dXJlIjp7IkNsb3VkRnJvbnQtUG9saWN5IjoiZXlKVGRHRjBaVzFsYm5RaU9sdDdJbEpsYzI5MWNtTmxJam9pYUhSMGNITTZMeTlvYkhNeUxuWnBaR1Z2Y3k1emNISnZkWFIyYVdSbGJ5NWpiMjB2WkRsaE5XUXlaamcwT0dKbE5tVTFaalE1WW1JM1pqRmlNRGxsT1RObU9EQXZPRGt4TlRKalpUTXlNMkU0WldRM05qUmhaVEV5TWpZeE5HVTNPR001TWpJdktpSXNJa052Ym1ScGRHbHZiaUk2ZXlKRVlYUmxUR1Z6YzFSb1lXNGlPbnNpUVZkVE9rVndiMk5vVkdsdFpTSTZNVFV4T1RjNE5UazFObjE5ZlYxOSIsIkNsb3VkRnJvbnQtU2lnbmF0dXJlIjoiblpCeGQzSkxwS1BaWGNSdDkxTTgwbVBwU0RVcC10dVkzSEg1RkV6cFZWQzdRU2c4STZXY0Jack1lV1l0Si1MWnh+Z2x6RkkySEEtRDJReEowZFNEbU9acGJpTDN3UFV+NEhxOElRTFVZQ1V0ZnFBTi11Y2VpeGZNUTZyWngtMVI1bnh2MG84VTZRdGlZdWotRXJXTDczckZnN0hydHdrcHdPcDRwakNFV3g5blJOMGZ+UWhaV1BncTJBVkFkRkZNeDItTkljQmpOcFBrRDdSTWEyeHJ4TlZ4Z1hXRUNqVUhBUzc3ZmNGaDVHaTNNNnRKdFBOZ0lZUGNwc2hFdm9EWlFSRVZ6fjRDWEZSeGVKaXF5MjBiV0IybW9wbFNsR2czZWJOcjJ+aVYyS09xNVVXclh5LW00V29rdlBBS0F1eE5maE1SZUtlflJ2NkhhMWlqRnBBYXdRX18iLCJDbG91ZEZyb250LUtleS1QYWlyLUlkIjoiQVBLQUlCNURHQ0dBUUo0R0dJVVEifSwiZm9yY2VCdWZmZXIiOmZhbHNlLCJ2ZXJzaW9uIjozLCJkZWJ1ZyI6ZmFsc2UsImNkU2lnIjoiUG9saWN5PWV5SlRkR0YwWlcxbGJuUWlPbHQ3SWxKbGMyOTFjbU5sSWpvaWFIUjBjSE02THk5b2JITXlMblpwWkdWdmN5NXpjSEp2ZFhSMmFXUmxieTVqYjIwdlkzSnZjM05rYjIxaGFXNHVlRzFzSWl3aVEyOXVaR2wwYVc5dUlqcDdJa1JoZEdWTVpYTnpWR2hoYmlJNmV5SkJWMU02UlhCdlkyaFVhVzFsSWpveE5URTVOelkwTXpnMmZYMTlYWDBfJlNpZ25hdHVyZT1NSzVjWWUwajlaR2RYQzNwNFBNekx6STdDQWE3alpldzJyYVdqaGJTZGZvMU9vLWt0ajNNenpabHdRb1ppeW1INUolN0VPUzFud0R2R3R3dWw1cTlYclZVUmxlbFIwbWlLd2hQUVVLcE1Za1FVd2VRTm1SYlM5SU44STNScU9xUWVuJTdFbGZSVzZndURrTlo1ZGI1VlA4RHdtcVVtbTlITUx4VzU3bDNpUmlvVm1PcUVwdnFLdTl3VnVQdHJPZW0lN0VYUEtoTkolN0UlN0VpbWJ3YjJOTVp0MW9MZW5QYjc5YmtwRlRPcSU3RUEtdGpkZzRHcTRtS2RLZVlxMGw5aWJ2ekpkMThnVkE1cGtrMTV2WEJtJTdFQ282LWVGa0N6UkpobjUzOXpPckVFRHdnUnd4TFFLV21yVVJINE82VVdUOGRscENudDdSbkRJby1TQUxNbW5TeUh0QlRSSklxMFFpd19fJktleS1QYWlyLUlkPUFQS0FJQjVER0NHQVFKNEdHSVVRIiwiYmFja2dyb3VuZFZpZGVvIjpmYWxzZSwiZmJTaWciOnsic2lnIjoicDdhMlVQa3Erd2x4alUvbEZIMk96U3FRcUZnPSIsImV4cGlyZXMiOjE1MTk3NzUxNTZ9LCJtb2JpbGUiOmZhbHNlfQ==';</script>
<body>
<div class="player paused sd no-cc " style="background-image: url('https://images.sproutvideo.com/d9a5d2f848be6e5f49bb7f1b09e93f80/89152ce323a8ed764ae122614e78c922/poster_frames/frame_0000.jpg'); background-size:contain; background-position: center;background-repeat: no-repeat;">
<div class="player-big-play-button">
<svg viewBox="0 0 26 30"><polygon points="0,0 0,30 26,15"/></svg>
</div>
<div class="player-buffer">
<div class="spinner">
<div class="rect1"></div>
<div class="rect2"></div>
<div class="rect3"></div>
<div class="rect4"></div>
<div class="rect5"></div>
</div>
</div>
<div class="player-video-holder">
<video width='100%' height='100%' id='video-a49bddb1131ce3ca2c-html' crossorigin='anonymous' preload="auto" style="display:none;"></video>
</div>
<div class="player-control-bar hidden">
<div class="player-play-pause player-button">
<svg id="play" viewBox="0 0 26 30">
<polygon points="0,0 0,30 26,15"/>
</svg>
<svg id="pause" viewBox="0 0 12 20">
<path d="M1,20h2c0.6,0,1-0.4,1-1V1c0-0.6-0.4-1-1-1H1C0.4,0,0,0.4,0,1v18C0,19.6,0.4,20,1,20z"/>
<path d="M11,0H9C8.4,0,8,0.4,8,1v18c0,0.6,0.4,1,1,1h2c0.6,0,1-0.4,1-1V1C12,0.4,11.6,0,11,0z"/>
</svg>
</div>
<div class="player-volume player-button">
<div class="volume-bar">
<div class="volume-bar-background"></div>
<div class="volume-track-background"></div>
<div class="volume-track-controller"></div>
<div class="volume-track-status"></div>
</div>
<svg viewBox="0 0 17.3 13.6">
<path id="speaker" d="m7.89999,0.17501l-3.4,3.3l-3.4,0c-0.7,0 -1.1,0.6 -1.1,1.1l0,4.5c0,0.6 0.4,1.1 1.1,1.1l3.4,0l3.5,3.3c0,0 1,0.6 1,-1.2c0,-0.9 0,-4.9 0,-10.9c0,-1.9 -1.1,-1.2 -1.1,-1.2l0,0z"/>
<path id="wave-one" d="m10.99999,3.57501c-0.2,0 -0.4,0.2 -0.4,0.4l0,0.8c0,0.2 0.2,0.4 0.4,0.5c0.7,0.2 1.3,0.9 1.3,1.6c0,0.8 -0.5,1.4 -1.3,1.6c-0.2,0.1 -0.4,0.2 -0.4,0.5l0,0.9c0,0.2 0.2,0.4 0.4,0.4c1.7,-0.2 3,-1.6 3,-3.4s-1.3,-3.1 -3,-3.3z"/>
<path id="wave-two" d="m10.59999,0.57501l0,0.8c0,0.2 0.2,0.4 0.4,0.4c2.6,0.2 4.6,2.4 4.6,5s-2,4.8 -4.6,5c-0.2,0 -0.4,0.2 -0.4,0.4l0,0.8c0,0.2 0.2,0.4 0.4,0.4c3.5,-0.2 6.3,-3.2 6.3,-6.7s-2.7,-6.3 -6.3,-6.5c-0.2,0 -0.4,0.2 -0.4,0.4z"/>
<path id="mute" d="m15.69999,6.87501l1.4,-1.4c0.2,-0.2 0.2,-0.5 0,-0.7l-0.7,-0.7c-0.2,-0.2 -0.5,-0.2 -0.7,0l-1.4,1.4l-1.3,-1.3c-0.2,-0.2 -0.5,-0.2 -0.7,0l-0.7,0.7c-0.2,0.2 -0.2,0.5 0,0.7l1.4,1.3l-1.4,1.4c-0.2,0.2 -0.2,0.5 0,0.7l0.7,0.7c0.2,0.2 0.5,0.2 0.7,0l1.4,-1.4l1.4,1.4c0.2,0.2 0.5,0.2 0.7,0l0.5,-0.8c0.2,-0.2 0.2,-0.5 0,-0.7l-1.3,-1.3z"/>
</svg>
</div>
<div class="player-progress-time">00:00</div>
<div class="player-tracks">
<div class="player-track-background"></div>
<div class="player-track-loaded"></div>
<div class="player-track-controller"></div>
<div class="player-track-progress"></div>
<div class="player-track-time">
<div class="player-track-time-background"></div>
<div class="player-track-time-gradient"></div>
<div class="player-track-time-time">00:00</div>
</div>
</div>
<div class="player-total-time">00:00</div>
<div class="player-download-btn player-button">
<svg viewBox="0 0 26 26">
<path d="m25,17h-2c-0.6,0-1,0.4-1,1v2.5c0,0.3-0.2,0.5-0.5,0.5h-17c-0.3,0-0.5-0.2-0.5-0.5v-2.5c0-0.6-0.4-1-1-1h-2c-0.6,0-1,0.4-1,1v6c0,0.6 0.4,1 1,1h24c0.6,0 1-0.4 1-1v-6c0-0.6-0.4-1-1-1z"/>
<path d="m12.3,16.7c0.2,0.2 0.5,0.3 0.7,0.3s0.5-0.1 0.7-0.3l6-6c0.2-0.2 0.3-0.4 0.3-0.7s-0.1-0.5-0.3-0.7l-1.4-1.4c-0.2-0.2-0.4-0.3-0.7-0.3-0.3,0-0.5,0.1-0.7,0.3l-1,1c-0.3,0.3-0.9,0.1-0.9-0.4v-6.5c0-0.6-0.4-1-1-1h-2c-0.6,0-1,0.4-1,1v6.6c0,0.4-0.5,0.7-0.9,0.4l-1-1c-0.2-0.2-0.4-0.3-0.7-0.3-0.3,0-0.5,0.1-0.7,0.3l-1.4,1.4c-0.2,0.2-0.3,0.4-0.3,0.7s0.1,0.5 0.3,0.7l6,5.9z"/>
</svg>
</div>
<div class="player-cc player-button">
<svg viewBox="0 0 24 17">
<path d="M21,0H3C1.3,0,0,1.3,0,3v11c0,1.7,1.3,3,3,3h18c1.7,0,3-1.3,3-3V3C24,1.3,22.7,0,21,0z M10.2,11.9c-0.5,0.2-1,0.3-1.6,0.3
c-0.6,0-1.2-0.1-1.7-0.3s-1-0.5-1.3-0.8C5.3,10.7,5,10.3,4.8,9.8C4.6,9.3,4.5,8.7,4.5,8.1c0-0.6,0.1-1.2,0.3-1.7
C5,5.9,5.3,5.5,5.6,5.1C6,4.8,6.5,4.5,7,4.3C7.5,4.1,8,4,8.7,4c0.2,0,0.4,0,0.7,0.1c0.2,0,0.5,0.1,0.7,0.2c0.2,0.1,0.5,0.2,0.7,0.4
c0.2,0.1,0.4,0.3,0.6,0.5L10,6.2C9.8,6,9.6,5.9,9.3,5.8C9.1,5.7,8.8,5.6,8.5,5.6c-0.3,0-0.6,0.1-0.9,0.2C7.3,5.9,7.1,6.1,6.9,6.3
C6.7,6.5,6.5,6.8,6.4,7.1c-0.1,0.3-0.2,0.6-0.2,1c0,0.4,0.1,0.7,0.2,1c0.1,0.3,0.3,0.6,0.5,0.8s0.4,0.4,0.7,0.5
c0.3,0.1,0.6,0.2,0.9,0.2c0.4,0,0.7-0.1,0.9-0.2c0.3-0.1,0.5-0.4,0.7-0.6l1.4,1.1C11.1,11.3,10.7,11.7,10.2,11.9z M18.9,11.9
c-0.5,0.2-1,0.3-1.6,0.3c-0.6,0-1.2-0.1-1.7-0.3c-0.5-0.2-1-0.5-1.3-0.8c-0.4-0.4-0.7-0.8-0.9-1.3c-0.2-0.5-0.3-1.1-0.3-1.7
c0-0.6,0.1-1.2,0.3-1.7c0.2-0.5,0.5-0.9,0.9-1.3c0.4-0.4,0.8-0.6,1.3-0.8C16.1,4.1,16.7,4,17.3,4c0.2,0,0.4,0,0.7,0.1
c0.2,0,0.5,0.1,0.7,0.2c0.2,0.1,0.5,0.2,0.7,0.4c0.2,0.1,0.4,0.3,0.6,0.5l-1.3,1.1C18.4,6,18.2,5.9,18,5.8
c-0.2-0.1-0.5-0.2-0.9-0.2c-0.3,0-0.6,0.1-0.9,0.2c-0.3,0.1-0.5,0.3-0.7,0.5c-0.2,0.2-0.4,0.5-0.5,0.8c-0.1,0.3-0.2,0.6-0.2,1
c0,0.4,0.1,0.7,0.2,1c0.1,0.3,0.3,0.6,0.5,0.8c0.2,0.2,0.4,0.4,0.7,0.5c0.3,0.1,0.6,0.2,0.9,0.2c0.4,0,0.7-0.1,0.9-0.2
c0.3-0.1,0.5-0.4,0.7-0.6l1.4,1.1C19.8,11.3,19.4,11.7,18.9,11.9z"/>
</svg>
</div>
<div class="player-settings player-button">
<svg viewBox="0 0 15.998 15.998">
<path style="fill-rule:evenodd;clip-rule:evenodd;" d="M13.998,7c-0.553,0-1.08-0.443-1.291-0.952 c-0.21-0.508-0.15-1.194,0.24-1.585l0.707-0.706c0.391-0.391,0.391-1.024,0.001-1.415c-0.391-0.391-1.024-0.391-1.415,0 c0,0-0.316,0.316-0.707,0.707S10.457,3.5,9.949,3.29C9.442,3.08,8.998,2.553,8.998,2V1c0-0.553-0.447-1-1-1s-1,0.447-1,1v1 c0,0.553-0.442,1.08-0.95,1.291s-1.192,0.15-1.583-0.24L3.756,2.344c-0.391-0.391-1.024-0.39-1.413,0 C1.952,2.734,1.952,3.367,2.342,3.758l0.709,0.708C3.441,4.856,3.51,5.545,3.338,6.062C3.168,6.58,2.648,7.016,2.097,7.01L1,7 C0.448,7,0,7.449,0,8c0,0.553,0.448,1,1,1h1.001c0.552,0,1.087,0.438,1.331,0.925c0.245,0.486,0.188,1.159-0.207,1.546l-0.783,0.77 c-0.391,0.391-0.39,1.025,0,1.414c0.391,0.391,1.024,0.391,1.414,0.001l0.708-0.708c0.391-0.391,1.075-0.451,1.584-0.24 c0.508,0.211,0.95,0.738,0.95,1.291v1.001c0,0.552,0.448,1,1,0.999c0.553,0,1-0.447,1-0.999v-1.001c0-0.553,0.444-1.08,0.951-1.289 c0.508-0.211,1.193-0.15,1.584,0.24l0.707,0.707c0.391,0.391,1.024,0.391,1.413,0c0.391-0.391,0.392-1.024,0.002-1.414l-0.708-0.708 c-0.391-0.391-0.451-1.076-0.24-1.584S13.445,9,13.998,9h1c0.553,0,1-0.447,1-1s-0.447-1-1-1H13.998z M7.998,10 c-1.103,0-2-0.897-2-2s0.897-2,2-2s2,0.897,2,2S9.101,10,7.998,10z"/>
</svg>
</div>
<div class="player-fullscreen player-button">
<svg viewBox="0 0 15 15">
<path d="M4.5,13H2v-2.5C2,10.2,1.8,10,1.5,10h-1C0.2,10,0,10.2,0,10.5V14c0,0.6,0.4,1,1,1h3.5C4.8,15,5,14.8,5,14.5v-1
C5,13.2,4.8,13,4.5,13z"/>
<path d="M4.5,0H1C0.4,0,0,0.4,0,1v3.5C0,4.8,0.2,5,0.5,5h1C1.8,5,2,4.8,2,4.5V2h2.5C4.8,2,5,1.8,5,1.5v-1C5,0.2,4.8,0,4.5,0z"/>
<path d="M14,0h-3.5C10.2,0,10,0.2,10,0.5v1C10,1.8,10.2,2,10.5,2H13v2.5C13,4.8,13.2,5,13.5,5h1C14.8,5,15,4.8,15,4.5V1
C15,0.4,14.6,0,14,0z"/>
<path d="M14.5,10h-1c-0.3,0-0.5,0.2-0.5,0.5V13h-2.5c-0.3,0-0.5,0.2-0.5,0.5v1c0,0.3,0.2,0.5,0.5,0.5H14c0.6,0,1-0.4,1-1v-3.5
C15,10.2,14.8,10,14.5,10z"/>
</svg>
</div>
</div>
<div class="player-settings-menu player-menu">
<div class="player-resolution player-option">
<label>Resolution</label>
<div class="player-setting">
<div class="player-select">
<select class="player-resolution-select">
<option selected>Auto</option>
</select>
</div>
</div>
</div>
<div class="player-speed player-option">
<label>Speed</label>
<div class="player-setting">
<div class="player-select">
<select class="player-speed-select">
<option value="0.25">0.25X</option>
<option value="0.5">0.5X</option>
<option value="1" selected>1X</option>
<option value="1.25">1.25X</option>
<option value="1.5">1.5X</option>
<option value="2">2X</option>
</select>
</div>
</div>
</div>
</div>
<div class='player-mobile-muted'>
<svg viewBox="0 0 17.3 13.6">
<path id="speaker" d="m7.89999,0.17501l-3.4,3.3l-3.4,0c-0.7,0 -1.1,0.6 -1.1,1.1l0,4.5c0,0.6 0.4,1.1 1.1,1.1l3.4,0l3.5,3.3c0,0 1,0.6 1,-1.2c0,-0.9 0,-4.9 0,-10.9c0,-1.9 -1.1,-1.2 -1.1,-1.2l0,0z"/>
<path id="mute" d="m15.69999,6.87501l1.4,-1.4c0.2,-0.2 0.2,-0.5 0,-0.7l-0.7,-0.7c-0.2,-0.2 -0.5,-0.2 -0.7,0l-1.4,1.4l-1.3,-1.3c-0.2,-0.2 -0.5,-0.2 -0.7,0l-0.7,0.7c-0.2,0.2 -0.2,0.5 0,0.7l1.4,1.3l-1.4,1.4c-0.2,0.2 -0.2,0.5 0,0.7l0.7,0.7c0.2,0.2 0.5,0.2 0.7,0l1.4,-1.4l1.4,1.4c0.2,0.2 0.5,0.2 0.7,0l0.5,-0.8c0.2,-0.2 0.2,-0.5 0,-0.7l-1.3,-1.3z"/>
</svg>
</div>
<div class="player-stats">
<div><div>Video ID:</div><span>a49bddb1131ce3ca2c</span></div>
<div><div>User ID:</div><span>7e9bdeb21b1ee7cdf0</span></div>
<div><div>Playback:</div><span class="stat-playback"></span></div>
<div><div>Dimensions:</div><span class="stat-dimensions"></span></div>
<div><div>Resolution:</div><span class="stat-resolution"></span></div>
<div><div>Level Cap:</div><span class="stat-levelcap"></span></div>
<div><div>Speed:</div><span><span class="sparkline"></span><span class="stat-speed"></span></span></div>
</div>
<div class="player-download-sheet player-sheet">
<div class="player-card-btn player-card-close">
<svg viewBox="0 0 8.071 8.07">
<path d="M7.924,6.51L5.45,4.035l2.475-2.475c0.196-0.195,0.196-0.512,0-0.707L7.217,0.146 c-0.195-0.195-0.512-0.195-0.707,0L4.036,2.621L1.561,0.146c-0.195-0.195-0.512-0.195-0.707,0L0.147,0.854 c-0.196,0.195-0.196,0.512,0,0.707l2.475,2.475L0.147,6.51c-0.196,0.195-0.196,0.512,0,0.707l0.707,0.707 c0.195,0.195,0.512,0.195,0.707,0l2.475-2.475L6.51,7.924c0.195,0.195,0.512,0.195,0.707,0l0.707-0.707 C8.121,7.021,8.121,6.705,7.924,6.51z"/>
</svg>
</div>
<div class="player-download-options">
<h2>Download Video</h2>
<ul>
<li><a class='sd-download' href="https://sproutvideo.com/videos/a49bddb1131ce3ca2c/player_download?expires=1519775155&amp;type=sd&amp;uid=95245d12-97da-40d9-91ac-dec6c7d2d428&amp;auth=bb6a9aa8199938b2d31b9693f23235f3&amp;signature=qSz0ZuxEMVeI8QfYatTIbyS2WYw%3D" target="_blank">SD</a></li>
<li><a class='hd-download' href="https://sproutvideo.com/videos/a49bddb1131ce3ca2c/player_download?expires=1519775155&amp;type=hd&amp;uid=95245d12-97da-40d9-91ac-dec6c7d2d428&amp;auth=bb6a9aa8199938b2d31b9693f23235f3&amp;signature=sULsVrwr8cXXlNI3I%2BvYqNg51K8%3D" target="_blank">HD</a></li>
</ul>
</div>
</div>
</div>
<script src="//d1ajyp3swh7ygp.cloudfront.net/jquery.min.js"></script>
<script src="https://src.litix.io/core/2/mux.js"></script>
<!--[if lte IE 7]>
<script type="text/javascript" src="//d1ajyp3swh7ygp.cloudfront.net/json2.js"></script>
<![endif]-->
<!--[if IE]>
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/babel-polyfill/6.8.0/polyfill.min.js"></script>
<![endif]-->
<script type="text/javascript" src="//d1ajyp3swh7ygp.cloudfront.net/es6.min-8cdbfc06.js"></script>
</body>
</html>


@ -0,0 +1,78 @@
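# Builds links.txt for textures-resource.com: walks every console and game
# listing and records each texture's path (console/game/texture), preview-image
# URL, and download URL, NUL-separated, one per line.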
import requests
import bs4
import os
headers = {
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.5",
}
URL = "https://www.textures-resource.com"
def get_consoles():
consoles = []
response = requests.get(URL)
soup = bs4.BeautifulSoup(response.text, "html.parser")
for console in soup.find(id="leftnav-consoles"):
if type(console) == bs4.element.Tag and console.get("href") is not None:
consoles.append((console.text, URL + console.get("href")))
return consoles
def get_games(console, letter):
games = []
print(console[0] + " - " + letter)
print(console[1] + letter + ".html")
response = requests.get(console[1] + letter + ".html")
soup = bs4.BeautifulSoup(response.text, "html.parser")
for link in soup.find_all("a"):
for child in link.findChildren():
if child.get("class") is not None and child.get("class") == ['gameiconcontainer']:
game_name = child.find("div").find("span").string
games.append((game_name, URL + link.get("href")))
return games
def get_textures(game):
textures = []
response = requests.get(game[1])
soup = bs4.BeautifulSoup(response.text, "html.parser")
for link in soup.find_all("a"):
for div in link.find_all("div"):
if div.get("class") == ["iconcontainer"]:
texture_url = div.find("div", attrs={"class": "iconbody"}).find("img").get("src")
texture_id = texture_url.split("/")[4][:-4]
model_download = "https://www.textures-resource.com/download/" + texture_id + "/"
model_name = div.find("div").find("span").string
textures.append((model_name, URL + texture_url, model_download))
return textures
file = open("links.txt", "w")
for console in get_consoles():
for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
for game in get_games(console, letter):
for model in get_textures(game):
file.write(console[0] + os.sep + game[0] + os.sep + model[0] + os.sep + "\0" + model[1] + "\0" +
model[2] + "\n")
file.close()


@ -0,0 +1,39 @@
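# Reads links.txt and saves each texture's preview.png plus the downloaded
# archive under textures/<console>/<game>/<texture>/, retrying indefinitely on
# network errors.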
import pathlib
import requests
import os
file = open("links.txt", "r")
for line in file.read().splitlines():
path, preview, link = line.split("\0")
if os.path.isfile("textures/" + path + "preview.png"):
continue
print("textures/" + path)
pathlib.Path("textures/" + path).mkdir(parents=True, exist_ok=True)
while True:
try:
response = requests.get(preview, stream=True, timeout=5)
with open("textures/" + path + "preview.png", 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
response2 = requests.get(link, stream=True, timeout=5)
file_extension = os.path.splitext(response2.headers["Content-Disposition"])[1][:-2]
with open("textures/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
for chunk in response2.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
break
except:
print("!", end="", flush=True)
continue

Binary file not shown.

3
viditut.com/README.md Normal file

@ -0,0 +1,3 @@
crawler_courses.py -> courses.txt
courses.txt -> crawler_videos.py -> links.txt
links.txt -> downloader.py -> (Downloaded videos)
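Run the stages in order from this directory: `python3 crawler_courses.py`, then `python3 crawler_videos.py`, then `python3 downloader.py` (as committed, the video crawler writes and the downloader reads `links1.txt`).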

0
viditut.com/courses.txt Normal file

73
viditut.com/crawler_courses.py Normal file

@ -0,0 +1,73 @@
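# Builds courses.txt: discovers category IDs from the viditut.com homepage,
# pages through each category's AJAX course listing, and records
# category-id\0course-name\0course-id\0course-link per line.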
import requests
import bs4
import json
URL = "https://viditut.com"
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30)
except:
print("!", end="", flush=True)
continue
def get_categories():
categories = []
r = requests.get(URL)
soup = bs4.BeautifulSoup(r.text, "html.parser")
for i in soup.find_all("i"):
if i.get("class") is not None and len(i.get("class")) > 1 and "cat-" in i.get("class")[1]:
category_id = i.get("class")[1][4:]
category_name = i.get("title")[:i.get("title").find("-") - 1]
categories.append((category_name, category_id))
return categories
def get_courses(category):
last_len = 0
courses = []
page = 0
while True:
page += 1
r = request_timeout("https://viditut.com/ajax/category/" + category[1] + "/courses?page=" + str(page))
soup = bs4.BeautifulSoup(json.loads(r.text)["html"], "html.parser")
for link in soup.find_all("a"):
if link.get("href") is not None:
if link.find("h3") is not None:
course_link = link.get("href")
course_name = link.find("h3").string
course_id = course_link.split("/")[-1:][0][:-7]
courses.append((course_name, course_id, course_link))
print("Page " + str(page) + " (" + str(len(courses)) + ")")
if last_len == len(courses):
break
last_len = len(courses)
return courses
file = open("courses.txt", "w")
for category in get_categories():
print(category)
for course in get_courses(category):
print(course[0])
file.write(category[1] + "\0" + course[0] + "\0" + course[1] + "\0" + course[2] + "\n")
file.flush()
file.close()

68
viditut.com/crawler_videos.py Normal file

@ -0,0 +1,68 @@
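# Reads courses.txt, scrapes each course page for its video IDs, asks the /play
# endpoint for the available qualities, and writes
# <category>/<course>/<video>\0<quality>\0<url> lines to links1.txt.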
import requests
import bs4
import json
def request_timeout(url):
while True:
try:
return requests.get(url, timeout=30)
except:
print("!", end="", flush=True)
continue
def get_videos(course):
videos = []
r = request_timeout(course[2])
soup = bs4.BeautifulSoup(r.text, "html.parser")
for link in soup.find_all("a"):
if link.get("class") is not None and str(link.get("class")) == "['item-name', 'video-name', 'ga']":
video_id = link.get("data-ga-value")
video_name = link.text.replace("\n", "").strip()
videos.append((video_name, video_id))
return videos
def get_links(course, video):
links = []
r = request_timeout("https://viditut.com/ajax/course/" + course[1] + "/" + video[1] + "/play")
json_obj = json.loads(r.text)
if len(json.loads(r.text)) > 0:
json_obj = json_obj[0]
else:
return links
for quality in json_obj["qualities"]:
links.append((quality, json_obj["urls"][quality]))
return links
file = open("courses.txt", "r")
fileout = open("links1.txt", "w")
for line in file.read().splitlines():
category, course_name, course_id, course_url = line.split("\0")
course = (course_name, course_id, course_url)
print(course_name)
for video in get_videos(course):
for link in get_links(course, video):
fileout.write(category + "/" + course_name + "/" + video[0] + "\0" + link[0] + "\0" + link[1] + "\n")
fileout.flush()
fileout.close()
file.close()

38
viditut.com/downloader.py Normal file

@ -0,0 +1,38 @@
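# Reads links1.txt and downloads every 720p stream into
# <category>/<course>/<n> -<video>[720].mp4, retrying indefinitely on errors.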
import pathlib
import os
import requests
file = open("links1.txt", "r")
i = 0
for line in file.read().splitlines():
path, quality, link = line.split("\0")
if quality != "720":
continue
i += 1
pathlib.Path(os.path.split(path)[0]).mkdir(parents=True, exist_ok=True)
if os.path.isfile(os.path.split(path)[0] + os.sep + str(i) + " -" + os.path.split(path)[1] +
"[" + quality + "].mp4"):
continue
print(path)
while True:
try:
response = requests.get(link, stream=True, timeout=5)
with open(os.path.split(path)[0] + os.sep + str(i) + " -" + os.path.split(path)[1] +
"[" + quality + "].mp4", 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
break
except:
print("!", end="", flush=True)
continue

BIN
viditut.com/links.txt Normal file

Binary file not shown.