mirror of https://github.com/simon987/Misc-Download-Scripts.git
synced 2025-04-01 12:02:59 +00:00

Initial commit
commit cd09d2b791
1001freefonts.com/links.txt  (20327 lines, Normal file)
File diff suppressed because it is too large
1001freefonts.com/run.py  (95 lines, Normal file)
@@ -0,0 +1,95 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "https://www.1001freefonts.com/"
}


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30, headers=headers)
        except Exception as e:
            print("!", end="", flush=True)
            continue


def get_dl_links(url):
    print(url)

    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        href = a.get("href")

        if href is not None and href.find("/d/") != -1:
            with open("links.txt", "a") as f:
                f.write(href + "\n")


def get_fonts():
    letters = list("abcdefghijklmnopqrstuvwxyz")
    letters.append("num")

    all_page_links = []

    for letter in letters:
        print(letter)

        r = request_timeout("https://www.1001freefonts.com/" + letter + "fonts.php")
        soup = BeautifulSoup(r.text, "html.parser")

        page_max = soup.find("div", attrs={"class": "pagingLabelWrapper"})
        page_max = page_max.text.split(" ")[-1]
        page_max = int(page_max)
        print(page_max)

        for i in range(1, page_max+1):
            all_page_links.append("https://www.1001freefonts.com/" + letter + "fonts" + str(i) + ".php")

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, all_page_links)


def download_font(url):
    file_path = "fonts" + url[url.rfind("/"):]

    if os.path.exists(file_path):
        return

    print(file_path)
    r = requests.get(url, stream=True, headers=headers)

    if r.status_code != 200:
        print(r.status_code)
        return

    with open(file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


def download_all():
    pool = multiprocessing.Pool(processes=25)

    with open("links.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


# get_fonts()
download_all()
README.md  (9 lines, Normal file)
@@ -0,0 +1,9 @@
## Scripts for downloading content from a bunch of websites

### Setup:

```sudo pip3 install python-guerrillamail bs4 pdfkit youtube-dl```

### About

Feel free to contribute or suggest new websites using the Issue feature.
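Most of the site folders in this commit follow the same two-phase pattern: a crawl step that appends URLs or IDs to a text file (`links.txt` or `fonts.txt`), and a download step that maps a `multiprocessing.Pool` of workers over that file and streams each response to disk in 1 KiB chunks. A minimal sketch of that shared pattern, for orientation only (the `fetch` helper and the `downloads/` folder are placeholders, not code from the repository):

```python
import multiprocessing
import os

import requests


def fetch(url):
    # placeholder worker: stream one URL to disk, like the download_* helpers below
    r = requests.get(url, stream=True, timeout=30)
    if r.status_code != 200:
        print(r.status_code)
        return
    # naive file name choice for the sketch; the real scripts derive names per site
    file_path = "downloads/" + url.rsplit("/", 1)[-1]
    with open(file_path, "wb") as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


if __name__ == "__main__":
    os.makedirs("downloads", exist_ok=True)
    # crawl phase omitted; assumes links.txt already holds one URL per line
    with open("links.txt") as f:
        urls = f.read().splitlines()
    multiprocessing.Pool(processes=25).map(fetch, urls)
```

In the actual scripts the crawl phase is toggled by the commented-out calls at the bottom of each file (for example `# get_fonts()`), and each script hard-codes its own target folder and link-extraction logic.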
abstractfonts.com/downloaded.txt  (0 lines, Normal file)
abstractfonts.com/fonts.txt  (13866 lines, Normal file)
File diff suppressed because it is too large
abstractfonts.com/proxies.txt  (0 lines, Normal file)
abstractfonts.com/run.py  (122 lines, Normal file)
@@ -0,0 +1,122 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os


proxy_index = 0

proxies = {
    "http": ""
}

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1"
}


def already_downloaded(font_id):
    with open("downloaded.txt", "r") as f:
        return font_id in f.read().splitlines()


def flag_downloaded(font_id):
    with open("downloaded.txt", "a") as f:
        f.write(font_id + "\n")


def get_new_proxy():
    global proxy_index

    with open("proxies.txt", "r") as f:
        line = f.read().splitlines()[proxy_index]
        proxies["http"] = line
        print("Switched to proxy " + line)
        proxy_index += 1


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception as e:
            print("!", end="", flush=True)
            continue


def get_dl_links(url):
    print(url)
    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        if a.get("data-font-id") is not None:
            with open("fonts.txt", "a") as f:
                f.write(a.get("data-font-id") + "\n")


def get_fonts():
    letters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    letters.append("Numbers")

    all_page_links = []

    for letter in letters:
        all_page_links.append("http://www.abstractfonts.com/alpha/" + letter)

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, all_page_links)


def download_font(font_id):
    if already_downloaded(font_id):
        return

    while True:
        try:
            r = requests.get("http://www.abstractfonts.com/download/" + font_id, stream=True, proxies=proxies, headers=headers, timeout=5)

            if r.status_code == 404:
                print(str(r.status_code) + " - http://www.abstractfonts.com/download/" + font_id)
                get_new_proxy()
                return

            if "Content-Disposition" not in r.headers:
                print(r.text)
                get_new_proxy()
                return

            file_path = "fonts/" + r.headers["Content-Disposition"][r.headers["Content-Disposition"].rfind("\"", 0, -2) + 1:-1]

            if os.path.exists(file_path):
                return

            print(file_path)

            with open(file_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            flag_downloaded(font_id)
            break
        except:
            get_new_proxy()
            continue

    return


# get_fonts()
get_new_proxy()

pool = multiprocessing.Pool(processes=100)

with open("fonts.txt", "r") as f1:
    pool.map(download_font, f1.read().splitlines())
craftsy.com/courses.txt  (0 lines, Normal file)
craftsy.com/ripper.py  (170 lines, Normal file)
@@ -0,0 +1,170 @@
import requests
import pathlib
import os
import json


headers_login = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "Referer": "https://unlimited.craftsy.com/login",
    "X-Requested-By": "Craftsy"
}

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "application/json, text/plain, */*"
}


def login(email, password):
    r1 = requests.get("https://unlimited.craftsy.com/login", headers=headers_login)

    payload = json.dumps({"email": email, "password": password})
    r2 = requests.post("https://api.craftsy.com/login/", data=payload, headers=headers_login, cookies=r1.cookies)

    print(r2.text)

    return r2


def get_course_info(r_login, course_id):
    while True:
        try:
            r = requests.get("https://api.craftsy.com/m/playlists/" + course_id, headers=headers_login,
                             cookies=r_login.cookies, timeout=5)
            break
        except:
            print("!", end="", flush=True)
            continue

    course_info = json.loads(r.text)

    return course_info


def get_materials(r_login, course_id):
    materials = []

    while True:
        try:
            r = requests.get("https://api.craftsy.com/m/playlists/" + course_id + "/materials", headers=headers_login,
                             cookies=r_login.cookies, timeout=5)
            break
        except:
            print("!", end="", flush=True)
            continue

    try:
        material_info = json.loads(r.text)

        for material in material_info:
            materials.append((material["materialName"], material["materialPath"]))
    except:
        print("Err mat!", end="", flush=True)

    return materials


def get_episodes(course_info):
    episodes = []

    course_name = course_info["name"]
    print(course_name)

    for episode in course_info["episodes"]:
        episodes.append((course_name, episode["name"], episode["episodeId"]))

    return episodes


def download_episode(episode, r_login):
    while True:
        try:
            r = requests.get("https://api.craftsy.com/m/videos/secure/episodes/" + str(episode[2]), headers=headers,
                             cookies=r_login.cookies, timeout=5)
            break

        except Exception as e:
            print("!", end="", flush=True)
            continue

    episode_info = []
    try:
        episode_info = json.loads(r.text)
    except:
        print("Err episode!", end="", flush=True)

    for source in episode_info:
        if source["format"] == "mp4":
            path = episode[0]
            print(path + os.sep + str(episode[1]) + ".mp4")
            pathlib.Path(path).mkdir(parents=True, exist_ok=True)

            if os.path.exists(path + os.sep + str(episode[2]) + " - " + episode[1].replace("/", "") + ".mp4"):
                print("Skipping...")
                continue

            while True:
                try:
                    response = requests.get(source["url"], stream=True, timeout=5)

                    with open(path + os.sep + str(episode[2]) + " - " + episode[1].replace("/", "") + ".mp4", 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                    break
                except Exception as e:
                    print("!", end="", flush=True)
                    continue


def download_material(r_login, material, course_info):
    path = course_info["name"]
    print(path + os.sep + material[0] + os.path.splitext(material[1])[1])
    pathlib.Path(path).mkdir(parents=True, exist_ok=True)

    if os.path.exists(path + os.sep + material[0] + os.path.splitext(material[1])[1]):
        print("Skipping...")
        return

    while True:
        try:
            response = requests.get(material[1], stream=True, timeout=5, cookies=r_login.cookies)

            with open(path + os.sep + material[0] + os.path.splitext(material[1])[1], 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            break
        except:
            print("!", end="", flush=True)
            continue


rLogin = login("", "")


for course in open("courses.txt").read().splitlines():
    print(course)

    course_info = get_course_info(rLogin, course)

    for material in get_materials(rLogin, course):
        download_material(rLogin, material, course_info)
        print(material)

    for episode in get_episodes(course_info):
        download_episode(episode, rLogin)
        print(episode)
dafont.com/links.txt  (34106 lines, Normal file)
File diff suppressed because it is too large
dafont.com/run.py  (100 lines, Normal file)
@@ -0,0 +1,100 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "https://www.dafont.com/"
}


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30, headers=headers)
        except Exception as e:
            print("!", end="", flush=True)
            continue


def get_dl_links(url):
    print(url)

    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        href = a.get("href")

        if href is not None and href.startswith("//dl"):
            with open("links.txt", "a") as f:
                f.write("https://www.dafont.com" + href + "\n")


def get_fonts():
    letters = list("abcdefghijklmnopqrstuvwxyz")
    letters.append("%23")

    page_links = []
    all_page_links = []

    for letter in letters:
        print(letter)

        r = request_timeout("https://www.dafont.com/alpha.php?lettre=" + letter)
        soup = BeautifulSoup(r.text, "html.parser")

        for a in soup.findAll("a"):
            if a.get("href") is not None and a.get("href").find("&page=") != -1:
                page_links.append("https://" + a.get("href"))

        page_max = page_links[-2]
        page_max = int(page_max[page_max.rfind("=") + 1:])

        print(page_max)

        for i in range(1, page_max+1):
            all_page_links.append("https://www.dafont.com/alpha.php?lettre=" + letter + "&page=" + str(i))

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, all_page_links)


def download_font(url):
    file_path = "fonts/" + url[url.rfind("/")+4:] + ".zip"

    if os.path.exists(file_path):
        return

    print(file_path)
    r = requests.get(url, stream=True, headers=headers)

    if r.status_code != 200:
        print(r.status_code)
        return

    with open(file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


def download_all():
    pool = multiprocessing.Pool(processes=25)

    with open("links.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


# get_fonts()
download_all()
fontfabric.com/run.py  (65 lines, Normal file)
@@ -0,0 +1,65 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing

fonts = []


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception as e:
            print("!", end="", flush=True)
            continue


def get_fonts():
    for page in range(1, 4):
        r = request_timeout("http://www.fontfabric.com/category/free/page/" + str(page))
        soup = BeautifulSoup(r.text, "html.parser")

        for link in soup.find("div", attrs={"class": "recent-leads fix"}).findAll("a"):
            href = link.get("href")

            if href is not None and href not in fonts and href.find("#") == -1 and href.find("category/") == -1:
                fonts.append(link.get("href"))

    print(len(fonts))


def download_font(url):
    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        onclick = a.get("onclick")

        if onclick is not None and onclick.startswith("window.location"):
            dl_link = "http://www.fontfabric.com" + onclick[onclick.find("'")+1:onclick.rfind("'")]
            file_path = "fonts" + dl_link[dl_link.rfind("/"):]
            r_dl = requests.get(dl_link, stream=True, cookies=r.cookies)

            if r_dl.status_code != 200:
                print(r_dl.status_code)
                return

            print(file_path)

            with open(file_path, 'wb') as f:
                for chunk in r_dl.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)


def download_all():
    pool = multiprocessing.Pool(processes=25)
    pool.map(download_font, fonts)


get_fonts()
download_all()
fontfreak.com/fonts.txt  (7458 lines, Normal file)
File diff suppressed because it is too large
fontfreak.com/run.py  (72 lines, Normal file)
@@ -0,0 +1,72 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os


fonts = []


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception as e:
            print("!", end="", flush=True)
            continue


def get_fonts():
    letters = list("abcdefghijklmnopqrstuvwxyz")
    letters.append("no")

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, letters)


def get_dl_links(letter):
    for page in range(1, 11):
        r = request_timeout("http://www.fontfreak.com/fonts-" + letter + str(page) + ".htm")
        soup = BeautifulSoup(r.text, "html.parser")

        for a in soup.findAll("a"):
            if a.text is not None and a.text == "click here to download":
                with open("fonts.txt", "a") as f:
                    f.write("http://www.fontfreak.com/" + a.get("href") + "\n")


def download_font(url):
    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    dl_link = soup.find("a", attrs={"title": "DOWNLOAD FONT"})

    if dl_link is not None:
        dl_url = "http://www.fontfreak.com/" + dl_link.get("href")
        file_path = "fonts/" + dl_url[dl_url.rfind("/")+1:]

        if os.path.exists(file_path):
            return

        r = requests.get(dl_url, stream=True)

        print(file_path)

        with open(file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    else:
        print("no dl" + url)


get_fonts()

pool = multiprocessing.Pool(processes=25)
with open("fonts.txt", "r") as f:
    pool.map(download_font, f.read().splitlines())
fontmeme.com/fonts.txt  (37570 lines, Normal file)
File diff suppressed because it is too large
fontmeme.com/proxies.txt  (0 lines, Normal file)
fontmeme.com/run.py  (115 lines, Normal file)
@@ -0,0 +1,115 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os


proxy_index = 0

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1"
}

proxies = {
    'https': '',
}


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception as e:
            print("!", end="", flush=True)
            continue


def get_fonts():
    for i in range(3758):
        print(i)

        r = request_timeout("https://fontmeme.com/fonts/page/" + str(i))

        soup = BeautifulSoup(r.text, "html.parser")

        for div in soup.findAll("div"):
            if div.get("id") is not None and div.get("id") == "ptitle":
                for child in div.children:
                    if child.get("href") is not None:
                        with open("fonts.txt", "a") as f:
                            f.write(child.get("href") + '\n')


def get_new_proxy():
    global proxy_index

    with open("proxies.txt", "r") as f:
        line = f.read().splitlines()[proxy_index]
        proxies["https"] = line
        print("Switched to proxy " + line)
        proxy_index += 1


def download_font(font_url):
    file_path = "fonts/" + font_url[font_url[:-1].rfind("/")+1:-6] + ".zip"

    if os.path.exists(file_path):
        return

    r1 = request_timeout(font_url)

    dl_link_index = r1.text.find("https://fontmeme.com/fonts/download/")

    if dl_link_index != -1:
        dl_link = r1.text[dl_link_index: r1.text.find("'", dl_link_index)]

        headers["Referer"] = font_url

        try:
            r = requests.get(dl_link, stream=True, headers=headers, proxies=proxies, cookies=r1.cookies, timeout=10)
        except:
            get_new_proxy()
            return

        if r.status_code != 200:
            print(r.status_code)
            return

        reached_limit = False

        with open(file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

        with open(file_path, "rb") as f:
            if f.read().find(b"PK") != 0:
                reached_limit = True

        if reached_limit:
            os.remove(file_path)
            print("You have reached the maximum permitted downloads")
            get_new_proxy()


def download_all():
    pool = multiprocessing.Pool(processes=100)

    with open("fonts.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


# get_fonts()
# get_new_proxy()
download_all()
fontspace.com/fonts.txt  (27390 lines, Normal file)
File diff suppressed because it is too large
fontspace.com/run.py  (111 lines, Normal file)
@@ -0,0 +1,111 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
from urllib.parse import urljoin


headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "http://www.fontspace.com"
}


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception as e:
            print("!", end="", flush=True)
            continue


def get_dl_links(page_url):
    print(page_url)

    r_page = request_timeout(page_url)
    soup_page = BeautifulSoup(r_page.text, "html.parser")

    for dl_link in soup_page.findAll("a", attrs={"class": "box-button transparent"}):
        with open("fonts.txt", "a") as f:
            f.write(dl_link.get("href") + "\n")


def get_fonts():
    lists = list("abcdefghijklmnopqrstuvwxyz")
    lists.append("letter")

    page_links = []

    for page in lists:
        print(page)

        r = request_timeout("http://www.fontspace.com/list/" + page)
        soup = BeautifulSoup(r.text, "html.parser")

        for a in soup.findAll("a"):
            if a.get("href") is not None and a.get("href").find("?p=") != -1:
                page_links.append(a.get("href"))

        page_max = page_links[-2]
        page_max = int(page_max[page_max.rfind("=") + 1:])

        print(page_max)

        for i in range(1, page_max):
            page_links.append("http://www.fontspace.com/list/" + page + "?p=" + str(i))

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, page_links)


def download_font(dl_url):
    full_url = urljoin("http://www.fontspace.com", dl_url)
    file_path = "fonts" + full_url[full_url.rfind("/"):]

    if os.path.exists(file_path):
        return

    print(file_path)

    r = requests.get(full_url, stream=True, headers=headers, cookies=cookies)

    if r.status_code != 200:
        print(r.status_code)
        return

    with open(file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


def get_cookie():
    r = request_timeout("http://www.fontspace.com/list/a?text=&p=2")
    return r.cookies


def download_all(cookies):
    pool = multiprocessing.Pool(processes=25)

    with open("fonts.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


# get_fonts()
cookies = get_cookie()

download_all(cookies)
fontstruct.com/fonts.txt  (47830 lines, Normal file)
File diff suppressed because it is too large
fontstruct.com/run.py  (142 lines, Normal file)
@@ -0,0 +1,142 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os

username = ""
password = ""

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "https://fontstruct.com/",
    "Connection": "keep-alive"
}


font_ids = []


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30, headers=headers)
        except Exception as e:
            print("!", end="", flush=True)
            continue


def login():
    r1 = request_timeout("https://fontstruct.com/login")
    print(r1.cookies)

    login_headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Upgrade-Insecure-Requests": "1",
        "Referer": "https://fontstruct.com/login",
        "Connection": "keep-alive"
    }

    payload = {"_username": username, "_password": password, "_csrf_token": "", "_submit": "Sign+In"}
    r = requests.post("https://fontstruct.com/login_check", headers=login_headers, data=payload, cookies=r1.cookies)
    print(r.cookies)
    print(len(r.text))
    print(r.headers)

    return r.history[0]


def get_font_ids(page_url):
    print(page_url)

    r = request_timeout(page_url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        href = a.get("href")

        if href is not None and href.startswith("/fontstructions") and href.find("/license/") == -1 and\
                href.find("/vote_breakdown/") == -1:

            font_id = href[href.find("show/")+5:href.rfind("/")]

            if font_id not in font_ids:
                font_ids.append(font_id)
                with open("fonts.txt", "a") as f:
                    f.write(font_id + "\n")


def get_fonts():
    page_urls = []

    for page_num in range(1, 1428):
        page_urls.append("https://fontstruct.com/gallery?filters=all&page=" + str(page_num))

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_font_ids, page_urls)


def download_font(font_id):
    dl_headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Upgrade-Insecure-Requests": "1",
        "Referer": "https://fontstruct.com/fontstructions/download/" + font_id,
        "Connection": "keep-alive"
    }

    dl_url = "https://fontstruct.com/font_archives/download/" + font_id

    while True:
        r = requests.get(dl_url, stream=True, headers=dl_headers, cookies=cookies)

        if r.status_code == 403:
            return

        if r.status_code == 500:
            continue

        if "Content-Disposition" not in r.headers:
            print(r.text)
            return

        file_path = "fonts/" + r.headers["Content-Disposition"][r.headers["Content-Disposition"].rfind("'") + 1:]

        if os.path.exists(file_path):
            return

        print(file_path)

        with open(file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        return


def download_all():
    pool = multiprocessing.Pool(processes=25)

    with open("fonts.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


cookies = login().cookies

# get_fonts()
download_all()
lynda.com/courses.txt  (BIN, Normal file)
Binary file not shown.
lynda.com/crawler-courses.py  (73 lines, Normal file)
@@ -0,0 +1,73 @@
import requests
import bs4
import json


URL = "https://lynda.com"


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except:
            print("!", end="", flush=True)
            continue


def get_categories():
    categories = []

    r = requests.get(URL)
    soup = bs4.BeautifulSoup(r.text, "html.parser")

    for i in soup.find_all("i"):
        if i.get("class") is not None and len(i.get("class")) > 1 and "cat-" in i.get("class")[1]:
            category_id = i.get("class")[1][4:]
            category_name = i.get("title")[:i.get("title").find("-") - 1]

            categories.append((category_name, category_id))

    return categories


def get_courses(category):
    last_len = 0
    courses = []
    page = 0
    while True:

        page += 1
        r = request_timeout("https://lynda.com/ajax/category/" + category[1] + "/courses?page=" + str(page))
        soup = bs4.BeautifulSoup(json.loads(r.text)["html"], "html.parser")

        for link in soup.find_all("a"):
            if link.get("href") is not None:
                if link.find("h3") is not None:
                    course_link = link.get("href")
                    course_name = link.find("h3").string
                    course_id = course_link.split("/")[-1:][0][:-7]

                    courses.append((course_name, course_id, course_link))

        print("Page " + str(page) + " (" + str(len(courses)) + ")")

        if last_len == len(courses):
            break

        last_len = len(courses)

    return courses


file = open("courses.txt", "w")

for category in get_categories():
    print(category)
    for course in get_courses(category):
        print(course[0])
        file.write(category[1] + "\0" + course[0] + "\0" + course[1] + "\0" + course[2] + "\n")
        file.flush()


file.close()
lynda.com/tmp.py  (11 lines, Normal file)
@@ -0,0 +1,11 @@
terms = ["data science", "big data", "hadoop", "python", "data mining", "text mining", "deep learning", "blender",
         "unity", "zbrush", "substance"]

for line in open("courses.txt"):
    category, name, course_id, url = line.split("\0")

    for term in terms:
        if term in name.lower():
            print(url[:-1])
models-resource.com/crawler.py  (79 lines, Normal file)
@@ -0,0 +1,79 @@
import requests
import bs4
import os

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
}

URL = "https://www.models-resource.com"


def get_consoles():
    consoles = []

    response = requests.get(URL)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for console in soup.find(id="leftnav-consoles"):
        if type(console) == bs4.element.Tag and console.get("href") is not None:
            consoles.append((console.text, URL + console.get("href")))

    return consoles


def get_games(console, letter):
    games = []

    print(console[0] + " - " + letter)

    print(console[1] + letter + ".html")
    response = requests.get(console[1] + letter + ".html")
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):
        for child in link.findChildren():
            if child.get("class") is not None and child.get("class") == ['gameiconcontainer']:
                game_name = child.find("div").find("span").string

                games.append((game_name, URL + link.get("href")))

    return games


def get_models(game):
    models = []

    response = requests.get(game[1])
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):

        for div in link.find_all("div"):
            if div.get("class") == ["iconcontainer"]:

                model_url = div.find("div", attrs={"class": "iconbody"}).find("img").get("src").replace("sheet_icons",
                                                                                                        "big_icons")
                model_id = model_url.split("/")[4][:-4]
                model_download = "https://www.models-resource.com/download/" + model_id + "/"

                model_name = div.find("div").find("span").string
                models.append((model_name, URL + model_url, model_download))

    return models


file = open("links.txt", "w")

for console in get_consoles():
    for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
        for game in get_games(console, letter):
            for model in get_models(game):
                file.write(console[0] + os.sep + game[0] + os.sep + model[0] + os.sep + "\0" + model[1] + "\0" +
                           model[2] + "\n")

file.close()
models-resource.com/downloader.py  (39 lines, Normal file)
@@ -0,0 +1,39 @@
import pathlib
import requests
import os

file = open("links.txt", "r")


for line in file.read().splitlines():
    path, preview, link = line.split("\0")

    if os.path.isfile("models/" + path + "preview.png"):
        continue

    print("models/" + path)

    pathlib.Path("models/" + path).mkdir(parents=True, exist_ok=True)

    while True:
        try:
            response = requests.get(preview, stream=True, timeout=5)
            with open("models/" + path + "preview.png", 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            response2 = requests.get(link, stream=True, timeout=5)

            file_extension = os.path.splitext(response2.headers["Content-Disposition"])[1][:-2]

            with open("models/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
                for chunk in response2.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            break
        except:
            print("!", end="", flush=True)
            continue
models-resource.com/links.txt  (BIN, Normal file)
Binary file not shown.
poliigon.com/counter  (1 line, Normal file)
@@ -0,0 +1 @@
1
poliigon.com/ripper.py  (163 lines, Normal file)
@@ -0,0 +1,163 @@
import random
import string
import requests
import time
from guerrillamail import GuerrillaMailSession


proxies = {
    # 'https': 'https://##.##.##.##:##',
}


headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Referer": "https://www.poliigon.com/register",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}


def read_email(email):
    s = GuerrillaMailSession()

    s.set_email_address(email)

    print(s.get_session_state())

    for email in s.get_email_list():
        if email.subject == "Poliigon: Email Verification":
            print("Got email")

            body = s.get_email(s.get_email_list()[0].guid).body
            link = body[body.index("https://www.poliigon.com"):body.index("https://www.poliigon.com") + 71]

            return link


def download_file(url, cookies):
    r = requests.get(url, stream=True, headers=headers, proxies=proxies, cookies=cookies)

    if "X-Sendfile" in r.headers:
        local_filename = r.headers["X-Sendfile"].split('/')[-1]

        print(local_filename + "...")

        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        return False
    else:

        print("Error")
        return True


def rand_string():
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(12))


def get_next_num():
    counter_file = open("counter")
    counter = int(counter_file.read())

    counter_file.close()
    counter_file = open("counter", 'w')
    counter_file.write(str(counter + 1))
    return counter


def decrement():
    counter_file = open("counter")
    counter = int(counter_file.read())

    counter_file.close()
    counter_file = open("counter", 'w')
    counter_file.write(str(counter - 1))


def login(email, password):
    r_login_token = requests.get("https://www.poliigon.com/login", headers=headers, proxies=proxies)
    token = r_login_token.text[r_login_token.text.index("<input name=\"_token\" type=\"hidden\" value=\"") + 42:
                               r_login_token.text.index("<input name=\"_token\" type=\"hidden\" value=\"") + 82]

    # Login
    payload = {"_token": token, "email": email, "password": password}
    r_login = requests.post("https://www.poliigon.com/login", headers=headers, proxies=proxies, data=payload,
                            cookies=r_login_token.cookies)
    return r_login


def create_account_and_login():
    email = rand_string() + "@sharklasers.com"

    print("email is " + email)

    f_name = rand_string()
    l_name = rand_string()
    password = rand_string()

    print("Password is " + password)

    # Get Cookie
    r = requests.get("https://www.poliigon.com/register", headers=headers, proxies=proxies)

    session_cookie = r.cookies['laravel_session']

    print("Got cookie: " + session_cookie)

    body = r.text

    # Get token
    token = body[body.index("<input name=\"_token\" type=\"hidden\" value=\"") + 42:
                 body.index("<input name=\"_token\" type=\"hidden\" value=\"")+82]

    print("Got token: " + token + " " + str(len(token)))

    # Register
    payload = {"_token": token, "first_name": f_name, "last_name": l_name, "email": email,
               "email_confirmation": email, "password": password, "password_confirmation": password}

    r2 = requests.post("https://www.poliigon.com/register", headers=headers, data=payload,
                       cookies=r.cookies, proxies=proxies)

    # verify
    r3 = requests.get("https://www.poliigon.com/verify", headers=headers, proxies=proxies, cookies=r.cookies)

    if r2.text != "Error in exception handler.":
        print("Sucessful register")

        time.sleep(35)
        counter = 5

        while counter > 0:
            counter -= 1
            link = read_email(email)

            if link is None:
                time.sleep(5)
            else:
                break

        if "https" in link:
            # Verify email
            print("Verifying " + link)
            print(requests.get(link, headers=headers, proxies=proxies, cookies=r.cookies))

            # Email verified, now login
            return login(email, password)

    else:
        print(r2.text)


while True:
    rLogin = create_account_and_login()

    error = False
    while not error:
        error = download_file("https://www.poliigon.com/multiple_download/" + str(get_next_num()) + "/1K",
                              rLogin.cookies)
        if error:
            decrement()
software.intel.com/run.py  (112 lines, Normal file)
@@ -0,0 +1,112 @@
import requests
from bs4 import BeautifulSoup
import os
import pdfkit
from urllib.parse import urljoin
import youtube_dl


articles = []
videos = []
kits = []


def get_articles():
    for page in range(0, 10):
        r = requests.get("https://software.intel.com/en-us/ai-academy/library?page=" + str(page))
        soup = BeautifulSoup(r.text, "html.parser")
        for link in soup.find_all("a"):
            if link.get("href") is not None and link.get("href").startswith("/en-us/articles/"):
                if link.string is not None:
                    articles.append((link.get("href"), link.string))

            if link.get("href") is not None and link.get("href").startswith("/en-us/videos/"):
                if link.string is not None:
                    videos.append((link.get("href"), link.string))

    print(str(len(articles)) + " articles")
    print(str(len(videos)) + " videos")


def get_kits():
    r = requests.get("https://software.intel.com/en-us/ai-academy/students/kits")
    soup = BeautifulSoup(r.text, "html.parser")

    for link in soup.find_all("a"):
        if link.string is not None and link.string == "Get Started":

            kits.append(link.get("href"))


def download_article(article):
    if not os.path.exists("articles"):
        os.mkdir("articles")

    if not os.path.isfile("articles/" + article[1] + ".pdf"):
        pdfkit.from_url(urljoin("https://software.intel.com/", article[0]), "articles/" + article[1] + ".pdf")


def download_video(video):
    if not os.path.exists("videos"):
        os.mkdir("videos")

    options = {"outtmpl": "videos/%(title)s.%(ext)s"}

    ytd = youtube_dl.YoutubeDL(options)
    ytd.download([urljoin("https://software.intel.com/", video[0])])


def download_file(url, destination):
    while True:
        try:
            response = requests.get(url, stream=True, timeout=10)

            if not os.path.exists(destination) and response.status_code == 200:
                with open(destination, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
            break
        except:
            print("!")


def download_kit(kit_url):
    if not os.path.exists("kits"):
        os.mkdir("kits")

    kit_url = urljoin("https://software.intel.com/", kit_url)

    r = requests.get(kit_url)
    soup = BeautifulSoup(r.text, "html.parser")

    kit_title = soup.find("title").string

    if not os.path.exists("kits/" + kit_title):
        os.mkdir("kits/" + kit_title)

    pdfkit.from_url(kit_url, "kits/" + kit_title + "/kit.pdf")

    for link in soup.find_all("a"):

        target = link.get("href")

        if target is not None and target.endswith(".zip"):
            download_file(urljoin("https://software.intel.com/", target), "kits/" + kit_title + "/" + os.path.split(target)[1])


# get_articles()
get_kits()

for k in kits:
    download_kit(k)
#
# for a in articles:
#     download_article(a)
#
# for v in videos:
#     download_video(v)
sounds-resource.com/downloader.py  (34 lines, Normal file)
@@ -0,0 +1,34 @@
import pathlib
import requests
import os

file = open("links.txt", "r")


for line in file.read().splitlines():
    path, link = line.split("\0")
    pathlib.Path("sounds/" + path.strip()).mkdir(parents=True, exist_ok=True)

    # if os.path.exists("sounds/" + path + "/" + path.split("/")[-2:-1][0] + ".zip") or \
    #         os.path.exists("sounds/" + path + "/" + path.split("/")[-2:-1][0] + ".mp3"):
    #     continue

    print("sounds/" + path)

    while True:
        # try:
        response = requests.get(link, stream=True, timeout=5)

        file_extension = os.path.splitext(response.headers["Content-Disposition"])[1][:-2]

        with open("sounds/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

        break
        # except:
        #     print("!", end="", flush=True)
        #     continue
sounds-resource.com/links.txt  (BIN, Normal file)
Binary file not shown.
sounds-resource.com/sound_crawler.py  (79 lines, Normal file)
@@ -0,0 +1,79 @@
import requests
import bs4
import os

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
}

URL = "https://www.sounds-resource.com"


def get_consoles():
    consoles = []

    response = requests.get(URL)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for console in soup.find(id="leftnav-consoles"):
        if type(console) == bs4.element.Tag and console.get("href") is not None:
            consoles.append((console.text, URL + console.get("href")))

    return consoles


def get_games(console, letter):
    games = []

    print(console[0] + " - " + letter)

    print(console[1] + letter + ".html")
    response = requests.get(console[1] + letter + ".html")
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):
        for child in link.findChildren():
            if child.get("class") is not None and child.get("class") == ['gameiconcontainer']:
                game_name = child.find("div").find("span").string

                games.append((game_name, URL + link.get("href")))

    return games


def get_sounds(game):
    sounds = []

    response = requests.get(game[1])
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for row in soup.find_all("tr"):

        if row.get("class") is not None and "altrow" in row.get("class")[0]:

            for child in row.children:
                if child is not None and isinstance(child, bs4.Tag) and child.get("style") == "padding-left: 10px;":

                    sound_name = child.string
                    sound_url = child.find("a").get("href")

                    sound_dl = "https://www.sounds-resource.com/download/" + sound_url.split("/")[-2:-1][0] + "/"

                    sounds.append((sound_name, sound_dl))

    return sounds


file = open("links.txt", "w")

for console in get_consoles():
    for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
        for game in get_games(console, letter):
            for sound in get_sounds(game):
                file.write(console[0] + os.sep + game[0] + os.sep + sound[0] + os.sep + "\0" + sound[1] + "\n")

file.close()
spritedatabase.net/ripper.py  (183 lines, Normal file)
@@ -0,0 +1,183 @@
import requests
from bs4 import BeautifulSoup
import re
import os
import mimetypes


headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}


def get_systems():
    systems = []

    response = requests.get("http://spritedatabase.net/", headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    links = soup.find_all("a")

    for link in links:
        if "system" in link.get('href'):

            systems.append((link.text.strip(), "http://spritedatabase.net/" + link.get('href')))

    return systems


def get_games(system):
    games = []

    response = requests.get(system[1], headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    links = soup.find_all("a")

    for link in links:
        if link.get('href') is not None and "game/" in link.get('href'):
            games.append((link.text.strip().replace("/", ""), "http://spritedatabase.net/" + link.get('href')))

    return games


def get_sprites(game):
    print(game[0])
    sprites = []

    while True:
        try:
            response = requests.get(game[1], headers=headers, timeout=5)
            break
        except:
            print("!", end="", flush=True)
            continue

    soup = BeautifulSoup(response.text, 'html.parser')

    links = soup.find_all("a")

    for link in links:
        if link.get('href') is not None and "file/" in link.get('href'):

            print(".", end="", flush=True)
            # Skip 'Latest files' thing
            if link.parent.get("class") is None:
                continue

            file_name = link.find(text=True)
            file_name = file_name.replace("zip", "")
            file_name = file_name.replace("mp3", "")
            file_name = file_name.replace("png", "")
            file_name = file_name.replace("gif", "")
            file_name = file_name.replace("ogg", "")
            file_name = re.sub('[^A-Za-z0-9 ]+', '', file_name)
            file_name = file_name.strip()

            sprites.append((file_name, "http://spritedatabase.net/" + link.get('href')))

    print("")
    return sprites


def get_download_link(link):
    while True:
        try:
            response = requests.get(link, headers=headers, timeout=5)
            break
        except:
            print("!", end="", flush=True)
            continue
    soup = BeautifulSoup(response.text, 'html.parser')

    images = soup.find_all("img")

    for image in images:

        if image.get("style") is not None and "border: 1px solid" in image.get("style"):
            download_link = image.get("src")

            if "layout/format" in download_link:

                for div in soup.find_all("div"):

                    if div.get("class") is not None and str(div.get("class")) == "['dlcapsule']":

                        link = div.find("a").get("href")

                        if "files/" in link:
                            return "http://spritedatabase.net/" + link
                        else:
                            return link

            else:
                return "http://spritedatabase.net/" + download_link


def download_all(folder, sprite):
    if not os.path.isdir(folder):
        os.mkdir(folder)

    link = get_download_link(sprite[1])

    if link is None:
        print("ERROR: " + sprite[1])
        return

    if "drive.google" in link or "mediafire" in link:
        print("I can't download external link. Link: " + link)
        open("links", "a").write(link + "\n")
    else:

        print(folder + os.sep + sprite[0])

        while True:
            try:
                response = requests.get(link, stream=True, headers=headers, timeout=5)

                extension = ""
                if response.headers["Content-Type"] is not None:
                    extension = mimetypes.guess_extension(response.headers["Content-Type"])

                    if extension is None:
                        extension = ""

                if not os.path.exists(folder + os.sep + sprite[0] + extension) and response.status_code == 200:
                    with open(folder + os.sep + sprite[0] + extension, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                break
            except:
                print("!")


mimetypes.init()


for system in get_systems():

    if not os.path.exists(system[0]):
        os.mkdir(system[0])

    for game in get_games(system):
        sprites = get_sprites(game)

        if os.path.exists(system[0] + os.sep + game[0]):
            print(str(len(os.listdir(system[0] + os.sep + game[0]))) + "/" + str(len(sprites)))

        if os.path.exists(system[0] + os.sep + game[0]) and len(os.listdir(system[0] + os.sep + game[0])) >= len(sprites):
            print("Skipping existing folder with " + str(len(os.listdir(system[0] + os.sep + game[0]))) + "/" + str(len(sprites)) + " existing sprites")
            continue

        for sprite in sprites:
            download_all(str(system[0] + os.sep + game[0]), sprite)
sproutvideo.com/run.py  (37 lines, Normal file)
@@ -0,0 +1,37 @@
import requests
from bs4 import BeautifulSoup
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("password")
parser.add_argument("url")
parser.add_argument("--user")

args = parser.parse_args()


headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": args.url
}


payload = {"email": args.user if args.user is not None else "", "password": args.password,
           "host": "unknown", "url": "unknown", "queryParams": ""}
print(payload)
r = requests.post(args.url.replace("embed", "video_password"), headers=headers, data=payload)
print(r.cookies)


soup = BeautifulSoup(r.text, "html.parser")

try:
    print(soup.find("a", attrs={"class": "hd-download"}).get("href"))
    print(soup.find("a", attrs={"class": "sd-download"}).get("href"))
except AttributeError:
    print("Wrong password/username")
sproutvideo.com/tmp.html  (209 lines, Normal file)
@@ -0,0 +1,209 @@
<!DOCTYPE html>
<html>
<head>
<!--[if IE]><script type="text/javascript">document.documentMode<9&&(document.location.href=document.location.href+(/\?/.test(document.location.href)?"&forceIE8=true":"?forceIE8=true"));</script><![endif]-->
<meta name="ROBOTS" content="NOINDEX, NOFOLLOW">
<link rel="prefetch" href="https://images.sproutvideo.com/d9a5d2f848be6e5f49bb7f1b09e93f80/89152ce323a8ed764ae122614e78c922/poster_frames/frame_0000.jpg" as="image">
<link rel="preconnect" href="//d1ajyp3swh7ygp.cloudfront.com">
<link rel="preconnect" href="//hls.videos.sproutvideo.com">

<link href='https://fonts.googleapis.com/css?family=Open+Sans' rel='stylesheet' type='text/css'>
<link href='//d1ajyp3swh7ygp.cloudfront.net/hls_player-0d10c5b6.css' rel='stylesheet' type='text/css'>

<!--[if IE ]>
<style type="text/css">
.player-subtitle-cue {
    font-size: 2em;
}
.player-select:before {
    display: none;
}
</style>
<![endif]-->

<script type="text/javascript">var dat = 'eyJzZXNzaW9uSUQiOiI2MWYxNTQ3Yi1mY2VkLTQ3MzEtODVlNC1kYWE1Y2MxMDdmNWIiLCJob3N0IjoidW5rbm93biIsImhhc19oZCI6dHJ1ZSwiaGFzX3NkIjp0cnVlLCJmdWxsSG9zdCI6InZpZGVvcy5zcHJvdXR2aWRlby5jb20iLCJ1cmwiOiJ1bmtub3duIiwiZHVyYXRpb24iOjY5NjAsInZpZGVvVWlkIjoiYTQ5YmRkYjExMzFjZTNjYTJjIiwidXNlclVpZCI6IjdlOWJkZWIyMWIxZWU3Y2RmMCIsInByaXZhY3lUb2tlbiI6IjBiNmE5NTA2NDZiZGI4M2YiLCJ1aWQiOiI5NTI0NWQxMi05N2RhLTQwZDktOTFhYy1kZWM2YzdkMmQ0MjgiLCJ1c2VyQWdlbnQiOiJNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQ7IHJ2OjUyLjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvNTIuMCIsInBvc3Ryb2xsIjpmYWxzZSwic3VidGl0bGVzIjpmYWxzZSwiYXV0b3BsYXkiOnRydWUsImxvb3AiOmZhbHNlLCJub0JpZ1BsYXkiOmZhbHNlLCJxdWFsaXR5Ijoic2QiLCJmbGFzaFBsYXllciI6Imh0dHBzOi8vYy5zcHJvdXR2aWRlby5jb20vcGxheWVyLTAuNC40LjIyLnN3ZiIsInRyYXNwYXJlbnQiOmZhbHNlLCJ0IjpudWxsLCJjb2xvcnMiOiIiLCJzM191c2VyX2hhc2giOiJkOWE1ZDJmODQ4YmU2ZTVmNDliYjdmMWIwOWU5M2Y4MCIsInMzX3ZpZGVvX2hhc2giOiI4OTE1MmNlMzIzYThlZDc2NGFlMTIyNjE0ZTc4YzkyMiIsImhscyI6dHJ1ZSwidGl0bGUiOiJDbGllbnQgV2VsY29tZXMgRGVjZW1iZXIgMjAxNSBTbWFsbGVyLm1wNCIsIndpZHRoIjoxMjgwLCJoZWlnaHQiOjcyMCwidm9sdW1lIjoxLCJjYyI6bnVsbCwic2lnbmF0dXJlIjp7IkNsb3VkRnJvbnQtUG9saWN5IjoiZXlKVGRHRjBaVzFsYm5RaU9sdDdJbEpsYzI5MWNtTmxJam9pYUhSMGNITTZMeTlvYkhNeUxuWnBaR1Z2Y3k1emNISnZkWFIyYVdSbGJ5NWpiMjB2WkRsaE5XUXlaamcwT0dKbE5tVTFaalE1WW1JM1pqRmlNRGxsT1RObU9EQXZPRGt4TlRKalpUTXlNMkU0WldRM05qUmhaVEV5TWpZeE5HVTNPR001TWpJdktpSXNJa052Ym1ScGRHbHZiaUk2ZXlKRVlYUmxUR1Z6YzFSb1lXNGlPbnNpUVZkVE9rVndiMk5vVkdsdFpTSTZNVFV4T1RjNE5UazFObjE5ZlYxOSIsIkNsb3VkRnJvbnQtU2lnbmF0dXJlIjoiblpCeGQzSkxwS1BaWGNSdDkxTTgwbVBwU0RVcC10dVkzSEg1RkV6cFZWQzdRU2c4STZXY0Jack1lV1l0Si1MWnh+Z2x6RkkySEEtRDJReEowZFNEbU9acGJpTDN3UFV+NEhxOElRTFVZQ1V0ZnFBTi11Y2VpeGZNUTZyWngtMVI1bnh2MG84VTZRdGlZdWotRXJXTDczckZnN0hydHdrcHdPcDRwakNFV3g5blJOMGZ+UWhaV1BncTJBVkFkRkZNeDItTkljQmpOcFBrRDdSTWEyeHJ4TlZ4Z1hXRUNqVUhBUzc3ZmNGaDVHaTNNNnRKdFBOZ0lZUGNwc2hFdm9EWlFSRVZ6fjRDWEZSeGVKaXF5MjBiV0IybW9wbFNsR2czZWJOcjJ+aVYyS09xNVVXclh5LW00V29rdlBBS0F1eE5maE1SZUtlflJ2NkhhMWlqRnBBYXdRX18iLCJDbG91ZEZyb250LUtleS1QYWlyLUlkIjoiQVBLQUlCNURHQ0dBUUo0R0dJVVEifSwiZm9yY2VCdWZmZXIiOmZhbHNlLCJ2ZXJzaW9uIjozLCJkZWJ1ZyI6ZmFsc2UsImNkU2lnIjoiUG9saWN5PWV5SlRkR0YwWlcxbGJuUWlPbHQ3SWxKbGMyOTFjbU5sSWpvaWFIUjBjSE02THk5b2JITXlMblpwWkdWdmN5NXpjSEp2ZFhSMmFXUmxieTVqYjIwdlkzSnZjM05rYjIxaGFXNHVlRzFzSWl3aVEyOXVaR2wwYVc5dUlqcDdJa1JoZEdWTVpYTnpWR2hoYmlJNmV5SkJWMU02UlhCdlkyaFVhVzFsSWpveE5URTVOelkwTXpnMmZYMTlYWDBfJlNpZ25hdHVyZT1NSzVjWWUwajlaR2RYQzNwNFBNekx6STdDQWE3alpldzJyYVdqaGJTZGZvMU9vLWt0ajNNenpabHdRb1ppeW1INUolN0VPUzFud0R2R3R3dWw1cTlYclZVUmxlbFIwbWlLd2hQUVVLcE1Za1FVd2VRTm1SYlM5SU44STNScU9xUWVuJTdFbGZSVzZndURrTlo1ZGI1VlA4RHdtcVVtbTlITUx4VzU3bDNpUmlvVm1PcUVwdnFLdTl3VnVQdHJPZW0lN0VYUEtoTkolN0UlN0VpbWJ3YjJOTVp0MW9MZW5QYjc5YmtwRlRPcSU3RUEtdGpkZzRHcTRtS2RLZVlxMGw5aWJ2ekpkMThnVkE1cGtrMTV2WEJtJTdFQ282LWVGa0N6UkpobjUzOXpPckVFRHdnUnd4TFFLV21yVVJINE82VVdUOGRscENudDdSbkRJby1TQUxNbW5TeUh0QlRSSklxMFFpd19fJktleS1QYWlyLUlkPUFQS0FJQjVER0NHQVFKNEdHSVVRIiwiYmFja2dyb3VuZFZpZGVvIjpmYWxzZSwiZmJTaWciOnsic2lnIjoicDdhMlVQa3Erd2x4alUvbEZIMk96U3FRcUZnPSIsImV4cGlyZXMiOjE1MTk3NzUxNTZ9LCJtb2JpbGUiOmZhbHNlfQ==';</script>
|
||||
|
||||
|
||||
|
||||
|
||||
<body>
|
||||
<div class="player paused sd no-cc " style="background-image: url('https://images.sproutvideo.com/d9a5d2f848be6e5f49bb7f1b09e93f80/89152ce323a8ed764ae122614e78c922/poster_frames/frame_0000.jpg'); background-size:contain; background-position: center;background-repeat: no-repeat;">
|
||||
|
||||
<div class="player-big-play-button">
|
||||
<svg viewBox="0 0 26 30"><polygon points="0,0 0,30 26,15"/></svg>
|
||||
</div>
|
||||
|
||||
<div class="player-buffer">
|
||||
<div class="spinner">
|
||||
<div class="rect1"></div>
|
||||
<div class="rect2"></div>
|
||||
<div class="rect3"></div>
|
||||
<div class="rect4"></div>
|
||||
<div class="rect5"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="player-video-holder">
|
||||
<video width='100%' height='100%' id='video-a49bddb1131ce3ca2c-html' crossorigin='anonymous' preload="auto" style="display:none;"></video>
|
||||
</div>
|
||||
<div class="player-control-bar hidden">
|
||||
<div class="player-play-pause player-button">
|
||||
<svg id="play" viewBox="0 0 26 30">
|
||||
<polygon points="0,0 0,30 26,15"/>
|
||||
</svg>
|
||||
<svg id="pause" viewBox="0 0 12 20">
|
||||
<path d="M1,20h2c0.6,0,1-0.4,1-1V1c0-0.6-0.4-1-1-1H1C0.4,0,0,0.4,0,1v18C0,19.6,0.4,20,1,20z"/>
|
||||
<path d="M11,0H9C8.4,0,8,0.4,8,1v18c0,0.6,0.4,1,1,1h2c0.6,0,1-0.4,1-1V1C12,0.4,11.6,0,11,0z"/>
|
||||
</svg>
|
||||
</div>
|
||||
|
||||
<div class="player-volume player-button">
|
||||
|
||||
<div class="volume-bar">
|
||||
<div class="volume-bar-background"></div>
|
||||
<div class="volume-track-background"></div>
|
||||
<div class="volume-track-controller"></div>
|
||||
<div class="volume-track-status"></div>
|
||||
</div>
|
||||
|
||||
<svg viewBox="0 0 17.3 13.6">
|
||||
<path id="speaker" d="m7.89999,0.17501l-3.4,3.3l-3.4,0c-0.7,0 -1.1,0.6 -1.1,1.1l0,4.5c0,0.6 0.4,1.1 1.1,1.1l3.4,0l3.5,3.3c0,0 1,0.6 1,-1.2c0,-0.9 0,-4.9 0,-10.9c0,-1.9 -1.1,-1.2 -1.1,-1.2l0,0z"/>
|
||||
<path id="wave-one" d="m10.99999,3.57501c-0.2,0 -0.4,0.2 -0.4,0.4l0,0.8c0,0.2 0.2,0.4 0.4,0.5c0.7,0.2 1.3,0.9 1.3,1.6c0,0.8 -0.5,1.4 -1.3,1.6c-0.2,0.1 -0.4,0.2 -0.4,0.5l0,0.9c0,0.2 0.2,0.4 0.4,0.4c1.7,-0.2 3,-1.6 3,-3.4s-1.3,-3.1 -3,-3.3z"/>
|
||||
<path id="wave-two" d="m10.59999,0.57501l0,0.8c0,0.2 0.2,0.4 0.4,0.4c2.6,0.2 4.6,2.4 4.6,5s-2,4.8 -4.6,5c-0.2,0 -0.4,0.2 -0.4,0.4l0,0.8c0,0.2 0.2,0.4 0.4,0.4c3.5,-0.2 6.3,-3.2 6.3,-6.7s-2.7,-6.3 -6.3,-6.5c-0.2,0 -0.4,0.2 -0.4,0.4z"/>
|
||||
<path id="mute" d="m15.69999,6.87501l1.4,-1.4c0.2,-0.2 0.2,-0.5 0,-0.7l-0.7,-0.7c-0.2,-0.2 -0.5,-0.2 -0.7,0l-1.4,1.4l-1.3,-1.3c-0.2,-0.2 -0.5,-0.2 -0.7,0l-0.7,0.7c-0.2,0.2 -0.2,0.5 0,0.7l1.4,1.3l-1.4,1.4c-0.2,0.2 -0.2,0.5 0,0.7l0.7,0.7c0.2,0.2 0.5,0.2 0.7,0l1.4,-1.4l1.4,1.4c0.2,0.2 0.5,0.2 0.7,0l0.5,-0.8c0.2,-0.2 0.2,-0.5 0,-0.7l-1.3,-1.3z"/>
|
||||
</svg>
|
||||
</div>
|
||||
|
||||
<div class="player-progress-time">00:00</div>
|
||||
<div class="player-tracks">
|
||||
<div class="player-track-background"></div>
|
||||
<div class="player-track-loaded"></div>
|
||||
<div class="player-track-controller"></div>
|
||||
<div class="player-track-progress"></div>
|
||||
<div class="player-track-time">
|
||||
<div class="player-track-time-background"></div>
|
||||
<div class="player-track-time-gradient"></div>
|
||||
<div class="player-track-time-time">00:00</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="player-total-time">00:00</div>
|
||||
|
||||
|
||||
<div class="player-download-btn player-button">
|
||||
<svg viewBox="0 0 26 26">
|
||||
<path d="m25,17h-2c-0.6,0-1,0.4-1,1v2.5c0,0.3-0.2,0.5-0.5,0.5h-17c-0.3,0-0.5-0.2-0.5-0.5v-2.5c0-0.6-0.4-1-1-1h-2c-0.6,0-1,0.4-1,1v6c0,0.6 0.4,1 1,1h24c0.6,0 1-0.4 1-1v-6c0-0.6-0.4-1-1-1z"/>
|
||||
<path d="m12.3,16.7c0.2,0.2 0.5,0.3 0.7,0.3s0.5-0.1 0.7-0.3l6-6c0.2-0.2 0.3-0.4 0.3-0.7s-0.1-0.5-0.3-0.7l-1.4-1.4c-0.2-0.2-0.4-0.3-0.7-0.3-0.3,0-0.5,0.1-0.7,0.3l-1,1c-0.3,0.3-0.9,0.1-0.9-0.4v-6.5c0-0.6-0.4-1-1-1h-2c-0.6,0-1,0.4-1,1v6.6c0,0.4-0.5,0.7-0.9,0.4l-1-1c-0.2-0.2-0.4-0.3-0.7-0.3-0.3,0-0.5,0.1-0.7,0.3l-1.4,1.4c-0.2,0.2-0.3,0.4-0.3,0.7s0.1,0.5 0.3,0.7l6,5.9z"/>
|
||||
</svg>
|
||||
</div>
|
||||
|
||||
<div class="player-cc player-button">
|
||||
<svg viewBox="0 0 24 17">
|
||||
<path d="M21,0H3C1.3,0,0,1.3,0,3v11c0,1.7,1.3,3,3,3h18c1.7,0,3-1.3,3-3V3C24,1.3,22.7,0,21,0z M10.2,11.9c-0.5,0.2-1,0.3-1.6,0.3
|
||||
c-0.6,0-1.2-0.1-1.7-0.3s-1-0.5-1.3-0.8C5.3,10.7,5,10.3,4.8,9.8C4.6,9.3,4.5,8.7,4.5,8.1c0-0.6,0.1-1.2,0.3-1.7
|
||||
C5,5.9,5.3,5.5,5.6,5.1C6,4.8,6.5,4.5,7,4.3C7.5,4.1,8,4,8.7,4c0.2,0,0.4,0,0.7,0.1c0.2,0,0.5,0.1,0.7,0.2c0.2,0.1,0.5,0.2,0.7,0.4
|
||||
c0.2,0.1,0.4,0.3,0.6,0.5L10,6.2C9.8,6,9.6,5.9,9.3,5.8C9.1,5.7,8.8,5.6,8.5,5.6c-0.3,0-0.6,0.1-0.9,0.2C7.3,5.9,7.1,6.1,6.9,6.3
|
||||
C6.7,6.5,6.5,6.8,6.4,7.1c-0.1,0.3-0.2,0.6-0.2,1c0,0.4,0.1,0.7,0.2,1c0.1,0.3,0.3,0.6,0.5,0.8s0.4,0.4,0.7,0.5
|
||||
c0.3,0.1,0.6,0.2,0.9,0.2c0.4,0,0.7-0.1,0.9-0.2c0.3-0.1,0.5-0.4,0.7-0.6l1.4,1.1C11.1,11.3,10.7,11.7,10.2,11.9z M18.9,11.9
|
||||
c-0.5,0.2-1,0.3-1.6,0.3c-0.6,0-1.2-0.1-1.7-0.3c-0.5-0.2-1-0.5-1.3-0.8c-0.4-0.4-0.7-0.8-0.9-1.3c-0.2-0.5-0.3-1.1-0.3-1.7
|
||||
c0-0.6,0.1-1.2,0.3-1.7c0.2-0.5,0.5-0.9,0.9-1.3c0.4-0.4,0.8-0.6,1.3-0.8C16.1,4.1,16.7,4,17.3,4c0.2,0,0.4,0,0.7,0.1
|
||||
c0.2,0,0.5,0.1,0.7,0.2c0.2,0.1,0.5,0.2,0.7,0.4c0.2,0.1,0.4,0.3,0.6,0.5l-1.3,1.1C18.4,6,18.2,5.9,18,5.8
|
||||
c-0.2-0.1-0.5-0.2-0.9-0.2c-0.3,0-0.6,0.1-0.9,0.2c-0.3,0.1-0.5,0.3-0.7,0.5c-0.2,0.2-0.4,0.5-0.5,0.8c-0.1,0.3-0.2,0.6-0.2,1
|
||||
c0,0.4,0.1,0.7,0.2,1c0.1,0.3,0.3,0.6,0.5,0.8c0.2,0.2,0.4,0.4,0.7,0.5c0.3,0.1,0.6,0.2,0.9,0.2c0.4,0,0.7-0.1,0.9-0.2
|
||||
c0.3-0.1,0.5-0.4,0.7-0.6l1.4,1.1C19.8,11.3,19.4,11.7,18.9,11.9z"/>
|
||||
</svg>
|
||||
</div>
|
||||
<div class="player-settings player-button">
|
||||
<svg viewBox="0 0 15.998 15.998">
|
||||
<path style="fill-rule:evenodd;clip-rule:evenodd;" d="M13.998,7c-0.553,0-1.08-0.443-1.291-0.952 c-0.21-0.508-0.15-1.194,0.24-1.585l0.707-0.706c0.391-0.391,0.391-1.024,0.001-1.415c-0.391-0.391-1.024-0.391-1.415,0 c0,0-0.316,0.316-0.707,0.707S10.457,3.5,9.949,3.29C9.442,3.08,8.998,2.553,8.998,2V1c0-0.553-0.447-1-1-1s-1,0.447-1,1v1 c0,0.553-0.442,1.08-0.95,1.291s-1.192,0.15-1.583-0.24L3.756,2.344c-0.391-0.391-1.024-0.39-1.413,0 C1.952,2.734,1.952,3.367,2.342,3.758l0.709,0.708C3.441,4.856,3.51,5.545,3.338,6.062C3.168,6.58,2.648,7.016,2.097,7.01L1,7 C0.448,7,0,7.449,0,8c0,0.553,0.448,1,1,1h1.001c0.552,0,1.087,0.438,1.331,0.925c0.245,0.486,0.188,1.159-0.207,1.546l-0.783,0.77 c-0.391,0.391-0.39,1.025,0,1.414c0.391,0.391,1.024,0.391,1.414,0.001l0.708-0.708c0.391-0.391,1.075-0.451,1.584-0.24 c0.508,0.211,0.95,0.738,0.95,1.291v1.001c0,0.552,0.448,1,1,0.999c0.553,0,1-0.447,1-0.999v-1.001c0-0.553,0.444-1.08,0.951-1.289 c0.508-0.211,1.193-0.15,1.584,0.24l0.707,0.707c0.391,0.391,1.024,0.391,1.413,0c0.391-0.391,0.392-1.024,0.002-1.414l-0.708-0.708 c-0.391-0.391-0.451-1.076-0.24-1.584S13.445,9,13.998,9h1c0.553,0,1-0.447,1-1s-0.447-1-1-1H13.998z M7.998,10 c-1.103,0-2-0.897-2-2s0.897-2,2-2s2,0.897,2,2S9.101,10,7.998,10z"/>
|
||||
</svg>
|
||||
</div>
|
||||
<div class="player-fullscreen player-button">
|
||||
<svg viewBox="0 0 15 15">
|
||||
<path d="M4.5,13H2v-2.5C2,10.2,1.8,10,1.5,10h-1C0.2,10,0,10.2,0,10.5V14c0,0.6,0.4,1,1,1h3.5C4.8,15,5,14.8,5,14.5v-1
|
||||
C5,13.2,4.8,13,4.5,13z"/>
|
||||
<path d="M4.5,0H1C0.4,0,0,0.4,0,1v3.5C0,4.8,0.2,5,0.5,5h1C1.8,5,2,4.8,2,4.5V2h2.5C4.8,2,5,1.8,5,1.5v-1C5,0.2,4.8,0,4.5,0z"/>
|
||||
<path d="M14,0h-3.5C10.2,0,10,0.2,10,0.5v1C10,1.8,10.2,2,10.5,2H13v2.5C13,4.8,13.2,5,13.5,5h1C14.8,5,15,4.8,15,4.5V1
|
||||
C15,0.4,14.6,0,14,0z"/>
|
||||
<path d="M14.5,10h-1c-0.3,0-0.5,0.2-0.5,0.5V13h-2.5c-0.3,0-0.5,0.2-0.5,0.5v1c0,0.3,0.2,0.5,0.5,0.5H14c0.6,0,1-0.4,1-1v-3.5
|
||||
C15,10.2,14.8,10,14.5,10z"/>
|
||||
</svg>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="player-settings-menu player-menu">
|
||||
<div class="player-resolution player-option">
|
||||
<label>Resolution</label>
|
||||
<div class="player-setting">
|
||||
<div class="player-select">
|
||||
<select class="player-resolution-select">
|
||||
<option selected>Auto</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="player-speed player-option">
|
||||
<label>Speed</label>
|
||||
<div class="player-setting">
|
||||
<div class="player-select">
|
||||
<select class="player-speed-select">
|
||||
<option value="0.25">0.25X</option>
|
||||
<option value="0.5">0.5X</option>
|
||||
<option value="1" selected>1X</option>
|
||||
<option value="1.25">1.25X</option>
|
||||
<option value="1.5">1.5X</option>
|
||||
<option value="2">2X</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class='player-mobile-muted'>
|
||||
<svg viewBox="0 0 17.3 13.6">
|
||||
<path id="speaker" d="m7.89999,0.17501l-3.4,3.3l-3.4,0c-0.7,0 -1.1,0.6 -1.1,1.1l0,4.5c0,0.6 0.4,1.1 1.1,1.1l3.4,0l3.5,3.3c0,0 1,0.6 1,-1.2c0,-0.9 0,-4.9 0,-10.9c0,-1.9 -1.1,-1.2 -1.1,-1.2l0,0z"/>
|
||||
<path id="mute" d="m15.69999,6.87501l1.4,-1.4c0.2,-0.2 0.2,-0.5 0,-0.7l-0.7,-0.7c-0.2,-0.2 -0.5,-0.2 -0.7,0l-1.4,1.4l-1.3,-1.3c-0.2,-0.2 -0.5,-0.2 -0.7,0l-0.7,0.7c-0.2,0.2 -0.2,0.5 0,0.7l1.4,1.3l-1.4,1.4c-0.2,0.2 -0.2,0.5 0,0.7l0.7,0.7c0.2,0.2 0.5,0.2 0.7,0l1.4,-1.4l1.4,1.4c0.2,0.2 0.5,0.2 0.7,0l0.5,-0.8c0.2,-0.2 0.2,-0.5 0,-0.7l-1.3,-1.3z"/>
|
||||
</svg>
|
||||
</div>
|
||||
<div class="player-stats">
|
||||
<div><div>Video ID:</div><span>a49bddb1131ce3ca2c</span></div>
|
||||
<div><div>User ID:</div><span>7e9bdeb21b1ee7cdf0</span></div>
|
||||
<div><div>Playback:</div><span class="stat-playback"></span></div>
|
||||
<div><div>Dimensions:</div><span class="stat-dimensions"></span></div>
|
||||
<div><div>Resolution:</div><span class="stat-resolution"></span></div>
|
||||
<div><div>Level Cap:</div><span class="stat-levelcap"></span></div>
|
||||
<div><div>Speed:</div><span><span class="sparkline"></span><span class="stat-speed"></span></span></div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="player-download-sheet player-sheet">
|
||||
<div class="player-card-btn player-card-close">
|
||||
<svg viewBox="0 0 8.071 8.07">
|
||||
<path d="M7.924,6.51L5.45,4.035l2.475-2.475c0.196-0.195,0.196-0.512,0-0.707L7.217,0.146 c-0.195-0.195-0.512-0.195-0.707,0L4.036,2.621L1.561,0.146c-0.195-0.195-0.512-0.195-0.707,0L0.147,0.854 c-0.196,0.195-0.196,0.512,0,0.707l2.475,2.475L0.147,6.51c-0.196,0.195-0.196,0.512,0,0.707l0.707,0.707 c0.195,0.195,0.512,0.195,0.707,0l2.475-2.475L6.51,7.924c0.195,0.195,0.512,0.195,0.707,0l0.707-0.707 C8.121,7.021,8.121,6.705,7.924,6.51z"/>
|
||||
</svg>
|
||||
</div>
|
||||
<div class="player-download-options">
|
||||
<h2>Download Video</h2>
|
||||
<ul>
|
||||
|
||||
<li><a class='sd-download' href="https://sproutvideo.com/videos/a49bddb1131ce3ca2c/player_download?expires=1519775155&type=sd&uid=95245d12-97da-40d9-91ac-dec6c7d2d428&auth=bb6a9aa8199938b2d31b9693f23235f3&signature=qSz0ZuxEMVeI8QfYatTIbyS2WYw%3D" target="_blank">SD</a></li>
|
||||
|
||||
|
||||
<li><a class='hd-download' href="https://sproutvideo.com/videos/a49bddb1131ce3ca2c/player_download?expires=1519775155&type=hd&uid=95245d12-97da-40d9-91ac-dec6c7d2d428&auth=bb6a9aa8199938b2d31b9693f23235f3&signature=sULsVrwr8cXXlNI3I%2BvYqNg51K8%3D" target="_blank">HD</a></li>
|
||||
|
||||
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<script src="//d1ajyp3swh7ygp.cloudfront.net/jquery.min.js"></script>
|
||||
<script src="https://src.litix.io/core/2/mux.js"></script>
|
||||
<!--[if lte IE 7]>
|
||||
<script type="text/javascript" src="//d1ajyp3swh7ygp.cloudfront.net/json2.js"></script>
|
||||
<![endif]-->
|
||||
<!--[if IE]>
|
||||
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/babel-polyfill/6.8.0/polyfill.min.js"></script>
|
||||
<![endif]-->
|
||||
<script type="text/javascript" src="//d1ajyp3swh7ygp.cloudfront.net/es6.min-8cdbfc06.js"></script>
|
||||
</body>
|
||||
</html>
|
78
textures-resource.com/crawler.py
Normal file
@@ -0,0 +1,78 @@
import requests
import bs4
import os

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
}

URL = "https://www.textures-resource.com"


def get_consoles():
    """Return (name, url) tuples for every console in the left navigation."""
    consoles = []

    response = requests.get(URL)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for console in soup.find(id="leftnav-consoles"):
        if type(console) == bs4.element.Tag and console.get("href") is not None:
            consoles.append((console.text, URL + console.get("href")))

    return consoles


def get_games(console, letter):
    """Return (name, url) tuples for a console's games starting with `letter`."""
    games = []

    print(console[0] + " - " + letter)

    print(console[1] + letter + ".html")
    response = requests.get(console[1] + letter + ".html")
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):
        for child in link.findChildren():
            if child.get("class") is not None and child.get("class") == ['gameiconcontainer']:
                game_name = child.find("div").find("span").string

                games.append((game_name, URL + link.get("href")))

    return games


def get_textures(game):
    """Return (name, preview url, download url) tuples for every texture of a game."""
    textures = []

    response = requests.get(game[1])
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):

        for div in link.find_all("div"):
            if div.get("class") == ["iconcontainer"]:

                # The numeric texture id is sliced out of the preview image path
                # and turned into a direct download URL.
                texture_url = div.find("div", attrs={"class": "iconbody"}).find("img").get("src")
                texture_id = texture_url.split("/")[4][:-4]
                model_download = "https://www.textures-resource.com/download/" + texture_id + "/"

                model_name = div.find("div").find("span").string
                textures.append((model_name, URL + texture_url, model_download))

    return textures


# Write one NUL-separated record per texture: <path>\0<preview url>\0<download url>
file = open("links.txt", "w")

for console in get_consoles():
    for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
        for game in get_games(console, letter):
            for model in get_textures(game):
                file.write(console[0] + os.sep + game[0] + os.sep + model[0] + os.sep + "\0" + model[1] + "\0" +
                           model[2] + "\n")

file.close()
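As a rough illustration of the `texture_id` slice above — the path below is made up; only the shape of the split (four segments, 4-character suffix) is what matters:

```python
# Hypothetical preview path of the form /<a>/<b>/<c>/<id>.png
texture_url = "/resources/textures/icons/12345.png"
parts = texture_url.split("/")   # ['', 'resources', 'textures', 'icons', '12345.png']
texture_id = parts[4][:-4]       # '12345' (drops the 4-character ".png" suffix)
print("https://www.textures-resource.com/download/" + texture_id + "/")
```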
39
textures-resource.com/downloader.py
Normal file
@@ -0,0 +1,39 @@
import pathlib
import requests
import os

file = open("links.txt", "r")


for line in file.read().splitlines():

    # Each record is <relative path>\0<preview url>\0<download url>
    path, preview, link = line.split("\0")

    if os.path.isfile("textures/" + path + "preview.png"):
        continue

    print("textures/" + path)

    pathlib.Path("textures/" + path).mkdir(parents=True, exist_ok=True)

    while True:
        try:
            response = requests.get(preview, stream=True, timeout=5)
            with open("textures/" + path + "preview.png", 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            response2 = requests.get(link, stream=True, timeout=5)

            # Derive the archive extension from the Content-Disposition header
            # (the last two characters of the header value are stripped).
            file_extension = os.path.splitext(response2.headers["Content-Disposition"])[1][:-2]

            with open("textures/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
                for chunk in response2.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            break
        except Exception:
            print("!", end="", flush=True)
            continue
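If the header format ever changes, a slightly more defensive way to pull the extension out of Content-Disposition could look like the sketch below; the helper name and the sample header value are made up, not part of the script:

```python
import os

def extension_from_disposition(value):
    # e.g. value = 'attachment; filename="metal_crate.zip";'  (made-up example)
    marker = "filename="
    if marker not in value:
        return ""
    filename = value.split(marker, 1)[1].strip().strip(';').strip('"')
    return os.path.splitext(filename)[1]

print(extension_from_disposition('attachment; filename="metal_crate.zip";'))  # ".zip"
```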
BIN
textures-resource.com/links.txt
Normal file
Binary file not shown.
3
viditut.com/README.md
Normal file
@@ -0,0 +1,3 @@
crawler_courses.py -> courses.txt
courses.txt -> crawler_videos.py -> links.txt
links.txt -> downloader.py -> (Downloaded videos)
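A minimal sketch of running the three stages in the order the README describes; it assumes the scripts are executed from the viditut.com directory with Python 3 available as `python3`:

```python
import subprocess

# crawler_courses.py -> courses.txt -> crawler_videos.py -> links -> downloader.py
for script in ["crawler_courses.py", "crawler_videos.py", "downloader.py"]:
    subprocess.run(["python3", script], check=True)
```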
0
viditut.com/courses.txt
Normal file
73
viditut.com/crawler_courses.py
Normal file
@@ -0,0 +1,73 @@
import requests
import bs4
import json

URL = "https://viditut.com"


def request_timeout(url):
    # Keep retrying until the request goes through.
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_categories():
    """Return (name, id) tuples for every course category on the home page."""
    categories = []

    r = requests.get(URL)
    soup = bs4.BeautifulSoup(r.text, "html.parser")

    for i in soup.find_all("i"):
        if i.get("class") is not None and len(i.get("class")) > 1 and "cat-" in i.get("class")[1]:
            category_id = i.get("class")[1][4:]
            category_name = i.get("title")[:i.get("title").find("-") - 1]

            categories.append((category_name, category_id))

    return categories


def get_courses(category):
    """Page through the category's AJAX endpoint until no new courses show up."""
    last_len = 0
    courses = []
    page = 0
    while True:

        page += 1
        r = request_timeout("https://viditut.com/ajax/category/" + category[1] + "/courses?page=" + str(page))
        soup = bs4.BeautifulSoup(json.loads(r.text)["html"], "html.parser")

        for link in soup.find_all("a"):
            if link.get("href") is not None:
                if link.find("h3") is not None:
                    course_link = link.get("href")
                    course_name = link.find("h3").string
                    course_id = course_link.split("/")[-1:][0][:-7]

                    courses.append((course_name, course_id, course_link))

        print("Page " + str(page) + " (" + str(len(courses)) + ")")

        # Stop once a page adds nothing new.
        if last_len == len(courses):
            break

        last_len = len(courses)

    return courses


# Write one NUL-separated record per course: <category id>\0<name>\0<id>\0<url>
file = open("courses.txt", "w")

for category in get_categories():
    print(category)
    for course in get_courses(category):
        print(course[0])
        file.write(category[1] + "\0" + course[0] + "\0" + course[1] + "\0" + course[2] + "\n")
        file.flush()


file.close()
68
viditut.com/crawler_videos.py
Normal file
@@ -0,0 +1,68 @@
import requests
import bs4
import json


def request_timeout(url):
    # Keep retrying until the request goes through.
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_videos(course):
    """Return (name, id) tuples for every video listed on a course page."""
    videos = []
    r = request_timeout(course[2])
    soup = bs4.BeautifulSoup(r.text, "html.parser")

    for link in soup.find_all("a"):

        if link.get("class") is not None and str(link.get("class")) == "['item-name', 'video-name', 'ga']":
            video_id = link.get("data-ga-value")
            video_name = link.text.replace("\n", "").strip()

            videos.append((video_name, video_id))

    return videos


def get_links(course, video):
    """Return (quality, url) tuples from the video's /play AJAX endpoint."""
    links = []
    r = request_timeout("https://viditut.com/ajax/course/" + course[1] + "/" + video[1] + "/play")
    json_obj = json.loads(r.text)

    if len(json_obj) > 0:
        json_obj = json_obj[0]
    else:
        return links

    for quality in json_obj["qualities"]:
        links.append((quality, json_obj["urls"][quality]))

    return links


file = open("courses.txt", "r")

fileout = open("links1.txt", "w")

for line in file.read().splitlines():

    category, course_name, course_id, course_url = line.split("\0")

    course = (course_name, course_id, course_url)

    print(course_name)

    for video in get_videos(course):
        for link in get_links(course, video):
            fileout.write(category + "/" + course_name + "/" + video[0] + "\0" + link[0] + "\0" + link[1] + "\n")
            fileout.flush()


fileout.close()
file.close()
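For context, `get_links` assumes the /play endpoint returns a JSON array whose first element carries a `qualities` list and a matching `urls` map. A made-up payload illustrating that shape — only the field names are taken from the code above:

```python
import json

# Entirely hypothetical response body for the /play endpoint.
sample = json.loads("""
[{"qualities": ["360", "720"],
  "urls": {"360": "https://example.com/video-360.mp4",
           "720": "https://example.com/video-720.mp4"}}]
""")

first = sample[0]
print([(q, first["urls"][q]) for q in first["qualities"]])
# [('360', 'https://example.com/video-360.mp4'), ('720', 'https://example.com/video-720.mp4')]
```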
38
viditut.com/downloader.py
Normal file
@@ -0,0 +1,38 @@
import pathlib
import os
import requests

file = open("links1.txt", "r")

i = 0

for line in file.read().splitlines():

    # Each record is <category/course/video title>\0<quality>\0<direct url>
    path, quality, link = line.split("\0")

    # Only keep the 720p rendition.
    if quality != "720":
        continue

    i += 1

    pathlib.Path(os.path.split(path)[0]).mkdir(parents=True, exist_ok=True)

    # Skip videos already downloaded on a previous run.
    if os.path.isfile(os.path.split(path)[0] + os.sep + str(i) + " -" + os.path.split(path)[1] +
                      "[" + quality + "].mp4"):
        continue

    print(path)

    while True:
        try:
            response = requests.get(link, stream=True, timeout=5)

            with open(os.path.split(path)[0] + os.sep + str(i) + " -" + os.path.split(path)[1] +
                      "[" + quality + "].mp4", 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            break
        except Exception:
            print("!", end="", flush=True)
            continue
BIN
viditut.com/links.txt
Normal file
Binary file not shown.