Mirror of https://github.com/simon987/Misc-Download-Scripts.git
Synced 2025-11-04 03:16:56 +00:00

Initial commit
This commit is contained in: cd09d2b791

1001freefonts.com/links.txt | 20327 lines (new file)
(File diff suppressed because it is too large)

1001freefonts.com/run.py | 95 lines (new file)
@@ -0,0 +1,95 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "https://www.1001freefonts.com/"
}


def request_timeout(url):
    # Retry until the request goes through; each failed attempt prints a "!".
    while True:
        try:
            return requests.get(url, timeout=30, headers=headers)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_dl_links(url):
    print(url)

    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    # Download links on the listing pages contain "/d/".
    for a in soup.findAll("a"):
        href = a.get("href")

        if href is not None and href.find("/d/") != -1:
            with open("links.txt", "a") as f:
                f.write(href + "\n")


def get_fonts():
    letters = list("abcdefghijklmnopqrstuvwxyz")
    letters.append("num")

    all_page_links = []

    for letter in letters:
        print(letter)

        r = request_timeout("https://www.1001freefonts.com/" + letter + "fonts.php")
        soup = BeautifulSoup(r.text, "html.parser")

        # The paging label ends with the highest page number.
        page_max = soup.find("div", attrs={"class": "pagingLabelWrapper"})
        page_max = int(page_max.text.split(" ")[-1])
        print(page_max)

        for i in range(1, page_max + 1):
            all_page_links.append("https://www.1001freefonts.com/" + letter + "fonts" + str(i) + ".php")

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, all_page_links)


def download_font(url):
    # File name is the last path segment; assumes a fonts/ directory exists.
    file_path = "fonts" + url[url.rfind("/"):]

    if os.path.exists(file_path):
        return

    print(file_path)
    r = requests.get(url, stream=True, headers=headers)

    if r.status_code != 200:
        print(r.status_code)
        return

    with open(file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


def download_all():
    pool = multiprocessing.Pool(processes=25)

    with open("links.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


# Stage 1 (get_fonts) already ran to produce links.txt; only the download stage runs now.
# get_fonts()
download_all()
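The request_timeout helper above (and its copies in the scripts below) retries forever, so one permanently dead URL can hang its worker. A bounded variant is a small change; this is a sketch, not part of the repo, and it reuses the script's headers dict:

import requests


def request_timeout(url, retries=5):
    # Same retry loop as above, but gives up after `retries` failed attempts.
    # `headers` is the dict defined in run.py above.
    for _ in range(retries):
        try:
            return requests.get(url, timeout=30, headers=headers)
        except requests.RequestException:
            print("!", end="", flush=True)
    raise RuntimeError("giving up on " + url)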
							
								
								
									
README.md | 9 lines (new file)
@@ -0,0 +1,9 @@
## Scripts for downloading content from a bunch of websites

### Setup:

```sudo pip3 install python-guerrillamail bs4 pdfkit youtube-dl```

### About

Feel free to contribute or suggest new websites using the Issues feature.
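The per-site run.py scripts in this commit all share the same two-stage shape: a crawl pass scrapes download URLs into a text file, then a download pass streams each URL to disk from a multiprocessing pool. A minimal sketch of that shared pattern, with hypothetical names (links.txt and an out/ directory are assumed to exist):

import multiprocessing

import requests


def fetch(url):
    # Stage 2: stream one URL to disk, named after the last path segment.
    r = requests.get(url, stream=True, timeout=30)
    with open("out/" + url.rsplit("/", 1)[-1], "wb") as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


if __name__ == "__main__":
    # Stage 1 would have written one URL per line to links.txt.
    with open("links.txt") as f:
        multiprocessing.Pool(processes=25).map(fetch, f.read().splitlines())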
							
								
								
									
abstractfonts.com/downloaded.txt | 0 lines (new, empty file)

abstractfonts.com/fonts.txt | 13866 lines (new file)
(File diff suppressed because it is too large)

abstractfonts.com/proxies.txt | 0 lines (new, empty file)

abstractfonts.com/run.py | 122 lines (new file)
@@ -0,0 +1,122 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os


proxy_index = 0

proxies = {
    "http": ""
}

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1"
}


def already_downloaded(font_id):
    with open("downloaded.txt", "r") as f:
        return font_id in f.read().splitlines()


def flag_downloaded(font_id):
    with open("downloaded.txt", "a") as f:
        f.write(font_id + "\n")


def get_new_proxy():
    # Switch to the next line of proxies.txt.
    global proxy_index

    with open("proxies.txt", "r") as f:
        line = f.read().splitlines()[proxy_index]
        proxies["http"] = line
        print("Switched to proxy " + line)
        proxy_index += 1


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_dl_links(url):
    print(url)
    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        if a.get("data-font-id") is not None:
            with open("fonts.txt", "a") as f:
                f.write(a.get("data-font-id") + "\n")


def get_fonts():
    letters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    letters.append("Numbers")

    all_page_links = []

    for letter in letters:
        all_page_links.append("http://www.abstractfonts.com/alpha/" + letter)

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, all_page_links)


def download_font(font_id):
    if already_downloaded(font_id):
        return

    while True:
        try:
            r = requests.get("http://www.abstractfonts.com/download/" + font_id, stream=True, proxies=proxies,
                             headers=headers, timeout=5)

            if r.status_code == 404:
                print(str(r.status_code) + " - http://www.abstractfonts.com/download/" + font_id)
                get_new_proxy()
                return

            if "Content-Disposition" not in r.headers:
                # No attachment header: likely a block/rate-limit page, so rotate proxies.
                print(r.text)
                get_new_proxy()
                return

            # File name is the quoted value at the end of the Content-Disposition header.
            file_path = "fonts/" + r.headers["Content-Disposition"][r.headers["Content-Disposition"].rfind("\"", 0, -2) + 1:-1]

            if os.path.exists(file_path):
                return

            print(file_path)

            with open(file_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            flag_downloaded(font_id)
            break
        except Exception:
            get_new_proxy()
            continue


# get_fonts()
get_new_proxy()

pool = multiprocessing.Pool(processes=100)

with open("fonts.txt", "r") as f1:
    pool.map(download_font, f1.read().splitlines())
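Note that proxy_index and proxies are plain module globals: every fork()ed worker gets its own copy, so each of the 100 processes rotates through proxies.txt independently from index 0, and an exhausted list raises IndexError. If one shared rotation is wanted, multiprocessing.Value gives a counter visible to all workers. A sketch under that assumption, not what the repo does (proxies is the dict defined in run.py above):

from multiprocessing import Value

shared_index = Value("i", 0)  # one counter inherited by every fork()ed worker


def get_new_proxy():
    with open("proxies.txt", "r") as f:
        lines = f.read().splitlines()
    with shared_index.get_lock():
        # Wraps around instead of raising IndexError when the list runs out.
        proxies["http"] = lines[shared_index.value % len(lines)]
        shared_index.value += 1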
							
								
								
									
craftsy.com/courses.txt | 0 lines (new, empty file)

craftsy.com/ripper.py | 170 lines (new file)
@@ -0,0 +1,170 @@
import requests
import pathlib
import os
import json


headers_login = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "Referer": "https://unlimited.craftsy.com/login",
    "X-Requested-By": "Craftsy"
}

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "application/json, text/plain, */*"
}


def login(email, password):
    # Hit the login page first to pick up session cookies, then post credentials.
    r1 = requests.get("https://unlimited.craftsy.com/login", headers=headers_login)

    payload = json.dumps({"email": email, "password": password})
    r2 = requests.post("https://api.craftsy.com/login/", data=payload, headers=headers_login, cookies=r1.cookies)

    print(r2.text)

    return r2


def get_course_info(r_login, course_id):
    while True:
        try:
            r = requests.get("https://api.craftsy.com/m/playlists/" + course_id, headers=headers_login,
                             cookies=r_login.cookies, timeout=5)
            break
        except Exception:
            print("!", end="", flush=True)
            continue

    course_info = json.loads(r.text)

    return course_info


def get_materials(r_login, course_id):
    materials = []

    while True:
        try:
            r = requests.get("https://api.craftsy.com/m/playlists/" + course_id + "/materials", headers=headers_login,
                             cookies=r_login.cookies, timeout=5)
            break
        except Exception:
            print("!", end="", flush=True)
            continue

    try:
        material_info = json.loads(r.text)

        for material in material_info:
            materials.append((material["materialName"], material["materialPath"]))
    except Exception:
        print("Err mat!", end="", flush=True)

    return materials


def get_episodes(course_info):
    episodes = []

    course_name = course_info["name"]
    print(course_name)

    for episode in course_info["episodes"]:
        episodes.append((course_name, episode["name"], episode["episodeId"]))

    return episodes


def download_episode(episode, r_login):
    # episode is a (course_name, episode_name, episode_id) tuple.
    while True:
        try:
            r = requests.get("https://api.craftsy.com/m/videos/secure/episodes/" + str(episode[2]), headers=headers,
                             cookies=r_login.cookies, timeout=5)
            break
        except Exception:
            print("!", end="", flush=True)
            continue

    episode_info = []
    try:
        episode_info = json.loads(r.text)
    except Exception:
        print("Err episode!", end="", flush=True)

    for source in episode_info:
        if source["format"] == "mp4":
            path = episode[0]
            print(path + os.sep + str(episode[1]) + ".mp4")
            pathlib.Path(path).mkdir(parents=True, exist_ok=True)

            if os.path.exists(path + os.sep + str(episode[2]) + " - " + episode[1].replace("/", "") + ".mp4"):
                print("Skipping...")
                continue

            while True:
                try:
                    response = requests.get(source["url"], stream=True, timeout=5)

                    with open(path + os.sep + str(episode[2]) + " - " + episode[1].replace("/", "") + ".mp4", 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                    break
                except Exception:
                    print("!", end="", flush=True)
                    continue


def download_material(r_login, material, course_info):
    path = course_info["name"]
    print(path + os.sep + material[0] + os.path.splitext(material[1])[1])
    pathlib.Path(path).mkdir(parents=True, exist_ok=True)

    if os.path.exists(path + os.sep + material[0] + os.path.splitext(material[1])[1]):
        print("Skipping...")
        return

    while True:
        try:
            response = requests.get(material[1], stream=True, timeout=5, cookies=r_login.cookies)

            with open(path + os.sep + material[0] + os.path.splitext(material[1])[1], 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            break
        except Exception:
            print("!", end="", flush=True)
            continue


rLogin = login("", "")  # credentials are left blank in the repo

for course in open("courses.txt").read().splitlines():
    print(course)

    course_info = get_course_info(rLogin, course)

    for material in get_materials(rLogin, course):
        download_material(rLogin, material, course_info)
        print(material)

    for episode in get_episodes(course_info):
        download_episode(episode, rLogin)
        print(episode)
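The script threads r_login.cookies through every call by hand. requests.Session carries cookies and default headers automatically, which would shrink most of the signatures above. A sketch of the same login flow on a Session, assumed equivalent but untested against the live API (headers_login as defined in ripper.py):

import json

import requests

session = requests.Session()
session.headers.update(headers_login)


def login(email, password):
    # Seed session cookies from the login page, then post credentials;
    # later session.get(...) calls reuse the cookies automatically.
    session.get("https://unlimited.craftsy.com/login")
    return session.post("https://api.craftsy.com/login/",
                        data=json.dumps({"email": email, "password": password}))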
							
								
								
									
dafont.com/links.txt | 34106 lines (new file)
(File diff suppressed because it is too large)

dafont.com/run.py | 100 lines (new file)
@@ -0,0 +1,100 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "https://www.dafont.com/"
}


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30, headers=headers)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_dl_links(url):
    print(url)

    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        href = a.get("href")

        if href is not None and href.startswith("//dl"):
            with open("links.txt", "a") as f:
                # href is protocol-relative ("//dl.dafont.com/..."), so only
                # the scheme needs to be prefixed.
                f.write("https:" + href + "\n")


def get_fonts():
    letters = list("abcdefghijklmnopqrstuvwxyz")
    letters.append("%23")

    page_links = []
    all_page_links = []

    for letter in letters:
        print(letter)

        r = request_timeout("https://www.dafont.com/alpha.php?lettre=" + letter)
        soup = BeautifulSoup(r.text, "html.parser")

        for a in soup.findAll("a"):
            if a.get("href") is not None and a.get("href").find("&page=") != -1:
                page_links.append("https://" + a.get("href"))

        # The second-to-last pager link carries the highest page number.
        page_max = page_links[-2]
        page_max = int(page_max[page_max.rfind("=") + 1:])

        print(page_max)

        for i in range(1, page_max + 1):
            all_page_links.append("https://www.dafont.com/alpha.php?lettre=" + letter + "&page=" + str(i))

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, all_page_links)


def download_font(url):
    # ".../dl/?f=name" -> "fonts/name.zip"
    file_path = "fonts/" + url[url.rfind("/") + 4:] + ".zip"

    if os.path.exists(file_path):
        return

    print(file_path)
    r = requests.get(url, stream=True, headers=headers)

    if r.status_code != 200:
        print(r.status_code)
        return

    with open(file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


def download_all():
    pool = multiprocessing.Pool(processes=25)

    with open("links.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


# get_fonts()
download_all()
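The matched hrefs are protocol-relative (they start with //dl...), which is why only the https: scheme is prefixed when writing links.txt. urllib.parse.urljoin handles that form, and ordinary relative links, in one call; a short sketch:

from urllib.parse import urljoin

# Scheme-relative href resolved against the page URL:
urljoin("https://www.dafont.com/alpha.php", "//dl.dafont.com/dl/?f=foo")
# -> 'https://dl.dafont.com/dl/?f=foo'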
							
								
								
									
fontfabric.com/run.py | 65 lines (new file)
@@ -0,0 +1,65 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing

fonts = []


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_fonts():
    # The free-fonts category spans three listing pages.
    for page in range(1, 4):
        r = request_timeout("http://www.fontfabric.com/category/free/page/" + str(page))
        soup = BeautifulSoup(r.text, "html.parser")

        for link in soup.find("div", attrs={"class": "recent-leads fix"}).findAll("a"):
            href = link.get("href")

            if href is not None and href not in fonts and href.find("#") == -1 and href.find("category/") == -1:
                fonts.append(href)

    print(len(fonts))


def download_font(url):
    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        onclick = a.get("onclick")

        # Download buttons navigate via onclick="window.location='...'".
        if onclick is not None and onclick.startswith("window.location"):
            dl_link = "http://www.fontfabric.com" + onclick[onclick.find("'") + 1:onclick.rfind("'")]
            file_path = "fonts" + dl_link[dl_link.rfind("/"):]
            r_dl = requests.get(dl_link, stream=True, cookies=r.cookies)

            if r_dl.status_code != 200:
                print(r_dl.status_code)
                return

            print(file_path)

            with open(file_path, 'wb') as f:
                for chunk in r_dl.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)


def download_all():
    pool = multiprocessing.Pool(processes=25)
    pool.map(download_font, fonts)


get_fonts()
download_all()
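The quote-slicing in download_font extracts whatever sits between the first and last single quote of the onclick value, which silently misfires if the attribute ever contains extra quotes. A regex pinned to the expected window.location='...' shape is more defensive; a sketch, hypothetical and with the same effect on well-formed markup:

import re

ONCLICK_RE = re.compile(r"window\.location\s*=\s*'([^']+)'")


def extract_dl_link(onclick):
    # Returns the absolute download URL, or None if the attribute
    # does not match the expected shape.
    m = ONCLICK_RE.search(onclick or "")
    return "http://www.fontfabric.com" + m.group(1) if m else None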
							
								
								
									
fontfreak.com/fonts.txt | 7458 lines (new file)
(File diff suppressed because it is too large)

fontfreak.com/run.py | 72 lines (new file)
@@ -0,0 +1,72 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os


fonts = []


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_fonts():
    letters = list("abcdefghijklmnopqrstuvwxyz")
    letters.append("no")

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, letters)


def get_dl_links(letter):
    # Listing pages are fonts-<letter><page>.htm, at most 10 pages per letter.
    for page in range(1, 11):
        r = request_timeout("http://www.fontfreak.com/fonts-" + letter + str(page) + ".htm")
        soup = BeautifulSoup(r.text, "html.parser")

        for a in soup.findAll("a"):
            if a.text == "click here to download":
                with open("fonts.txt", "a") as f:
                    f.write("http://www.fontfreak.com/" + a.get("href") + "\n")


def download_font(url):
    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")

    dl_link = soup.find("a", attrs={"title": "DOWNLOAD FONT"})

    if dl_link is not None:
        dl_url = "http://www.fontfreak.com/" + dl_link.get("href")
        file_path = "fonts/" + dl_url[dl_url.rfind("/") + 1:]

        if os.path.exists(file_path):
            return

        r = requests.get(dl_url, stream=True)

        print(file_path)

        with open(file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    else:
        print("no dl " + url)


get_fonts()

pool = multiprocessing.Pool(processes=25)
with open("fonts.txt", "r") as f:
    pool.map(download_font, f.read().splitlines())
							
								
								
									
fontmeme.com/fonts.txt | 37570 lines (new file)
(File diff suppressed because it is too large)

fontmeme.com/proxies.txt | 0 lines (new, empty file)

fontmeme.com/run.py | 115 lines (new file)
@@ -0,0 +1,115 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os


proxy_index = 0

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1"
}

proxies = {
    'https': '',
}


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_fonts():
    for i in range(3758):
        print(i)

        r = request_timeout("https://fontmeme.com/fonts/page/" + str(i))
        soup = BeautifulSoup(r.text, "html.parser")

        for div in soup.findAll("div"):
            if div.get("id") == "ptitle":
                for child in div.children:
                    if child.get("href") is not None:
                        with open("fonts.txt", "a") as f:
                            f.write(child.get("href") + '\n')


def get_new_proxy():
    global proxy_index

    with open("proxies.txt", "r") as f:
        line = f.read().splitlines()[proxy_index]
        proxies["https"] = line
        print("Switched to proxy " + line)
        proxy_index += 1


def download_font(font_url):
    # ".../fonts/<name>-font/" -> "fonts/<name>.zip"
    file_path = "fonts/" + font_url[font_url[:-1].rfind("/") + 1:-6] + ".zip"

    if os.path.exists(file_path):
        return

    r1 = request_timeout(font_url)

    dl_link_index = r1.text.find("https://fontmeme.com/fonts/download/")

    if dl_link_index != -1:
        dl_link = r1.text[dl_link_index: r1.text.find("'", dl_link_index)]

        headers["Referer"] = font_url

        try:
            r = requests.get(dl_link, stream=True, headers=headers, proxies=proxies, cookies=r1.cookies, timeout=10)
        except Exception:
            get_new_proxy()
            return

        if r.status_code != 200:
            print(r.status_code)
            return

        reached_limit = False

        with open(file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

        # A real download starts with the ZIP magic bytes "PK"; anything else
        # is the rate-limit page, so discard it and rotate proxies.
        with open(file_path, "rb") as f:
            if f.read(2) != b"PK":
                reached_limit = True

        if reached_limit:
            os.remove(file_path)
            print("You have reached the maximum permitted downloads")
            get_new_proxy()


def download_all():
    pool = multiprocessing.Pool(processes=100)

    with open("fonts.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


# get_fonts()
# get_new_proxy()
download_all()
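The two-byte probe only checks that the file begins with the ZIP magic "PK". The standard library can validate the archive structure instead; zipfile.is_zipfile reads the end-of-central-directory record, so truncated downloads are rejected too. A sketch:

import zipfile


def looks_like_zip(path):
    # True only if the file carries a valid ZIP end-of-central-directory
    # record, not merely the "PK" prefix.
    return zipfile.is_zipfile(path)

In download_font above, reached_limit would then become `not looks_like_zip(file_path)`.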
							
								
								
									
fontspace.com/fonts.txt | 27390 lines (new file)
(File diff suppressed because it is too large)

fontspace.com/run.py | 111 lines (new file)
@@ -0,0 +1,111 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
from urllib.parse import urljoin


headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "http://www.fontspace.com"
}


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


def get_dl_links(page_url):
    print(page_url)

    r_page = request_timeout(page_url)
    soup_page = BeautifulSoup(r_page.text, "html.parser")

    for dl_link in soup_page.findAll("a", attrs={"class": "box-button transparent"}):
        with open("fonts.txt", "a") as f:
            f.write(dl_link.get("href") + "\n")


def get_fonts():
    lists = list("abcdefghijklmnopqrstuvwxyz")
    lists.append("letter")

    page_links = []

    for page in lists:
        print(page)

        r = request_timeout("http://www.fontspace.com/list/" + page)
        soup = BeautifulSoup(r.text, "html.parser")

        for a in soup.findAll("a"):
            if a.get("href") is not None and a.get("href").find("?p=") != -1:
                page_links.append(a.get("href"))

        # The second-to-last pager link carries the highest page number.
        page_max = page_links[-2]
        page_max = int(page_max[page_max.rfind("=") + 1:])

        print(page_max)

        for i in range(1, page_max):
            page_links.append("http://www.fontspace.com/list/" + page + "?p=" + str(i))

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_dl_links, page_links)


def download_font(dl_url):
    full_url = urljoin("http://www.fontspace.com", dl_url)
    file_path = "fonts" + full_url[full_url.rfind("/"):]

    if os.path.exists(file_path):
        return

    print(file_path)

    # Workers read the module-level `cookies` set below (inherited on fork).
    r = requests.get(full_url, stream=True, headers=headers, cookies=cookies)

    if r.status_code != 200:
        print(r.status_code)
        return

    with open(file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


def get_cookie():
    # Any listing page works; the point is just to obtain a session cookie.
    r = request_timeout("http://www.fontspace.com/list/a?text=&p=2")
    return r.cookies


def download_all(cookies):
    pool = multiprocessing.Pool(processes=25)

    with open("fonts.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


# get_fonts()
cookies = get_cookie()

download_all(cookies)
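download_font reads the module-level cookies, and download_all's parameter is never used directly; this works only because fork()ed workers inherit the parent's globals. Passing the cookie jar through a pool initializer makes the dependency explicit and survives the spawn start method as well. A sketch, reusing the names above:

def _init_worker(cookie_jar):
    # Runs once in each worker process; publishes the jar under the
    # name that download_font already reads.
    global cookies
    cookies = cookie_jar


def download_all(cookies):
    pool = multiprocessing.Pool(processes=25, initializer=_init_worker,
                                initargs=(cookies,))
    with open("fonts.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())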
							
								
								
									
fontstruct.com/fonts.txt | 47830 lines (new file)
(File diff suppressed because it is too large)

fontstruct.com/run.py | 142 lines (new file)
@@ -0,0 +1,142 @@
import requests
from bs4 import BeautifulSoup
import multiprocessing
import os

username = ""
password = ""

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "https://fontstruct.com/",
    "Connection": "keep-alive"
}

font_ids = []


def request_timeout(url):
    # retry forever on network errors, printing "!" for each failed attempt
    while True:
        try:
            return requests.get(url, timeout=30, headers=headers)
        except requests.RequestException:
            print("!", end="", flush=True)
            continue


def login():
    # the first GET picks up the anonymous session cookie that login_check expects
    r1 = request_timeout("https://fontstruct.com/login")
    print(r1.cookies)

    # same headers as above, with a page-specific referer
    login_headers = dict(headers, Referer="https://fontstruct.com/login")

    payload = {"_username": username, "_password": password, "_csrf_token": "", "_submit": "Sign+In"}
    r = requests.post("https://fontstruct.com/login_check", headers=login_headers, data=payload, cookies=r1.cookies)
    print(r.cookies)
    print(len(r.text))
    print(r.headers)

    # login_check redirects on success; the cookies of interest ride on the
    # first response in the redirect history, not on the final page
    return r.history[0]


def get_font_ids(page_url):
    print(page_url)

    r = request_timeout(page_url)
    soup = BeautifulSoup(r.text, "html.parser")

    for a in soup.findAll("a"):
        href = a.get("href")

        if href is not None and href.startswith("/fontstructions") and href.find("/license/") == -1 and \
                href.find("/vote_breakdown/") == -1:
            font_id = href[href.find("show/") + 5:href.rfind("/")]

            # note: each pool worker has its own copy of font_ids, so this
            # de-duplicates per process only; fonts.txt may contain repeats
            if font_id not in font_ids:
                font_ids.append(font_id)
                with open("fonts.txt", "a") as f:
                    f.write(font_id + "\n")


def get_fonts():
    page_urls = []

    for page_num in range(1, 1428):
        page_urls.append("https://fontstruct.com/gallery?filters=all&page=" + str(page_num))

    pool = multiprocessing.Pool(processes=25)
    pool.map(get_font_ids, page_urls)


def download_font(font_id):
    dl_headers = dict(headers, Referer="https://fontstruct.com/fontstructions/download/" + font_id)

    dl_url = "https://fontstruct.com/font_archives/download/" + font_id

    while True:
        r = requests.get(dl_url, stream=True, headers=dl_headers, cookies=cookies)

        if r.status_code == 403:
            return

        if r.status_code == 500:
            continue

        if "Content-Disposition" not in r.headers:
            print(r.text)
            return

        # take everything after the last quote, e.g. filename*=UTF-8''name.zip
        file_path = "fonts/" + r.headers["Content-Disposition"][r.headers["Content-Disposition"].rfind("'") + 1:]

        if os.path.exists(file_path):
            return

        print(file_path)

        with open(file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        return


def download_all():
    pool = multiprocessing.Pool(processes=25)

    with open("fonts.txt", "r") as f:
        pool.map(download_font, f.read().splitlines())


cookies = login().cookies

# make sure the output directory exists before the workers start writing
os.makedirs("fonts", exist_ok=True)

# get_fonts()
download_all()
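A note on login() above: it hands back the first hop of the redirect history so the authenticated cookies are not lost. A requests.Session would track this implicitly; a minimal sketch under that assumption (reuses the headers dict from the script, session_login is a hypothetical name):

import requests

def session_login(username, password):
    s = requests.Session()                   # carries cookies across requests automatically
    s.headers.update(headers)
    s.get("https://fontstruct.com/login")    # picks up the anonymous session cookie
    s.post("https://fontstruct.com/login_check",
           data={"_username": username, "_password": password,
                 "_csrf_token": "", "_submit": "Sign+In"})
    return s                                 # s.cookies now holds the authenticated session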
							
								
								
									
										
BIN  lynda.com/courses.txt  Normal file
    Binary file not shown.

73  lynda.com/crawler-courses.py  Normal file
@ -0,0 +1,73 @@
import requests
import bs4
import json


URL = "https://lynda.com"


def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except requests.RequestException:
            print("!", end="", flush=True)
            continue


def get_categories():
    categories = []

    r = requests.get(URL)
    soup = bs4.BeautifulSoup(r.text, "html.parser")

    # category icons carry a second class of the form "cat-<id>" and a
    # title of the form "<name> - ..."
    for i in soup.find_all("i"):
        if i.get("class") is not None and len(i.get("class")) > 1 and "cat-" in i.get("class")[1]:
            category_id = i.get("class")[1][4:]
            category_name = i.get("title")[:i.get("title").find("-") - 1]

            categories.append((category_name, category_id))

    return categories


def get_courses(category):
    last_len = 0
    courses = []
    page = 0

    while True:
        page += 1
        r = request_timeout("https://lynda.com/ajax/category/" + category[1] + "/courses?page=" + str(page))
        soup = bs4.BeautifulSoup(json.loads(r.text)["html"], "html.parser")

        for link in soup.find_all("a"):
            if link.get("href") is not None and link.find("h3") is not None:
                course_link = link.get("href")
                course_name = link.find("h3").string
                course_id = course_link.split("/")[-1][:-7]

                courses.append((course_name, course_id, course_link))

        print("Page " + str(page) + " (" + str(len(courses)) + ")")

        # stop once a page adds no new courses
        if last_len == len(courses):
            break

        last_len = len(courses)

    return courses


file = open("courses.txt", "w")

for category in get_categories():
    print(category)
    for course in get_courses(category):
        print(course[0])
        # NUL-separated record: category id, course name, course id, course link
        file.write(category[1] + "\0" + course[0] + "\0" + course[1] + "\0" + course[2] + "\n")
    file.flush()

file.close()
							
								
								
									
11  lynda.com/tmp.py  Normal file
@ -0,0 +1,11 @@
terms = ["data science", "big data", "hadoop", "python", "data mining", "text mining", "deep learning", "blender",
         "unity", "zbrush", "substance"]

for line in open("courses.txt"):
    category, name, course_id, url = line.split("\0")

    for term in terms:
        if term in name.lower():
            print(url[:-1])  # drop the trailing newline
            break            # don't print the same course once per matching term
							
								
								
									
79  models-resource.com/crawler.py  Normal file
@ -0,0 +1,79 @@
import requests
import bs4
import os

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
}

URL = "https://www.models-resource.com"


def get_consoles():
    consoles = []

    response = requests.get(URL)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for console in soup.find(id="leftnav-consoles"):
        if isinstance(console, bs4.element.Tag) and console.get("href") is not None:
            consoles.append((console.text, URL + console.get("href")))

    return consoles


def get_games(console, letter):
    games = []

    print(console[0] + " - " + letter)

    print(console[1] + letter + ".html")
    response = requests.get(console[1] + letter + ".html")
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):
        for child in link.findChildren():
            if child.get("class") == ['gameiconcontainer']:
                game_name = child.find("div").find("span").string

                games.append((game_name, URL + link.get("href")))

    return games


def get_models(game):
    models = []

    response = requests.get(game[1])
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):
        for div in link.find_all("div"):
            if div.get("class") == ["iconcontainer"]:
                # the big-icon image path doubles as the source of the model id
                model_url = div.find("div", attrs={"class": "iconbody"}).find("img").get("src").replace("sheet_icons",
                                                                                                        "big_icons")
                model_id = model_url.split("/")[4][:-4]
                model_download = "https://www.models-resource.com/download/" + model_id + "/"

                model_name = div.find("div").find("span").string
                models.append((model_name, URL + model_url, model_download))

    return models


file = open("links.txt", "w")

for console in get_consoles():
    for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
        for game in get_games(console, letter):
            for model in get_models(game):
                # NUL-separated record: output path, preview image URL, download URL
                file.write(console[0] + os.sep + game[0] + os.sep + model[0] + os.sep + "\0" + model[1] + "\0" +
                           model[2] + "\n")

file.close()
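The model_id slice above takes the fifth path segment of the big-icon URL and drops its 4-character extension. For example (the path shape here is a guess for illustration, not taken from the site):

model_url = "/resources/big_icons/116/115588.png"   # hypothetical example path
model_id = model_url.split("/")[4][:-4]             # -> "115588"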
							
								
								
									
39  models-resource.com/downloader.py  Normal file
@ -0,0 +1,39 @@
import pathlib
import requests
import os

file = open("links.txt", "r")

for line in file.read().splitlines():
    path, preview, link = line.split("\0")

    # skip entries whose preview was already saved
    if os.path.isfile("models/" + path + "preview.png"):
        continue

    print("models/" + path)

    pathlib.Path("models/" + path).mkdir(parents=True, exist_ok=True)

    while True:
        try:
            response = requests.get(preview, stream=True, timeout=5)
            with open("models/" + path + "preview.png", 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            response2 = requests.get(link, stream=True, timeout=5)

            # e.g. 'attachment; filename="Model.zip";' -> '.zip'
            file_extension = os.path.splitext(response2.headers["Content-Disposition"])[1][:-2]

            # the final path component (the model name) doubles as the file name
            with open("models/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
                for chunk in response2.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            break
        except Exception:
            print("!", end="", flush=True)
            continue

file.close()
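The extension slice above depends on the Content-Disposition header ending with exactly two trailing characters after the extension; the standard library can parse the header properly. A minimal sketch (cgi.parse_header is in the stdlib through Python 3.12; the header value shown is a hypothetical example):

import cgi
import os

def extension_from_disposition(value, default=".zip"):
    # 'attachment; filename="Model.zip";' -> '.zip'
    _, params = cgi.parse_header(value)
    return os.path.splitext(params.get("filename", ""))[1] or default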
							
								
								
									
										
BIN  models-resource.com/links.txt  Normal file
    Binary file not shown.

1  poliigon.com/counter  Normal file
@ -0,0 +1 @@
1
							
								
								
									
163  poliigon.com/ripper.py  Normal file
@ -0,0 +1,163 @@
import random
import string
import requests
import time
from guerrillamail import GuerrillaMailSession


proxies = {
    # 'https': 'https://##.##.##.##:##',
}


headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Referer": "https://www.poliigon.com/register",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}


def read_email(email_address):
    s = GuerrillaMailSession()

    s.set_email_address(email_address)

    print(s.get_session_state())

    for message in s.get_email_list():
        if message.subject == "Poliigon: Email Verification":
            print("Got email")

            body = s.get_email(message.guid).body
            link = body[body.index("https://www.poliigon.com"):body.index("https://www.poliigon.com") + 71]

            return link


def download_file(url, cookies):
    r = requests.get(url, stream=True, headers=headers, proxies=proxies, cookies=cookies)

    if "X-Sendfile" in r.headers:
        local_filename = r.headers["X-Sendfile"].split('/')[-1]

        print(local_filename + "...")

        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        return False  # no error
    else:
        print("Error")
        return True   # error; the caller rolls the counter back


def rand_string():
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(12))


def get_next_num():
    with open("counter") as counter_file:
        counter = int(counter_file.read())

    with open("counter", 'w') as counter_file:
        counter_file.write(str(counter + 1))

    return counter


def decrement():
    with open("counter") as counter_file:
        counter = int(counter_file.read())

    with open("counter", 'w') as counter_file:
        counter_file.write(str(counter - 1))


def login(email, password):
    r_login_token = requests.get("https://www.poliigon.com/login", headers=headers, proxies=proxies)
    token = r_login_token.text[r_login_token.text.index("<input name=\"_token\" type=\"hidden\" value=\"") + 42:
                               r_login_token.text.index("<input name=\"_token\" type=\"hidden\" value=\"") + 82]

    # Login
    payload = {"_token": token, "email": email, "password": password}
    r_login = requests.post("https://www.poliigon.com/login", headers=headers, proxies=proxies, data=payload,
                            cookies=r_login_token.cookies)
    return r_login


def create_account_and_login():
    email = rand_string() + "@sharklasers.com"

    print("email is " + email)

    f_name = rand_string()
    l_name = rand_string()
    password = rand_string()

    print("Password is " + password)

    # Get cookie
    r = requests.get("https://www.poliigon.com/register", headers=headers, proxies=proxies)

    session_cookie = r.cookies['laravel_session']

    print("Got cookie: " + session_cookie)

    body = r.text

    # Get the 40-character CSRF token embedded in the registration form
    token = body[body.index("<input name=\"_token\" type=\"hidden\" value=\"") + 42:
                 body.index("<input name=\"_token\" type=\"hidden\" value=\"") + 82]

    print("Got token: " + token + " " + str(len(token)))

    # Register
    payload = {"_token": token, "first_name": f_name, "last_name": l_name, "email": email,
               "email_confirmation": email, "password": password, "password_confirmation": password}

    r2 = requests.post("https://www.poliigon.com/register", headers=headers, data=payload,
                       cookies=r.cookies, proxies=proxies)

    # Verify
    requests.get("https://www.poliigon.com/verify", headers=headers, proxies=proxies, cookies=r.cookies)

    if r2.text != "Error in exception handler.":
        print("Successful register")

        time.sleep(35)
        counter = 5

        # poll the inbox a few times for the verification mail
        link = None
        while counter > 0:
            counter -= 1
            link = read_email(email)

            if link is None:
                time.sleep(5)
            else:
                break

        if link is not None and "https" in link:
            # Verify email
            print("Verifying " + link)
            print(requests.get(link, headers=headers, proxies=proxies, cookies=r.cookies))

        # Email verified, now login
        return login(email, password)

    else:
        print(r2.text)


while True:
    rLogin = create_account_and_login()

    if rLogin is None:
        # registration failed; try again with a fresh account
        continue

    error = False
    while not error:
        error = download_file("https://www.poliigon.com/multiple_download/" + str(get_next_num()) + "/1K",
                              rLogin.cookies)
        if error:
            decrement()
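The two counter helpers above are the same read-modify-write with a different delta; a possible consolidation (bump_counter is a hypothetical name):

def bump_counter(delta):
    # single read-modify-write for both the increment and the rollback
    with open("counter") as f:
        value = int(f.read())
    with open("counter", "w") as f:
        f.write(str(value + delta))
    return value  # value before the change, like get_next_num()

get_next_num() then becomes bump_counter(1), and decrement() becomes bump_counter(-1) with the return value ignored.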
							
								
								
									
112  software.intel.com/run.py  Normal file
@ -0,0 +1,112 @@
import requests
from bs4 import BeautifulSoup
import os
import pdfkit
from urllib.parse import urljoin
import youtube_dl


articles = []
videos = []
kits = []


def get_articles():
    for page in range(0, 10):
        r = requests.get("https://software.intel.com/en-us/ai-academy/library?page=" + str(page))
        soup = BeautifulSoup(r.text, "html.parser")

        for link in soup.find_all("a"):
            href = link.get("href")

            if href is not None and link.string is not None:
                if href.startswith("/en-us/articles/"):
                    articles.append((href, link.string))
                elif href.startswith("/en-us/videos/"):
                    videos.append((href, link.string))

        print(str(len(articles)) + " articles")
        print(str(len(videos)) + " videos")


def get_kits():
    r = requests.get("https://software.intel.com/en-us/ai-academy/students/kits")
    soup = BeautifulSoup(r.text, "html.parser")

    for link in soup.find_all("a"):
        if link.string is not None and link.string == "Get Started":
            kits.append(link.get("href"))


def download_article(article):
    if not os.path.exists("articles"):
        os.mkdir("articles")

    if not os.path.isfile("articles/" + article[1] + ".pdf"):
        pdfkit.from_url(urljoin("https://software.intel.com/", article[0]), "articles/" + article[1] + ".pdf")


def download_video(video):
    if not os.path.exists("videos"):
        os.mkdir("videos")

    options = {"outtmpl": "videos/%(title)s.%(ext)s"}

    ytd = youtube_dl.YoutubeDL(options)
    ytd.download([urljoin("https://software.intel.com/", video[0])])


def download_file(url, destination):
    while True:
        try:
            response = requests.get(url, stream=True, timeout=10)

            if not os.path.exists(destination) and response.status_code == 200:
                with open(destination, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
            break
        except Exception:
            print("!")


def download_kit(kit_url):
    if not os.path.exists("kits"):
        os.mkdir("kits")

    kit_url = urljoin("https://software.intel.com/", kit_url)

    r = requests.get(kit_url)
    soup = BeautifulSoup(r.text, "html.parser")

    kit_title = soup.find("title").string

    if not os.path.exists("kits/" + kit_title):
        os.mkdir("kits/" + kit_title)

    # snapshot the kit page itself as a PDF, then pull its zip attachments
    pdfkit.from_url(kit_url, "kits/" + kit_title + "/kit.pdf")

    for link in soup.find_all("a"):
        target = link.get("href")

        if target is not None and target.endswith(".zip"):
            download_file(urljoin("https://software.intel.com/", target),
                          "kits/" + kit_title + "/" + os.path.split(target)[1])


# get_articles()
get_kits()

for k in kits:
    download_kit(k)

# for a in articles:
#     download_article(a)
#
# for v in videos:
#     download_video(v)
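One operational note on the pdfkit calls above: pdfkit is a thin wrapper around the wkhtmltopdf binary, which has to be installed separately. If the binary is not on PATH, it can be pointed at explicitly (the path shown is an example):

import pdfkit

config = pdfkit.configuration(wkhtmltopdf="/usr/local/bin/wkhtmltopdf")  # example path
pdfkit.from_url("https://software.intel.com/en-us/ai-academy/students/kits",
                "kits.pdf", configuration=config)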
							
								
								
									
34  sounds-resource.com/downloader.py  Normal file
@ -0,0 +1,34 @@
import pathlib
import requests
import os

file = open("links.txt", "r")

for line in file.read().splitlines():
    path, link = line.split("\0")
    pathlib.Path("sounds/" + path.strip()).mkdir(parents=True, exist_ok=True)

    # if os.path.exists("sounds/" + path + "/" + path.split("/")[-2:-1][0] + ".zip") or \
    #     os.path.exists("sounds/" + path + "/" + path.split("/")[-2:-1][0] + ".mp3"):
    #     continue

    print("sounds/" + path)

    while True:
        try:
            response = requests.get(link, stream=True, timeout=5)

            # e.g. 'attachment; filename="Sound.zip";' -> '.zip'
            file_extension = os.path.splitext(response.headers["Content-Disposition"])[1][:-2]

            with open("sounds/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            break
        except Exception:
            print("!", end="", flush=True)
            continue

file.close()
							
								
								
									
										
BIN  sounds-resource.com/links.txt  Normal file
    Binary file not shown.

79  sounds-resource.com/sound_crawler.py  Normal file
@ -0,0 +1,79 @@
import requests
import bs4
import os

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
}

URL = "https://www.sounds-resource.com"


def get_consoles():
    consoles = []

    response = requests.get(URL)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for console in soup.find(id="leftnav-consoles"):
        if isinstance(console, bs4.element.Tag) and console.get("href") is not None:
            consoles.append((console.text, URL + console.get("href")))

    return consoles


def get_games(console, letter):
    games = []

    print(console[0] + " - " + letter)

    print(console[1] + letter + ".html")
    response = requests.get(console[1] + letter + ".html")
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):
        for child in link.findChildren():
            if child.get("class") == ['gameiconcontainer']:
                game_name = child.find("div").find("span").string

                games.append((game_name, URL + link.get("href")))

    return games


def get_sounds(game):
    sounds = []

    response = requests.get(game[1])
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for row in soup.find_all("tr"):
        if row.get("class") is not None and "altrow" in row.get("class")[0]:
            for child in row.children:
                if child is not None and isinstance(child, bs4.Tag) and child.get("style") == "padding-left: 10px;":
                    sound_name = child.string
                    sound_url = child.find("a").get("href")

                    # the download id is the second-to-last path segment
                    sound_dl = "https://www.sounds-resource.com/download/" + sound_url.split("/")[-2:-1][0] + "/"

                    sounds.append((sound_name, sound_dl))

    return sounds


file = open("links.txt", "w")

for console in get_consoles():
    for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
        for game in get_games(console, letter):
            for sound in get_sounds(game):
                # NUL-separated record: output path, download URL
                file.write(console[0] + os.sep + game[0] + os.sep + sound[0] + os.sep + "\0" + sound[1] + "\n")

file.close()
							
								
								
									
183  spritedatabase.net/ripper.py  Normal file
@ -0,0 +1,183 @@
import requests
from bs4 import BeautifulSoup
import re
import os
import mimetypes


headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}


def get_systems():
    systems = []

    response = requests.get("http://spritedatabase.net/", headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    for link in soup.find_all("a"):
        if link.get('href') is not None and "system" in link.get('href'):
            systems.append((link.text.strip(), "http://spritedatabase.net/" + link.get('href')))

    return systems


def get_games(system):
    games = []

    response = requests.get(system[1], headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    for link in soup.find_all("a"):
        if link.get('href') is not None and "game/" in link.get('href'):
            games.append((link.text.strip().replace("/", ""), "http://spritedatabase.net/" + link.get('href')))

    return games


def get_sprites(game):
    print(game[0])
    sprites = []

    while True:
        try:
            response = requests.get(game[1], headers=headers, timeout=5)
            break
        except requests.RequestException:
            print("!", end="", flush=True)
            continue

    soup = BeautifulSoup(response.text, 'html.parser')

    for link in soup.find_all("a"):
        if link.get('href') is not None and "file/" in link.get('href'):
            print(".", end="", flush=True)

            # Skip 'Latest files' thing
            if link.parent.get("class") is None:
                continue

            # build a filesystem-safe name: drop extension fragments and punctuation
            file_name = link.find(text=True)
            for ext in ("zip", "mp3", "png", "gif", "ogg"):
                file_name = file_name.replace(ext, "")
            file_name = re.sub('[^A-Za-z0-9 ]+', '', file_name).strip()

            sprites.append((file_name, "http://spritedatabase.net/" + link.get('href')))

    print("")
    return sprites


def get_download_link(link):
    while True:
        try:
            response = requests.get(link, headers=headers, timeout=5)
            break
        except requests.RequestException:
            print("!", end="", flush=True)
            continue

    soup = BeautifulSoup(response.text, 'html.parser')

    for image in soup.find_all("img"):
        if image.get("style") is not None and "border: 1px solid" in image.get("style"):
            download_link = image.get("src")

            # 'layout/format' images are placeholders; the real link sits in
            # the 'dlcapsule' div instead
            if "layout/format" in download_link:
                for div in soup.find_all("div"):
                    if div.get("class") == ["dlcapsule"]:
                        link = div.find("a").get("href")

                        if "files/" in link:
                            return "http://spritedatabase.net/" + link
                        else:
                            return link

            else:
                return "http://spritedatabase.net/" + download_link


def download_all(folder, sprite):
    if not os.path.isdir(folder):
        os.mkdir(folder)

    link = get_download_link(sprite[1])

    if link is None:
        print("ERROR: " + sprite[1])
        return

    if "drive.google" in link or "mediafire" in link:
        print("I can't download external link. Link: " + link)
        with open("links", "a") as f:
            f.write(link + "\n")
    else:
        print(folder + os.sep + sprite[0])

        while True:
            try:
                response = requests.get(link, stream=True, headers=headers, timeout=5)

                # derive the file extension from the response MIME type
                extension = ""
                if response.headers["Content-Type"] is not None:
                    extension = mimetypes.guess_extension(response.headers["Content-Type"]) or ""

                if not os.path.exists(folder + os.sep + sprite[0] + extension) and response.status_code == 200:
                    with open(folder + os.sep + sprite[0] + extension, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                break
            except Exception:
                print("!")


mimetypes.init()


for system in get_systems():
    if not os.path.exists(system[0]):
        os.mkdir(system[0])

    for game in get_games(system):
        sprites = get_sprites(game)

        if os.path.exists(system[0] + os.sep + game[0]):
            print(str(len(os.listdir(system[0] + os.sep + game[0]))) + "/" + str(len(sprites)))

            if len(os.listdir(system[0] + os.sep + game[0])) >= len(sprites):
                print("Skipping existing folder with " + str(len(os.listdir(system[0] + os.sep + game[0]))) +
                      "/" + str(len(sprites)) + " existing sprites")
                continue

        for sprite in sprites:
            download_all(str(system[0] + os.sep + game[0]), sprite)
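For reference, the mimetypes.guess_extension call above maps the response's Content-Type onto a file suffix; a couple of concrete mappings:

import mimetypes

mimetypes.init()
print(mimetypes.guess_extension("image/png"))        # .png
print(mimetypes.guess_extension("application/zip"))  # .zip
print(mimetypes.guess_extension("no/such-type"))     # None, hence the `or ""` fallback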
							
								
								
									
37  sproutvideo.com/run.py  Normal file
@ -0,0 +1,37 @@
import requests
from bs4 import BeautifulSoup
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("password")
parser.add_argument("url")
parser.add_argument("--user")

args = parser.parse_args()


headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": args.url
}


# Submit the video password to the player's video_password endpoint,
# then scrape the SD/HD download links out of the returned player page
payload = {"email": args.user if args.user is not None else "", "password": args.password,
           "host": "unknown", "url": "unknown", "queryParams": ""}
print(payload)
r = requests.post(args.url.replace("embed", "video_password"), headers=headers, data=payload)
print(r.cookies)


soup = BeautifulSoup(r.text, "html.parser")

try:
    print(soup.find("a", attrs={"class": "hd-download"}).get("href"))
    print(soup.find("a", attrs={"class": "sd-download"}).get("href"))
except AttributeError:
    print("Wrong password/username")
209  sproutvideo.com/tmp.html  Normal file
@@ -0,0 +1,209 @@
<!DOCTYPE html>
<html>
  <head>
  <!--[if IE]><script type="text/javascript">document.documentMode<9&&(document.location.href=document.location.href+(/\?/.test(document.location.href)?"&forceIE8=true":"?forceIE8=true"));</script><![endif]-->
  <meta name="ROBOTS" content="NOINDEX, NOFOLLOW">
  <link rel="prefetch" href="https://images.sproutvideo.com/d9a5d2f848be6e5f49bb7f1b09e93f80/89152ce323a8ed764ae122614e78c922/poster_frames/frame_0000.jpg" as="image">
  <link rel="preconnect" href="//d1ajyp3swh7ygp.cloudfront.com">
  <link rel="preconnect" href="//hls.videos.sproutvideo.com">

  <link href='https://fonts.googleapis.com/css?family=Open+Sans' rel='stylesheet' type='text/css'>
  <link href='//d1ajyp3swh7ygp.cloudfront.net/hls_player-0d10c5b6.css' rel='stylesheet' type='text/css'>

  <!--[if IE ]>
  <style type="text/css">
    .player-subtitle-cue {
      font-size: 2em;
    }
    .player-select:before {
      display: none;
    }
  </style>
  <![endif]-->

  <script type="text/javascript">var dat = 'eyJzZXNzaW9uSUQiOiI2MWYxNTQ3Yi1mY2VkLTQ3MzEtODVlNC1kYWE1Y2MxMDdmNWIiLCJob3N0IjoidW5rbm93biIsImhhc19oZCI6dHJ1ZSwiaGFzX3NkIjp0cnVlLCJmdWxsSG9zdCI6InZpZGVvcy5zcHJvdXR2aWRlby5jb20iLCJ1cmwiOiJ1bmtub3duIiwiZHVyYXRpb24iOjY5NjAsInZpZGVvVWlkIjoiYTQ5YmRkYjExMzFjZTNjYTJjIiwidXNlclVpZCI6IjdlOWJkZWIyMWIxZWU3Y2RmMCIsInByaXZhY3lUb2tlbiI6IjBiNmE5NTA2NDZiZGI4M2YiLCJ1aWQiOiI5NTI0NWQxMi05N2RhLTQwZDktOTFhYy1kZWM2YzdkMmQ0MjgiLCJ1c2VyQWdlbnQiOiJNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQ7IHJ2OjUyLjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvNTIuMCIsInBvc3Ryb2xsIjpmYWxzZSwic3VidGl0bGVzIjpmYWxzZSwiYXV0b3BsYXkiOnRydWUsImxvb3AiOmZhbHNlLCJub0JpZ1BsYXkiOmZhbHNlLCJxdWFsaXR5Ijoic2QiLCJmbGFzaFBsYXllciI6Imh0dHBzOi8vYy5zcHJvdXR2aWRlby5jb20vcGxheWVyLTAuNC40LjIyLnN3ZiIsInRyYXNwYXJlbnQiOmZhbHNlLCJ0IjpudWxsLCJjb2xvcnMiOiIiLCJzM191c2VyX2hhc2giOiJkOWE1ZDJmODQ4YmU2ZTVmNDliYjdmMWIwOWU5M2Y4MCIsInMzX3ZpZGVvX2hhc2giOiI4OTE1MmNlMzIzYThlZDc2NGFlMTIyNjE0ZTc4YzkyMiIsImhscyI6dHJ1ZSwidGl0bGUiOiJDbGllbnQgV2VsY29tZXMgRGVjZW1iZXIgMjAxNSBTbWFsbGVyLm1wNCIsIndpZHRoIjoxMjgwLCJoZWlnaHQiOjcyMCwidm9sdW1lIjoxLCJjYyI6bnVsbCwic2lnbmF0dXJlIjp7IkNsb3VkRnJvbnQtUG9saWN5IjoiZXlKVGRHRjBaVzFsYm5RaU9sdDdJbEpsYzI5MWNtTmxJam9pYUhSMGNITTZMeTlvYkhNeUxuWnBaR1Z2Y3k1emNISnZkWFIyYVdSbGJ5NWpiMjB2WkRsaE5XUXlaamcwT0dKbE5tVTFaalE1WW1JM1pqRmlNRGxsT1RObU9EQXZPRGt4TlRKalpUTXlNMkU0WldRM05qUmhaVEV5TWpZeE5HVTNPR001TWpJdktpSXNJa052Ym1ScGRHbHZiaUk2ZXlKRVlYUmxUR1Z6YzFSb1lXNGlPbnNpUVZkVE9rVndiMk5vVkdsdFpTSTZNVFV4T1RjNE5UazFObjE5ZlYxOSIsIkNsb3VkRnJvbnQtU2lnbmF0dXJlIjoiblpCeGQzSkxwS1BaWGNSdDkxTTgwbVBwU0RVcC10dVkzSEg1RkV6cFZWQzdRU2c4STZXY0Jack1lV1l0Si1MWnh+Z2x6RkkySEEtRDJReEowZFNEbU9acGJpTDN3UFV+NEhxOElRTFVZQ1V0ZnFBTi11Y2VpeGZNUTZyWngtMVI1bnh2MG84VTZRdGlZdWotRXJXTDczckZnN0hydHdrcHdPcDRwakNFV3g5blJOMGZ+UWhaV1BncTJBVkFkRkZNeDItTkljQmpOcFBrRDdSTWEyeHJ4TlZ4Z1hXRUNqVUhBUzc3ZmNGaDVHaTNNNnRKdFBOZ0lZUGNwc2hFdm9EWlFSRVZ6fjRDWEZSeGVKaXF5MjBiV0IybW9wbFNsR2czZWJOcjJ+aVYyS09xNVVXclh5LW00V29rdlBBS0F1eE5maE1SZUtlflJ2NkhhMWlqRnBBYXdRX18iLCJDbG91ZEZyb250LUtleS1QYWlyLUlkIjoiQVBLQUlCNURHQ0dBUUo0R0dJVVEifSwiZm9yY2VCdWZmZXIiOmZhbHNlLCJ2ZXJzaW9uIjozLCJkZWJ1ZyI6ZmFsc2UsImNkU2lnIjoiUG9saWN5PWV5SlRkR0YwWlcxbGJuUWlPbHQ3SWxKbGMyOTFjbU5sSWpvaWFIUjBjSE02THk5b2JITXlMblpwWkdWdmN5NXpjSEp2ZFhSMmFXUmxieTVqYjIwdlkzSnZjM05rYjIxaGFXNHVlRzFzSWl3aVEyOXVaR2wwYVc5dUlqcDdJa1JoZEdWTVpYTnpWR2hoYmlJNmV5SkJWMU02UlhCdlkyaFVhVzFsSWpveE5URTVOelkwTXpnMmZYMTlYWDBfJlNpZ25hdHVyZT1NSzVjWWUwajlaR2RYQzNwNFBNekx6STdDQWE3alpldzJyYVdqaGJTZGZvMU9vLWt0ajNNenpabHdRb1ppeW1INUolN0VPUzFud0R2R3R3dWw1cTlYclZVUmxlbFIwbWlLd2hQUVVLcE1Za1FVd2VRTm1SYlM5SU44STNScU9xUWVuJTdFbGZSVzZndURrTlo1ZGI1VlA4RHdtcVVtbTlITUx4VzU3bDNpUmlvVm1PcUVwdnFLdTl3VnVQdHJPZW0lN0VYUEtoTkolN0UlN0VpbWJ3YjJOTVp0MW9MZW5QYjc5YmtwRlRPcSU3RUEtdGpkZzRHcTRtS2RLZVlxMGw5aWJ2ekpkMThnVkE1cGtrMTV2WEJtJTdFQ282LWVGa0N6UkpobjUzOXpPckVFRHdnUnd4TFFLV21yVVJINE82VVdUOGRscENudDdSbkRJby1TQUxNbW5TeUh0QlRSSklxMFFpd19fJktleS1QYWlyLUlkPUFQS0FJQjVER0NHQVFKNEdHSVVRIiwiYmFja2dyb3VuZFZpZGVvIjpmYWxzZSwiZmJTaWciOnsic2lnIjoicDdhMlVQa3Erd2x4alUvbEZIMk96U3FRcUZnPSIsImV4cGlyZXMiOjE1MTk3NzUxNTZ9LCJtb2JpbGUiOmZhbHNlfQ==';</script>




  <body>
    <div class="player paused sd no-cc " style="background-image: url('https://images.sproutvideo.com/d9a5d2f848be6e5f49bb7f1b09e93f80/89152ce323a8ed764ae122614e78c922/poster_frames/frame_0000.jpg'); background-size:contain; background-position: center;background-repeat: no-repeat;">

      <div class="player-big-play-button">
        <svg viewBox="0 0 26 30"><polygon points="0,0 0,30 26,15"/></svg>
      </div>

      <div class="player-buffer">
        <div class="spinner">
          <div class="rect1"></div>
          <div class="rect2"></div>
          <div class="rect3"></div>
          <div class="rect4"></div>
          <div class="rect5"></div>
        </div>
      </div>

      <div class="player-video-holder">
        <video width='100%' height='100%' id='video-a49bddb1131ce3ca2c-html' crossorigin='anonymous' preload="auto"    style="display:none;"></video>
      </div>
      <div class="player-control-bar hidden">
        <div class="player-play-pause player-button">
          <svg id="play" viewBox="0 0 26 30">
            <polygon points="0,0 0,30 26,15"/>
          </svg>
          <svg id="pause" viewBox="0 0 12 20">
            <path d="M1,20h2c0.6,0,1-0.4,1-1V1c0-0.6-0.4-1-1-1H1C0.4,0,0,0.4,0,1v18C0,19.6,0.4,20,1,20z"/>
            <path d="M11,0H9C8.4,0,8,0.4,8,1v18c0,0.6,0.4,1,1,1h2c0.6,0,1-0.4,1-1V1C12,0.4,11.6,0,11,0z"/>
          </svg>
        </div>

        <div class="player-volume player-button">

          <div class="volume-bar">
            <div class="volume-bar-background"></div>
            <div class="volume-track-background"></div>
            <div class="volume-track-controller"></div>
            <div class="volume-track-status"></div>
          </div>

          <svg viewBox="0 0 17.3 13.6">
             <path id="speaker" d="m7.89999,0.17501l-3.4,3.3l-3.4,0c-0.7,0 -1.1,0.6 -1.1,1.1l0,4.5c0,0.6 0.4,1.1 1.1,1.1l3.4,0l3.5,3.3c0,0 1,0.6 1,-1.2c0,-0.9 0,-4.9 0,-10.9c0,-1.9 -1.1,-1.2 -1.1,-1.2l0,0z"/>
             <path id="wave-one" d="m10.99999,3.57501c-0.2,0 -0.4,0.2 -0.4,0.4l0,0.8c0,0.2 0.2,0.4 0.4,0.5c0.7,0.2 1.3,0.9 1.3,1.6c0,0.8 -0.5,1.4 -1.3,1.6c-0.2,0.1 -0.4,0.2 -0.4,0.5l0,0.9c0,0.2 0.2,0.4 0.4,0.4c1.7,-0.2 3,-1.6 3,-3.4s-1.3,-3.1 -3,-3.3z"/>
             <path id="wave-two" d="m10.59999,0.57501l0,0.8c0,0.2 0.2,0.4 0.4,0.4c2.6,0.2 4.6,2.4 4.6,5s-2,4.8 -4.6,5c-0.2,0 -0.4,0.2 -0.4,0.4l0,0.8c0,0.2 0.2,0.4 0.4,0.4c3.5,-0.2 6.3,-3.2 6.3,-6.7s-2.7,-6.3 -6.3,-6.5c-0.2,0 -0.4,0.2 -0.4,0.4z"/>
             <path id="mute" d="m15.69999,6.87501l1.4,-1.4c0.2,-0.2 0.2,-0.5 0,-0.7l-0.7,-0.7c-0.2,-0.2 -0.5,-0.2 -0.7,0l-1.4,1.4l-1.3,-1.3c-0.2,-0.2 -0.5,-0.2 -0.7,0l-0.7,0.7c-0.2,0.2 -0.2,0.5 0,0.7l1.4,1.3l-1.4,1.4c-0.2,0.2 -0.2,0.5 0,0.7l0.7,0.7c0.2,0.2 0.5,0.2 0.7,0l1.4,-1.4l1.4,1.4c0.2,0.2 0.5,0.2 0.7,0l0.5,-0.8c0.2,-0.2 0.2,-0.5 0,-0.7l-1.3,-1.3z"/>
          </svg>
        </div>

        <div class="player-progress-time">00:00</div>
        <div class="player-tracks">
          <div class="player-track-background"></div>
          <div class="player-track-loaded"></div>
          <div class="player-track-controller"></div>
          <div class="player-track-progress"></div>
          <div class="player-track-time">
            <div class="player-track-time-background"></div>
            <div class="player-track-time-gradient"></div>
            <div class="player-track-time-time">00:00</div>
          </div>
        </div>
        <div class="player-total-time">00:00</div>

        <div class="player-download-btn player-button">
          <svg viewBox="0 0 26 26">
            <path d="m25,17h-2c-0.6,0-1,0.4-1,1v2.5c0,0.3-0.2,0.5-0.5,0.5h-17c-0.3,0-0.5-0.2-0.5-0.5v-2.5c0-0.6-0.4-1-1-1h-2c-0.6,0-1,0.4-1,1v6c0,0.6 0.4,1 1,1h24c0.6,0 1-0.4 1-1v-6c0-0.6-0.4-1-1-1z"/>
            <path d="m12.3,16.7c0.2,0.2 0.5,0.3 0.7,0.3s0.5-0.1 0.7-0.3l6-6c0.2-0.2 0.3-0.4 0.3-0.7s-0.1-0.5-0.3-0.7l-1.4-1.4c-0.2-0.2-0.4-0.3-0.7-0.3-0.3,0-0.5,0.1-0.7,0.3l-1,1c-0.3,0.3-0.9,0.1-0.9-0.4v-6.5c0-0.6-0.4-1-1-1h-2c-0.6,0-1,0.4-1,1v6.6c0,0.4-0.5,0.7-0.9,0.4l-1-1c-0.2-0.2-0.4-0.3-0.7-0.3-0.3,0-0.5,0.1-0.7,0.3l-1.4,1.4c-0.2,0.2-0.3,0.4-0.3,0.7s0.1,0.5 0.3,0.7l6,5.9z"/>
          </svg>
        </div>

        <div class="player-cc player-button">
          <svg viewBox="0 0 24 17">
            <path d="M21,0H3C1.3,0,0,1.3,0,3v11c0,1.7,1.3,3,3,3h18c1.7,0,3-1.3,3-3V3C24,1.3,22.7,0,21,0z M10.2,11.9c-0.5,0.2-1,0.3-1.6,0.3
              c-0.6,0-1.2-0.1-1.7-0.3s-1-0.5-1.3-0.8C5.3,10.7,5,10.3,4.8,9.8C4.6,9.3,4.5,8.7,4.5,8.1c0-0.6,0.1-1.2,0.3-1.7
              C5,5.9,5.3,5.5,5.6,5.1C6,4.8,6.5,4.5,7,4.3C7.5,4.1,8,4,8.7,4c0.2,0,0.4,0,0.7,0.1c0.2,0,0.5,0.1,0.7,0.2c0.2,0.1,0.5,0.2,0.7,0.4
              c0.2,0.1,0.4,0.3,0.6,0.5L10,6.2C9.8,6,9.6,5.9,9.3,5.8C9.1,5.7,8.8,5.6,8.5,5.6c-0.3,0-0.6,0.1-0.9,0.2C7.3,5.9,7.1,6.1,6.9,6.3
              C6.7,6.5,6.5,6.8,6.4,7.1c-0.1,0.3-0.2,0.6-0.2,1c0,0.4,0.1,0.7,0.2,1c0.1,0.3,0.3,0.6,0.5,0.8s0.4,0.4,0.7,0.5
              c0.3,0.1,0.6,0.2,0.9,0.2c0.4,0,0.7-0.1,0.9-0.2c0.3-0.1,0.5-0.4,0.7-0.6l1.4,1.1C11.1,11.3,10.7,11.7,10.2,11.9z M18.9,11.9
              c-0.5,0.2-1,0.3-1.6,0.3c-0.6,0-1.2-0.1-1.7-0.3c-0.5-0.2-1-0.5-1.3-0.8c-0.4-0.4-0.7-0.8-0.9-1.3c-0.2-0.5-0.3-1.1-0.3-1.7
              c0-0.6,0.1-1.2,0.3-1.7c0.2-0.5,0.5-0.9,0.9-1.3c0.4-0.4,0.8-0.6,1.3-0.8C16.1,4.1,16.7,4,17.3,4c0.2,0,0.4,0,0.7,0.1
              c0.2,0,0.5,0.1,0.7,0.2c0.2,0.1,0.5,0.2,0.7,0.4c0.2,0.1,0.4,0.3,0.6,0.5l-1.3,1.1C18.4,6,18.2,5.9,18,5.8
              c-0.2-0.1-0.5-0.2-0.9-0.2c-0.3,0-0.6,0.1-0.9,0.2c-0.3,0.1-0.5,0.3-0.7,0.5c-0.2,0.2-0.4,0.5-0.5,0.8c-0.1,0.3-0.2,0.6-0.2,1
              c0,0.4,0.1,0.7,0.2,1c0.1,0.3,0.3,0.6,0.5,0.8c0.2,0.2,0.4,0.4,0.7,0.5c0.3,0.1,0.6,0.2,0.9,0.2c0.4,0,0.7-0.1,0.9-0.2
              c0.3-0.1,0.5-0.4,0.7-0.6l1.4,1.1C19.8,11.3,19.4,11.7,18.9,11.9z"/>
          </svg>
        </div>
        <div class="player-settings player-button">
          <svg viewBox="0 0 15.998 15.998">
            <path style="fill-rule:evenodd;clip-rule:evenodd;" d="M13.998,7c-0.553,0-1.08-0.443-1.291-0.952 c-0.21-0.508-0.15-1.194,0.24-1.585l0.707-0.706c0.391-0.391,0.391-1.024,0.001-1.415c-0.391-0.391-1.024-0.391-1.415,0 c0,0-0.316,0.316-0.707,0.707S10.457,3.5,9.949,3.29C9.442,3.08,8.998,2.553,8.998,2V1c0-0.553-0.447-1-1-1s-1,0.447-1,1v1 c0,0.553-0.442,1.08-0.95,1.291s-1.192,0.15-1.583-0.24L3.756,2.344c-0.391-0.391-1.024-0.39-1.413,0 C1.952,2.734,1.952,3.367,2.342,3.758l0.709,0.708C3.441,4.856,3.51,5.545,3.338,6.062C3.168,6.58,2.648,7.016,2.097,7.01L1,7 C0.448,7,0,7.449,0,8c0,0.553,0.448,1,1,1h1.001c0.552,0,1.087,0.438,1.331,0.925c0.245,0.486,0.188,1.159-0.207,1.546l-0.783,0.77 c-0.391,0.391-0.39,1.025,0,1.414c0.391,0.391,1.024,0.391,1.414,0.001l0.708-0.708c0.391-0.391,1.075-0.451,1.584-0.24 c0.508,0.211,0.95,0.738,0.95,1.291v1.001c0,0.552,0.448,1,1,0.999c0.553,0,1-0.447,1-0.999v-1.001c0-0.553,0.444-1.08,0.951-1.289 c0.508-0.211,1.193-0.15,1.584,0.24l0.707,0.707c0.391,0.391,1.024,0.391,1.413,0c0.391-0.391,0.392-1.024,0.002-1.414l-0.708-0.708 c-0.391-0.391-0.451-1.076-0.24-1.584S13.445,9,13.998,9h1c0.553,0,1-0.447,1-1s-0.447-1-1-1H13.998z M7.998,10 c-1.103,0-2-0.897-2-2s0.897-2,2-2s2,0.897,2,2S9.101,10,7.998,10z"/>
          </svg>
        </div>
        <div class="player-fullscreen player-button">
          <svg viewBox="0 0 15 15">
            <path d="M4.5,13H2v-2.5C2,10.2,1.8,10,1.5,10h-1C0.2,10,0,10.2,0,10.5V14c0,0.6,0.4,1,1,1h3.5C4.8,15,5,14.8,5,14.5v-1
              C5,13.2,4.8,13,4.5,13z"/>
            <path d="M4.5,0H1C0.4,0,0,0.4,0,1v3.5C0,4.8,0.2,5,0.5,5h1C1.8,5,2,4.8,2,4.5V2h2.5C4.8,2,5,1.8,5,1.5v-1C5,0.2,4.8,0,4.5,0z"/>
            <path d="M14,0h-3.5C10.2,0,10,0.2,10,0.5v1C10,1.8,10.2,2,10.5,2H13v2.5C13,4.8,13.2,5,13.5,5h1C14.8,5,15,4.8,15,4.5V1
              C15,0.4,14.6,0,14,0z"/>
            <path d="M14.5,10h-1c-0.3,0-0.5,0.2-0.5,0.5V13h-2.5c-0.3,0-0.5,0.2-0.5,0.5v1c0,0.3,0.2,0.5,0.5,0.5H14c0.6,0,1-0.4,1-1v-3.5
              C15,10.2,14.8,10,14.5,10z"/>
          </svg>
        </div>
      </div>

      <div class="player-settings-menu player-menu">
        <div class="player-resolution player-option">
          <label>Resolution</label>
          <div class="player-setting">
            <div class="player-select">
              <select class="player-resolution-select">
                <option selected>Auto</option>
              </select>
            </div>
          </div>
        </div>
        <div class="player-speed player-option">
          <label>Speed</label>
          <div class="player-setting">
            <div class="player-select">
              <select class="player-speed-select">
                <option value="0.25">0.25X</option>
                <option value="0.5">0.5X</option>
                <option value="1" selected>1X</option>
                <option value="1.25">1.25X</option>
                <option value="1.5">1.5X</option>
                <option value="2">2X</option>
              </select>
            </div>
          </div>
        </div>
      </div>
      <div class='player-mobile-muted'>
        <svg viewBox="0 0 17.3 13.6">
           <path id="speaker" d="m7.89999,0.17501l-3.4,3.3l-3.4,0c-0.7,0 -1.1,0.6 -1.1,1.1l0,4.5c0,0.6 0.4,1.1 1.1,1.1l3.4,0l3.5,3.3c0,0 1,0.6 1,-1.2c0,-0.9 0,-4.9 0,-10.9c0,-1.9 -1.1,-1.2 -1.1,-1.2l0,0z"/>
           <path id="mute" d="m15.69999,6.87501l1.4,-1.4c0.2,-0.2 0.2,-0.5 0,-0.7l-0.7,-0.7c-0.2,-0.2 -0.5,-0.2 -0.7,0l-1.4,1.4l-1.3,-1.3c-0.2,-0.2 -0.5,-0.2 -0.7,0l-0.7,0.7c-0.2,0.2 -0.2,0.5 0,0.7l1.4,1.3l-1.4,1.4c-0.2,0.2 -0.2,0.5 0,0.7l0.7,0.7c0.2,0.2 0.5,0.2 0.7,0l1.4,-1.4l1.4,1.4c0.2,0.2 0.5,0.2 0.7,0l0.5,-0.8c0.2,-0.2 0.2,-0.5 0,-0.7l-1.3,-1.3z"/>
        </svg>
      </div>
      <div class="player-stats">
        <div><div>Video ID:</div><span>a49bddb1131ce3ca2c</span></div>
        <div><div>User ID:</div><span>7e9bdeb21b1ee7cdf0</span></div>
        <div><div>Playback:</div><span class="stat-playback"></span></div>
        <div><div>Dimensions:</div><span class="stat-dimensions"></span></div>
        <div><div>Resolution:</div><span class="stat-resolution"></span></div>
        <div><div>Level Cap:</div><span class="stat-levelcap"></span></div>
        <div><div>Speed:</div><span><span class="sparkline"></span><span class="stat-speed"></span></span></div>
      </div>

      <div class="player-download-sheet player-sheet">
        <div class="player-card-btn player-card-close">
          <svg viewBox="0 0 8.071 8.07">
            <path d="M7.924,6.51L5.45,4.035l2.475-2.475c0.196-0.195,0.196-0.512,0-0.707L7.217,0.146 c-0.195-0.195-0.512-0.195-0.707,0L4.036,2.621L1.561,0.146c-0.195-0.195-0.512-0.195-0.707,0L0.147,0.854 c-0.196,0.195-0.196,0.512,0,0.707l2.475,2.475L0.147,6.51c-0.196,0.195-0.196,0.512,0,0.707l0.707,0.707 c0.195,0.195,0.512,0.195,0.707,0l2.475-2.475L6.51,7.924c0.195,0.195,0.512,0.195,0.707,0l0.707-0.707 C8.121,7.021,8.121,6.705,7.924,6.51z"/>
          </svg>
        </div>
        <div class="player-download-options">
          <h2>Download Video</h2>
          <ul>
            <li><a class='sd-download' href="https://sproutvideo.com/videos/a49bddb1131ce3ca2c/player_download?expires=1519775155&type=sd&uid=95245d12-97da-40d9-91ac-dec6c7d2d428&auth=bb6a9aa8199938b2d31b9693f23235f3&signature=qSz0ZuxEMVeI8QfYatTIbyS2WYw%3D" target="_blank">SD</a></li>
            <li><a class='hd-download' href="https://sproutvideo.com/videos/a49bddb1131ce3ca2c/player_download?expires=1519775155&type=hd&uid=95245d12-97da-40d9-91ac-dec6c7d2d428&auth=bb6a9aa8199938b2d31b9693f23235f3&signature=sULsVrwr8cXXlNI3I%2BvYqNg51K8%3D" target="_blank">HD</a></li>
          </ul>
        </div>
      </div>

    </div>
    <script src="//d1ajyp3swh7ygp.cloudfront.net/jquery.min.js"></script>
    <script src="https://src.litix.io/core/2/mux.js"></script>
    <!--[if lte IE 7]>
    <script type="text/javascript" src="//d1ajyp3swh7ygp.cloudfront.net/json2.js"></script>
    <![endif]-->
    <!--[if IE]>
    <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/babel-polyfill/6.8.0/polyfill.min.js"></script>
    <![endif]-->
    <script type="text/javascript" src="//d1ajyp3swh7ygp.cloudfront.net/es6.min-8cdbfc06.js"></script>
  </body>
</html>
78  textures-resource.com/crawler.py  Normal file
@@ -0,0 +1,78 @@
import requests
import bs4
import os

headers = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
}

URL = "https://www.textures-resource.com"


# Scrape (console name, console URL) pairs from the left nav
def get_consoles():

    consoles = []

    response = requests.get(URL, headers=headers)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for console in soup.find(id="leftnav-consoles"):
        if isinstance(console, bs4.element.Tag) and console.get("href") is not None:
            consoles.append((console.text, URL + console.get("href")))

    return consoles


# Scrape (game name, game URL) pairs for one console and one index letter
def get_games(console, letter):

    games = []

    print(console[0] + " - " + letter)

    print(console[1] + letter + ".html")
    response = requests.get(console[1] + letter + ".html", headers=headers)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):
        for child in link.findChildren():
            if child.get("class") == ['gameiconcontainer']:
                game_name = child.find("div").find("span").string

                games.append((game_name, URL + link.get("href")))

    return games


# Scrape (texture name, preview URL, download URL) triples for one game
def get_textures(game):
    textures = []

    response = requests.get(game[1], headers=headers)
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all("a"):

        for div in link.find_all("div"):
            if div.get("class") == ["iconcontainer"]:

                texture_url = div.find("div", attrs={"class": "iconbody"}).find("img").get("src")
                texture_id = texture_url.split("/")[4][:-4]
                model_download = "https://www.textures-resource.com/download/" + texture_id + "/"

                model_name = div.find("div").find("span").string
                textures.append((model_name, URL + texture_url, model_download))

    return textures


# One NUL-separated record per texture: path, preview URL, download URL
file = open("links.txt", "w")

for console in get_consoles():
    for letter in "0ABCDEFGHIJKLMNOPQRSTUVWXYZ":
        for game in get_games(console, letter):
            for model in get_textures(game):
                file.write(console[0] + os.sep + game[0] + os.sep + model[0] + os.sep + "\0" + model[1] + "\0" +
                           model[2] + "\n")

file.close()
39  textures-resource.com/downloader.py  Normal file
@@ -0,0 +1,39 @@
import pathlib
import requests
import os

file = open("links.txt", "r")


for line in file.read().splitlines():

    path, preview, link = line.split("\0")

    if os.path.isfile("textures/" + path + "preview.png"):
        continue

    print("textures/" + path)

    pathlib.Path("textures/" + path).mkdir(parents=True, exist_ok=True)

    while True:
        try:
            # Save the preview image
            response = requests.get(preview, stream=True, timeout=5)
            with open("textures/" + path + "preview.png", 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            # Save the texture archive itself
            response2 = requests.get(link, stream=True, timeout=5)

            # Crude extension parsing: appears to assume a Content-Disposition of the form
            # 'attachment; filename="name.ext";' with two trailing characters after the extension
            file_extension = os.path.splitext(response2.headers["Content-Disposition"])[1][:-2]

            with open("textures/" + path + path.split("/")[-2:-1][0] + file_extension, 'wb') as f:
                for chunk in response2.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            break
        except Exception:
            print("!", end="", flush=True)
            continue
BIN  textures-resource.com/links.txt  Normal file
Binary file not shown.

3  viditut.com/README.md  Normal file
@@ -0,0 +1,3 @@
crawler_courses.py -> courses.txt  
courses.txt -> crawler_videos.py -> links.txt  
links.txt -> downloader.py -> (Downloaded videos)  
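For reference, the three stages above run in sequence, each reading the previous stage's output file. A minimal driver sketch (an illustration, not part of this commit; it assumes the three scripts sit in the working directory, and note that the committed crawler_videos.py and downloader.py actually exchange data through links1.txt rather than links.txt):

import subprocess
import sys

# Hypothetical driver: run each stage of the viditut.com pipeline in order;
# check=True aborts the chain if a stage exits with an error.
for script in ("crawler_courses.py", "crawler_videos.py", "downloader.py"):
    subprocess.run([sys.executable, script], check=True)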
							
								
								
									
0  viditut.com/courses.txt  Normal file

73  viditut.com/crawler_courses.py  Normal file
@@ -0,0 +1,73 @@
import requests
import bs4
import json

URL = "https://viditut.com"


# GET with a 30 s timeout, retrying forever on any error
def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


# Scrape (category name, category id) pairs from the home page
def get_categories():

    categories = []

    r = requests.get(URL)
    soup = bs4.BeautifulSoup(r.text, "html.parser")

    for i in soup.find_all("i"):
        if i.get("class") is not None and len(i.get("class")) > 1 and "cat-" in i.get("class")[1]:
            category_id = i.get("class")[1][4:]
            category_name = i.get("title")[:i.get("title").find("-") - 1]

            categories.append((category_name, category_id))

    return categories


# Page through the category's AJAX endpoint until a page adds no new courses
def get_courses(category):
    last_len = 0
    courses = []
    page = 0
    while True:

        page += 1
        r = request_timeout("https://viditut.com/ajax/category/" + category[1] + "/courses?page=" + str(page))
        soup = bs4.BeautifulSoup(json.loads(r.text)["html"], "html.parser")

        for link in soup.find_all("a"):
            if link.get("href") is not None:
                if link.find("h3") is not None:
                    course_link = link.get("href")
                    course_name = link.find("h3").string
                    course_id = course_link.split("/")[-1:][0][:-7]

                    courses.append((course_name, course_id, course_link))

        print("Page " + str(page) + " (" + str(len(courses)) + ")")

        if last_len == len(courses):
            break

        last_len = len(courses)

    return courses


# One NUL-separated record per course: category id, course name, course id, course URL
file = open("courses.txt", "w")

for category in get_categories():
    print(category)
    for course in get_courses(category):
        print(course[0])
        file.write(category[1] + "\0" + course[0] + "\0" + course[1] + "\0" + course[2] + "\n")
    file.flush()


file.close()
68  viditut.com/crawler_videos.py  Normal file
@@ -0,0 +1,68 @@
import requests
import bs4
import json


# GET with a 30 s timeout, retrying forever on any error
def request_timeout(url):
    while True:
        try:
            return requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
            continue


# Scrape (video name, video id) pairs from a course page
def get_videos(course):

    videos = []
    r = request_timeout(course[2])
    soup = bs4.BeautifulSoup(r.text, "html.parser")

    for link in soup.find_all("a"):

        if link.get("class") == ["item-name", "video-name", "ga"]:
            video_id = link.get("data-ga-value")
            video_name = link.text.replace("\n", "").strip()

            videos.append((video_name, video_id))

    return videos


# Resolve the (quality, direct URL) pairs for one video via the play endpoint
def get_links(course, video):

    links = []
    r = request_timeout("https://viditut.com/ajax/course/" + course[1] + "/" + video[1] + "/play")
    json_obj = json.loads(r.text)

    if len(json_obj) > 0:
        json_obj = json_obj[0]
    else:
        return links

    for quality in json_obj["qualities"]:
        links.append((quality, json_obj["urls"][quality]))

    return links


file = open("courses.txt", "r")

fileout = open("links1.txt", "w")

for line in file.read().splitlines():

    category, course_name, course_id, course_url = line.split("\0")

    course = (course_name, course_id, course_url)

    print(course_name)

    for video in get_videos(course):
        for link in get_links(course, video):
            fileout.write(category + "/" + course_name + "/" + video[0] + "\0" + link[0] + "\0" + link[1] + "\n")
        fileout.flush()


fileout.close()
file.close()
38  viditut.com/downloader.py  Normal file
@@ -0,0 +1,38 @@
import pathlib
import os
import requests

file = open("links1.txt", "r")

i = 0

for line in file.read().splitlines():

    path, quality, link = line.split("\0")

    # Only keep the 720p rendition
    if quality != "720":
        continue

    i += 1

    pathlib.Path(os.path.split(path)[0]).mkdir(parents=True, exist_ok=True)

    if os.path.isfile(os.path.split(path)[0] + os.sep + str(i) + " -" + os.path.split(path)[1] +
            "[" + quality + "].mp4"):
        continue

    print(path)

    while True:
        try:
            response = requests.get(link, stream=True, timeout=5)

            with open(os.path.split(path)[0] + os.sep + str(i) + " -" + os.path.split(path)[1] +
                              "[" + quality + "].mp4", 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            break
        except Exception:
            print("!", end="", flush=True)
            continue
BIN  viditut.com/links.txt  Normal file
Binary file not shown.