# 2018-02-27 16:31:54 -05:00
#
# 122 lines
# 2.9 KiB
# Python

import requests
from bs4 import BeautifulSoup
import multiprocessing
import os
# Position in proxies.txt of the next proxy that get_new_proxy() will select.
proxy_index = 0

# Proxy mapping handed to requests; get_new_proxy() overwrites the "http" entry.
proxies = {"http": ""}

# Request headers sent with each font download (a desktop Firefox profile).
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
}
def already_downloaded(font_id):
    """Report whether font_id is already recorded in downloaded.txt.

    Args:
        font_id: Identifier to look for; it must equal a whole line of the
            file to count as present.

    Returns:
        True if some line of downloaded.txt equals font_id, otherwise False.
        When downloaded.txt does not exist nothing has been recorded yet, so
        the result is False instead of a FileNotFoundError.
    """
    try:
        with open("downloaded.txt", "r") as f:
            return font_id in f.read().splitlines()
    except FileNotFoundError:
        # On a fresh run the record file has not been created yet.
        return False
def flag_downloaded(font_id):
    """Record font_id as downloaded by appending it as a line to downloaded.txt."""
    with open("downloaded.txt", "a") as record_file:
        record_file.writelines((font_id, "\n"))
def get_new_proxy():
    """Switch the shared proxy settings to the next entry of proxies.txt.

    Reads proxies.txt, picks the line at position proxy_index, stores it as
    the "http" proxy, prints a notice, and advances proxy_index by one.

    Raises:
        IndexError: If proxy_index is past the last line of proxies.txt.
    """
    global proxy_index
    with open("proxies.txt", "r") as proxy_file:
        candidates = proxy_file.read().splitlines()
    chosen = candidates[proxy_index]
    proxies["http"] = chosen
    print("Switched to proxy " + chosen)
    proxy_index = proxy_index + 1
def request_timeout(url):
    """Fetch url with a 30-second timeout, retrying until a request succeeds.

    Each failed attempt prints "!" (without a newline) and is retried
    immediately; there is no retry limit, so the function only returns once
    requests.get has produced a response.

    Args:
        url: Address to fetch.

    Returns:
        The response object returned by requests.get.
    """
    response = None
    while response is None:
        try:
            response = requests.get(url, timeout=30)
        except Exception:
            print("!", end="", flush=True)
    return response
def get_dl_links(url):
    """Append the font ids linked from one listing page to fonts.txt.

    Prints url, fetches the page with request_timeout, and writes the value
    of every <a> tag's data-font-id attribute to fonts.txt, one id per line.
    A page without any such anchor leaves fonts.txt untouched.

    Args:
        url: Address of the listing page to scan.
    """
    print(url)
    r = request_timeout(url)
    soup = BeautifulSoup(r.text, "html.parser")
    font_ids = [
        a.get("data-font-id")
        for a in soup.find_all("a")
        if a.get("data-font-id") is not None
    ]
    if not font_ids:
        return
    # One open and one write per page instead of one per link.
    with open("fonts.txt", "a") as f:
        f.write("".join(font_id + "\n" for font_id in font_ids))
def get_fonts():
    """Scan every alphabetical listing page for font ids.

    Builds one listing URL per letter A-Z plus "Numbers" and maps
    get_dl_links over them with a pool of 25 worker processes.
    """
    sections = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ["Numbers"]
    all_page_links = [
        "http://www.abstractfonts.com/alpha/" + section for section in sections
    ]
    # The context manager shuts the workers down once map() has returned;
    # without it the pool's processes were never released.
    with multiprocessing.Pool(processes=25) as pool:
        pool.map(get_dl_links, all_page_links)
def _filename_from_disposition(disposition):
    """Return the bare file name announced by a Content-Disposition value."""
    value = disposition.strip()
    if value.endswith('"') and value.count('"') >= 2:
        # Quoted form, filename="name.ext": text between the last two quotes.
        name = value[value.rfind('"', 0, -1) + 1:-1]
    else:
        # Unquoted form, filename=name.ext, optionally followed by ";...".
        name = value.partition("filename=")[2].split(";")[0].strip()
    # The header is controlled by the server: keep only the final path
    # component so a value like "../../x" cannot leave the fonts/ directory.
    return os.path.basename(name.replace("\\", "/"))


def download_font(font_id):
    """Download one font archive into fonts/ and record it as downloaded.

    Does nothing if font_id is already recorded (see already_downloaded).
    Otherwise requests the download URL through the current proxy and:

    * on HTTP 404, prints the status and URL, switches proxy and gives up;
    * on a response without Content-Disposition, prints the body, switches
      proxy and gives up;
    * if the target file already exists, gives up without recording;
    * otherwise streams the body to fonts/<name> and calls flag_downloaded.

    Any exception during an attempt switches to a new proxy and retries.

    Args:
        font_id: Identifier appended to the download URL.

    Raises:
        IndexError: Propagated from get_new_proxy when no proxies are left.
    """
    if already_downloaded(font_id):
        return
    url = "http://www.abstractfonts.com/download/" + font_id
    # open() does not create directories; without this a missing fonts/
    # directory made every attempt fail and burned through the proxy list.
    os.makedirs("fonts", exist_ok=True)
    while True:
        part_path = None
        try:
            r = requests.get(url, stream=True, proxies=proxies, headers=headers, timeout=5)
            try:
                if r.status_code == 404:
                    print(str(r.status_code) + " - " + url)
                    get_new_proxy()
                    return
                if "Content-Disposition" not in r.headers:
                    print(r.text)
                    get_new_proxy()
                    return
                filename = _filename_from_disposition(r.headers["Content-Disposition"])
                if not filename:
                    print("No usable file name in Content-Disposition for font " + font_id)
                    return
                file_path = "fonts/" + filename
                if os.path.exists(file_path):
                    return
                print(file_path)
                # Stream into a side file and rename only when complete, so a
                # failed attempt never leaves a truncated file that the
                # exists() check above would later mistake for a finished one.
                part_path = file_path + ".part"
                with open(part_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                os.replace(part_path, file_path)
            finally:
                # stream=True keeps the connection open until it is closed.
                r.close()
            flag_downloaded(font_id)
            return
        except Exception:
            # Exception (not a bare except) so Ctrl-C and SystemExit still
            # stop the worker instead of triggering another retry.
            if part_path is not None and os.path.exists(part_path):
                try:
                    os.remove(part_path)
                except OSError:
                    pass
            get_new_proxy()
# Script entry point. The guard keeps worker processes that re-import this
# module (the multiprocessing "spawn" start method) from running the script
# body again and creating pools of their own.
if __name__ == "__main__":
    # Presumably step one of the workflow: get_fonts() fills fonts.txt with
    # font ids. Uncomment to rebuild that list before downloading.
    # get_fonts()
    get_new_proxy()
    with open("fonts.txt", "r") as f1:
        font_ids = f1.read().splitlines()
    # The context manager shuts the workers down once map() has returned.
    with multiprocessing.Pool(processes=100) as pool:
        pool.map(download_font, font_ids)