2018-02-27 16:31:54 -05:00

143 lines
3.8 KiB
Python

import multiprocessing
import os
import time

import requests
from bs4 import BeautifulSoup
# Account credentials posted by login(); both are blank in this file.
username = ""
password = ""

# Browser-like headers sent with every GET issued by request_timeout().
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "https://fontstruct.com/",
    "Connection": "keep-alive",
}

# Font ids already recorded by get_font_ids() in the current process.
font_ids = []
def request_timeout(url, max_retries=None, retry_delay=1.0):
    """GET *url* with the shared ``headers``, retrying on network errors.

    Each attempt uses a 30 second timeout. A ``!`` is printed for every
    failed attempt, followed by a pause so a dead host is not hammered in
    a tight loop.

    Args:
        url: Address to fetch.
        max_retries: Number of retries allowed after the first failure.
            ``None`` (the default) retries indefinitely.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        The ``requests.Response`` of the first attempt that did not raise.

    Raises:
        requests.RequestException: The last error, once ``max_retries``
            is exhausted.
    """
    failures = 0
    while True:
        try:
            return requests.get(url, timeout=30, headers=headers)
        except requests.RequestException:
            # Only transport-level failures are retried; anything else
            # (e.g. a programming error) propagates instead of looping.
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            print("!", end="", flush=True)
            time.sleep(retry_delay)
def login():
    """Sign in to fontstruct.com and return a response holding the cookies.

    The login page is fetched first to obtain initial cookies, which are
    then sent along with the credentials to ``/login_check``. Diagnostic
    information about both responses is printed.

    Returns:
        The first response of the POST's redirect chain
        (``r.history[0]``), or the POST response itself when no redirect
        took place. Callers read ``.cookies`` from the result.
    """
    r1 = request_timeout("https://fontstruct.com/login")
    print(r1.cookies)
    # Same browser headers as everywhere else; only the Referer differs.
    login_headers = dict(headers, Referer="https://fontstruct.com/login")
    # NOTE(review): "_csrf_token" is sent empty, and "Sign+In" will be
    # form-encoded again by requests (the "+" becomes "%2B") - confirm
    # that the server accepts both.
    payload = {"_username": username, "_password": password, "_csrf_token": "", "_submit": "Sign+In"}
    r = requests.post(
        "https://fontstruct.com/login_check",
        headers=login_headers,
        data=payload,
        cookies=r1.cookies,
        timeout=30,  # never hang forever on a stalled connection
    )
    print(r.cookies)
    print(len(r.text))
    print(r.headers)
    if not r.history:
        # No redirect happened, so there is no earlier response to return.
        return r
    return r.history[0]
def _font_id_from_href(href):
    """Return the path segment that follows ``show/`` in a font link, else None."""
    if href is None or not href.startswith("/fontstructions"):
        return None
    if "/license/" in href or "/vote_breakdown/" in href:
        return None
    _, marker, tail = href.partition("show/")
    if not marker:
        # Links without "show/" carry no id; slicing them would yield
        # an empty or garbage value.
        return None
    return tail.split("/", 1)[0] or None


def get_font_ids(page_url):
    """Collect font ids from one gallery page and append them to ``fonts.txt``.

    Every ``<a>`` whose href starts with ``/fontstructions``, contains
    ``show/`` and is not a ``/license/`` or ``/vote_breakdown/`` link
    contributes the path segment right after ``show/``. Ids not yet in
    the module-level ``font_ids`` list are added to it and written to
    ``fonts.txt``, one per line.

    Note: when this runs inside a ``multiprocessing.Pool`` each worker
    has its own copy of ``font_ids``, so the same id can still be
    written by different workers.

    Args:
        page_url: URL of the gallery page to scrape.
    """
    print(page_url)
    r = request_timeout(page_url)
    soup = BeautifulSoup(r.text, "html.parser")
    new_ids = []
    for a in soup.find_all("a"):
        font_id = _font_id_from_href(a.get("href"))
        if font_id is not None and font_id not in font_ids:
            font_ids.append(font_id)
            new_ids.append(font_id)
    if new_ids:
        # One append per page instead of reopening the file for every id.
        with open("fonts.txt", "a") as f:
            f.write("".join(font_id + "\n" for font_id in new_ids))
def get_fonts(last_page=1427, processes=25):
    """Scrape gallery pages 1..*last_page* in parallel via ``get_font_ids``.

    Args:
        last_page: Number of the final gallery page to visit (inclusive).
        processes: Size of the worker pool.
    """
    page_urls = [
        "https://fontstruct.com/gallery?filters=all&page=" + str(page_num)
        for page_num in range(1, last_page + 1)
    ]
    # The context manager releases the worker processes once map() returns.
    with multiprocessing.Pool(processes=processes) as pool:
        pool.map(get_font_ids, page_urls)
def _filename_from_disposition(disposition):
    """Extract a bare file name from a Content-Disposition header value."""
    if "'" in disposition:
        # Everything after the last apostrophe, as in filename*=UTF-8''name.
        name = disposition[disposition.rfind("'") + 1:]
    else:
        # Plain form: filename="name" - take the value and drop the quotes.
        name = disposition.partition("filename=")[2].strip().strip('"')
    # The header is server-controlled: never let it point outside fonts/.
    return os.path.basename(name)


def download_font(font_id, max_attempts=5, retry_delay=2.0):
    """Download the archive of one font into the ``fonts/`` directory.

    The request is sent with the module-level ``cookies``. A 403 response
    or a response without a ``Content-Disposition`` header ends the
    attempt (the body is printed in the latter case). A 500 response or a
    network error is retried after a pause, up to *max_attempts* tries.
    A file that already exists is not downloaded again.

    Args:
        font_id: Id of the font, as stored in ``fonts.txt``.
        max_attempts: Total number of tries before giving up.
        retry_delay: Seconds to wait before each retry.
    """
    # Same browser headers as the rest of the script, font-specific Referer.
    dl_headers = dict(headers, Referer="https://fontstruct.com/fontstructions/download/" + font_id)
    dl_url = "https://fontstruct.com/font_archives/download/" + font_id
    for attempt in range(max_attempts):
        if attempt:
            time.sleep(retry_delay)
        try:
            r = requests.get(dl_url, stream=True, headers=dl_headers, cookies=cookies, timeout=30)
        except requests.RequestException:
            print("!", end="", flush=True)
            continue
        try:
            if r.status_code == 403:
                return
            if r.status_code == 500:
                continue
            disposition = r.headers.get("Content-Disposition")
            if disposition is None:
                print(r.text)
                return
            file_name = _filename_from_disposition(disposition)
            if not file_name:
                print(disposition)
                return
            file_path = "fonts/" + file_name
            if os.path.exists(file_path):
                return
            print(file_path)
            os.makedirs("fonts", exist_ok=True)
            # Write to a per-process temporary name and rename at the end, so
            # an interrupted transfer never looks like a finished file.
            tmp_path = file_path + "." + str(os.getpid()) + ".part"
            try:
                with open(tmp_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
            except requests.RequestException:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
                print("!", end="", flush=True)
                continue
            os.replace(tmp_path, file_path)
            return
        finally:
            # stream=True keeps the connection open until explicitly closed.
            r.close()
def download_all(processes=25):
    """Download every font listed in ``fonts.txt`` using a worker pool.

    Blank lines are ignored and repeated ids are downloaded only once,
    in order of first appearance.

    Args:
        processes: Size of the worker pool.
    """
    ids = []
    seen = set()
    with open("fonts.txt", "r") as f:
        for line in f:
            font_id = line.strip()
            if font_id and font_id not in seen:
                seen.add(font_id)
                ids.append(font_id)
    # The context manager releases the worker processes once map() returns.
    with multiprocessing.Pool(processes=processes) as pool:
        pool.map(download_font, ids)
if __name__ == "__main__":
    # Guarded so that importing this module (which multiprocessing workers
    # do under the "spawn" start method) does not log in or start downloads.
    # NOTE(review): download_font() reads the global `cookies`; workers only
    # see it when they are forked from this process - confirm the platform.
    cookies = login().cookies
    # Enable the next line to rebuild fonts.txt from the gallery first.
    # get_fonts()
    download_all()