Mirror of https://github.com/simon987/Misc-Download-Scripts.git, synced 2025-04-04 04:52:58 +00:00
Added progression and resume option
parent 894c4f1328
commit 8212793dc5
@@ -8,6 +8,9 @@ import multiprocessing
 h = HTML2Text()
 h.images_to_alt = True
 
+counter = multiprocessing.Value("i", 0)
+total = multiprocessing.Value("i", 0)
+
 
 def request_timeout(url):
     while True:
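The progress output added in this commit is driven by the two module-level multiprocessing.Value objects above, which the pool workers update and read. Below is a minimal, self-contained sketch of that pattern, not the repository's code: the worker function and item list are placeholders, and it assumes the fork start method so the pool workers inherit the shared values. Unlike the diff, the sketch takes the value's lock around the increment, since += on a shared Value is not atomic.

import multiprocessing
import time

counter = multiprocessing.Value("i", 0)  # items processed so far (shared)
total = multiprocessing.Value("i", 0)    # total number of items (shared)


def work(item):
    time.sleep(0.01)                     # stand-in for the real download
    with counter.get_lock():             # guard the read-modify-write
        counter.value += 1
        done = counter.value
    print("{}/{} {:.2f}%".format(done, total.value, done / total.value * 100))


if __name__ == "__main__":
    multiprocessing.set_start_method("fork")  # POSIX only; workers inherit the Values
    items = list(range(50))              # placeholder work items
    total.value = len(items)             # set before the pool is created
    with multiprocessing.Pool(processes=4) as pool:
        pool.map(work, items)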
@@ -41,61 +44,74 @@ def get_entries():
 
 def download_entry(url):
 
-    r = request_timeout("https://opengameart.org" + url)
-    soup = BeautifulSoup(r.text, "html.parser")
+    global counter
+    global total
+    counter.value += 1
+    print(str(counter.value) + "/" + str(total.value) + " {0:.2f}".format(counter.value / total.value * 100) + "%")
 
     simple_title = os.path.split(url)[1]
 
-    if not os.path.exists(simple_title + os.sep + "metadata.json"):
+    if not os.path.exists("entries" + os.sep + simple_title + os.sep + "metadata.json"):
 
+        r = request_timeout("https://opengameart.org" + url)
+        soup = BeautifulSoup(r.text, "html.parser")
+
         metadata = dict()
 
-        metadata["title"] = list(soup.find("div", attrs={"property": "dc:title"}).children)[0].text
-        metadata["tags"] = list()
-        for tag in soup.find_all("a", attrs={"property": "rdfs:label skos:prefLabel"}):
-            metadata["tags"].append(tag.text)
+        try:
+            metadata["title"] = list(soup.find("div", attrs={"property": "dc:title"}).children)[0].text
+            metadata["tags"] = list()
+            tag_list = soup.find_all("a", attrs={"property": "rdfs:label skos:prefLabel"})
+            if tag_list is not None:
+                for tag in tag_list:
+                    metadata["tags"].append(tag.text)
 
-        metadata["description"] = h.handle(str(soup.find("div", attrs={"class": "field-item even", "property": "content:encoded"}))).strip()
-        metadata["attribution"] = h.handle(str(soup.find("div", attrs={"class": "field field-name-field-art-attribution field-type-text-long field-label-above"}))).strip()
-        metadata["license"] = soup.find("div", attrs={"class": "license-name"}).text
-        metadata["type"] = soup.find("a", attrs={"property": "rdfs:label skos:prefLabel", "typeof": "skos:Concept"}).text
+            metadata["description"] = h.handle(str(soup.find("div", attrs={"class": "field-item even", "property": "content:encoded"}))).strip()
+            metadata["attribution"] = h.handle(str(soup.find("div", attrs={"class": "field field-name-field-art-attribution field-type-text-long field-label-above"}))).strip()
+            metadata["license"] = soup.find("div", attrs={"class": "license-name"}).text
+            metadata["type"] = soup.find("a", attrs={"property": "rdfs:label skos:prefLabel", "typeof": "skos:Concept"}).text
 
-        path = "entries" + os.sep + simple_title
-        if not os.path.exists(path):
-            os.mkdir(path)
+            path = "entries" + os.sep + simple_title
+            if not os.path.exists(path):
+                os.mkdir(path)
 
-        for file in soup.find_all("span", attrs={"class", "file"}):
-            link = file.find("a").get("href")
+            for file in soup.find_all("span", attrs={"class", "file"}):
+                link = file.find("a").get("href")
 
-            if not os.path.exists(path + os.sep + os.path.split(link)[1]):
-                print(link)
-
-                while True:
-                    try:
-                        response = requests.get(link, stream=True, timeout=8)
-
-                        with open(path + os.sep + os.path.split(link)[1], 'wb') as f:
-                            for chunk in response.iter_content(chunk_size=1024):
-                                if chunk:
-                                    f.write(chunk)
-                        break
-                    except:
-                        print("!")
+                if not os.path.exists(path + os.sep + os.path.split(link)[1]):
+                    while True:
+                        try:
+                            response = requests.get(link, stream=True, timeout=8)
+
+                            with open(path + os.sep + os.path.split(link)[1], 'wb') as f:
+                                for chunk in response.iter_content(chunk_size=1024):
+                                    if chunk:
+                                        f.write(chunk)
+                            break
+                        except:
+                            print("!")
 
-        with open(path + os.sep + "metadata.json", "w") as f:
-            json.dump(metadata, f)
+            with open(path + os.sep + "metadata.json", "w") as f:
+                json.dump(metadata, f)
+
+        except:
+            print("ERROR " + url)
 
 
 def download_all():
+    global total
     pool = multiprocessing.Pool(processes=25)
 
     with open("entries.txt", "r") as f:
-        pool.map(download_entry, f.read().splitlines())
+        lines = f.read().splitlines()
+
+    total.value = len(lines)
+    pool.map(download_entry, lines)
 
 
 if not os.path.exists("entries"):
     os.mkdir("entries")
 
 
-get_entries()
+# get_entries()
 download_all()
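The resume option relies on where metadata.json now sits in the flow: the check at the top of the hunk looks for entries/&lt;title&gt;/metadata.json, and that file is only written after the entry's files have been downloaded, so a re-run skips anything that finished previously. Note that the counter is incremented before this check, so skipped entries still advance the progress display. Below is a small sketch of just that check, separate from the repository's code, with a hypothetical process_entry and a placeholder where the scraping and downloads would go; os.path.join is used here in place of the script's os.sep concatenation, with the same effect.

import json
import os


def process_entry(url):
    simple_title = os.path.split(url)[1]
    path = os.path.join("entries", simple_title)

    # Resume: metadata.json is written last, so its presence means the entry
    # was fully processed on an earlier run and can be skipped.
    if os.path.exists(os.path.join(path, "metadata.json")):
        return

    os.makedirs(path, exist_ok=True)

    metadata = {"title": simple_title}   # placeholder for the scraped fields
    # ... fetch the entry page and download its files into `path` here ...

    with open(os.path.join(path, "metadata.json"), "w") as f:
        json.dump(metadata, f)           # written only after everything above succeeded


if __name__ == "__main__":
    process_entry("/content/example-entry")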