Misc-Download-Scripts/software.intel.com/run.py

import requests
from bs4 import BeautifulSoup
import os
import pdfkit
from urllib.parse import urljoin
import youtube_dl


# Module-level result lists, filled in place by the scraping functions below.
# (href, link text) pairs for links whose href starts with "/en-us/articles/".
articles = []
# (href, link text) pairs for links whose href starts with "/en-us/videos/".
videos = []
# href values of the "Get Started" links found on the student kits page.
kits = []


def get_articles(max_pages=10, timeout=30):
    """Collect article and video links from the AI Academy library listing.

    Fetches listing pages ``?page=0`` .. ``?page=max_pages - 1`` and appends
    ``(href, title)`` tuples to the module-level ``articles`` and ``videos``
    lists. Anchors without an ``href`` or without a single text child
    (``link.string is None``) are ignored. Each ``(href, title)`` pair is
    stored only once, even if it is linked from several pages.

    :param max_pages: number of listing pages to request (default 10).
    :param timeout: per-request timeout in seconds, so a stalled connection
        raises instead of blocking the script forever.
    :raises requests.RequestException: if a listing page cannot be fetched.
    """
    listing_url = "https://software.intel.com/en-us/ai-academy/library?page={}"
    # href prefix -> module-level list that receives the matching links.
    buckets = (
        ("/en-us/articles/", articles),
        ("/en-us/videos/", videos),
    )

    for page in range(max_pages):
        r = requests.get(listing_url.format(page), timeout=timeout)
        if r.status_code != 200:
            # An error page contains no listing; skip it rather than parse it.
            print("page {}: HTTP {}, skipped".format(page, r.status_code))
            continue

        soup = BeautifulSoup(r.text, "html.parser")
        for link in soup.find_all("a"):
            href = link.get("href")
            if href is None or link.string is None:
                continue
            # Store a plain str: a NavigableString keeps a reference to the
            # whole parse tree and would keep every page alive in memory.
            entry = (href, str(link.string))
            for prefix, bucket in buckets:
                if href.startswith(prefix):
                    if entry not in bucket:
                        bucket.append(entry)
                    break

        # Running totals after each page, as a progress indicator.
        print(str(len(articles)) + " articles")
        print(str(len(videos)) + " videos")


def get_kits(timeout=30):
    """Collect the "Get Started" links from the AI Academy student kits page.

    Appends the ``href`` of every anchor whose text is exactly
    ``"Get Started"`` to the module-level ``kits`` list. Anchors without an
    ``href`` are skipped (a ``None`` entry would later resolve to the site
    root), and each href is stored only once.

    :param timeout: request timeout in seconds.
    :raises requests.RequestException: if the page cannot be fetched.
    """
    r = requests.get("https://software.intel.com/en-us/ai-academy/students/kits", timeout=timeout)
    if r.status_code != 200:
        # Nothing useful to parse on an error page.
        print("kits page: HTTP {}, skipped".format(r.status_code))
        return

    soup = BeautifulSoup(r.text, "html.parser")

    for link in soup.find_all("a"):
        href = link.get("href")
        # `link.string` is None for anchors with nested markup; the equality
        # test is simply False in that case.
        if link.string == "Get Started" and href and href not in kits:
            kits.append(href)


def download_article(article):
    """Render one article to ``articles/<title>.pdf`` unless it already exists.

    :param article: ``(href, title)`` pair; ``href`` is resolved against
        ``https://software.intel.com/`` and ``title`` becomes the file name.
    """
    href, title = article[0], article[1]

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("articles", exist_ok=True)

    # The title comes from scraped link text: surrounding whitespace is noise,
    # and a path separator would be interpreted as a (missing) subdirectory.
    safe_title = str(title).strip().replace("/", "_").replace("\\", "_")
    pdf_path = os.path.join("articles", safe_title + ".pdf")

    if not os.path.isfile(pdf_path):
        pdfkit.from_url(urljoin("https://software.intel.com/", href), pdf_path)


def download_video(video):
    """Download one video into the ``videos`` directory via youtube_dl.

    :param video: sequence whose first item is the video href; it is resolved
        against ``https://software.intel.com/`` before being handed to
        youtube_dl. Output files are named from the ``%(title)s.%(ext)s``
        template.
    """
    output_dir = "videos"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    downloader = youtube_dl.YoutubeDL({"outtmpl": "videos/%(title)s.%(ext)s"})
    video_url = urljoin("https://software.intel.com/", video[0])
    downloader.download([video_url])


def download_file(url, destination, max_retries=5):
    """Stream ``url`` to ``destination`` unless that file already exists.

    The body is written to ``destination + ".part"`` and renamed only after
    the transfer completes, so an interrupted download never leaves a
    truncated file that a later run would mistake for a finished one.

    :param url: absolute URL to fetch.
    :param destination: path of the file to create.
    :param max_retries: number of attempts before giving up. Network and file
        errors are retried; a non-200 response is not retried and the file is
        simply not created.
    :returns: None.
    """
    # Check first: no point issuing a request for a file we already have.
    if os.path.exists(destination):
        return

    partial = destination + ".part"

    for _attempt in range(max_retries):
        try:
            response = requests.get(url, stream=True, timeout=10)
            try:
                if response.status_code != 200:
                    return
                with open(partial, "wb") as f:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        if chunk:
                            f.write(chunk)
            finally:
                # Release the connection even when the transfer fails.
                response.close()
            os.replace(partial, destination)
            return
        except (requests.RequestException, OSError):
            # Only network/file errors are retried; KeyboardInterrupt and
            # programming errors propagate instead of looping forever.
            print("!")
            try:
                os.remove(partial)
            except OSError:
                # Best-effort cleanup: the partial file may not exist yet.
                pass

    print("giving up on " + str(url) + " after " + str(max_retries) + " attempts")


def download_kit(kit_url, timeout=30):
    """Save a kit page as PDF and download every ``.zip`` it links to.

    Output goes to ``kits/<page title>/``: the page itself as ``kit.pdf`` and
    each linked archive under its own file name.

    :param kit_url: kit page URL or site-relative href; resolved against
        ``https://software.intel.com/``.
    :param timeout: timeout in seconds for fetching the kit page.
    :raises requests.RequestException: if the kit page cannot be fetched.
    """
    kit_url = urljoin("https://software.intel.com/", kit_url)

    r = requests.get(kit_url, timeout=timeout)
    soup = BeautifulSoup(r.text, "html.parser")

    # A page may lack <title>, or its .string may be None (nested markup);
    # fall back to the last URL path segment instead of crashing.
    title_tag = soup.find("title")
    raw_title = title_tag.string if title_tag is not None else None
    if raw_title is None:
        raw_title = kit_url.rstrip("/").rsplit("/", 1)[-1]

    # The title becomes a directory name: trim whitespace and neutralise path
    # separators so it cannot point into a nested or missing directory.
    kit_title = str(raw_title).strip().replace("/", "_").replace("\\", "_")
    if not kit_title:
        kit_title = "kit"

    kit_dir = os.path.join("kits", kit_title)
    # Creates "kits" as well when needed; no error if either already exists.
    os.makedirs(kit_dir, exist_ok=True)

    pdfkit.from_url(kit_url, os.path.join(kit_dir, "kit.pdf"))

    for link in soup.find_all("a"):

        target = link.get("href")

        if target is not None and target.endswith(".zip"):
            # Resolve against the kit page so relative hrefs work; absolute
            # and root-relative hrefs resolve exactly as before.
            download_file(urljoin(kit_url, target), os.path.join(kit_dir, os.path.split(target)[1]))


# Script driver: these statements run as soon as the module is executed.
# Only the kit download is currently enabled. The article and video steps
# are commented out; re-enabling them also requires get_articles(), which is
# what fills the `articles` and `videos` lists.
# get_articles()
get_kits()

# Download every kit page (PDF plus linked .zip archives) found by get_kits().
for k in kits:
    download_kit(k)
#
# for a in articles:
#     download_article(a)
#
# for v in videos:
#     download_video(v)