mirror of https://github.com/simon987/vanwanet_scrape.git
initial commit
parent d1bd0a6bb4
commit b8d1c2bcc3
2 .gitignore vendored Normal file
@@ -0,0 +1,2 @@
.idea/
*.iml
15 README.md Normal file
@@ -0,0 +1,15 @@
*In early development*

Python `requests` wrapper with VanwaNet DDoS protection bypass

A `node` environment is required

### Example usage

```python
s = vanwanet_scrape.Scraper(domains=["8kun.top"])
r = s.get("https://8kun.top/index.html", timeout=10)
```

### Install

```
pip install git+git://github.com/simon987/vanwanet_scrape.git
```
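The constructor also accepts optional `headers` and `proxies`, which are applied to the underlying `requests` session (see `scraper.py` below). A minimal sketch; the header and proxy values here are illustrative, not part of the project:

```python
s = vanwanet_scrape.Scraper(
    domains=["8kun.top"],  # domains the bypass cookie will be set for
    headers={"User-Agent": "Mozilla/5.0"},       # illustrative value
    proxies={"https": "http://localhost:3128"},  # illustrative value
)
```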
13 setup.py Normal file
@@ -0,0 +1,13 @@
from setuptools import setup

setup(
    name="vanwanet_scrape",
    version="1.0",
    description="VanwaTech DDoS protection bypass",
    author="simon987",
    author_email="me@simon987.net",
    packages=["vanwanet_scrape"],
    # ship the bundled JavaScript alongside the Python sources
    package_data={"vanwanet_scrape": ["aes.js"]},
    install_requires=[
        "requests", "bs4", "hexlib @ git+git://github.com/simon987/hexlib.git",
    ]
)
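A note on the dependency list: pip does not resolve bare VCS URLs inside `install_requires`, so the hexlib requirement is expressed in the PEP 508 `name @ url` form, which pip installs directly from the repository.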
0 vanwanet_scrape/__init__.py Normal file
123 vanwanet_scrape/aes.js Normal file
File diff suppressed because one or more lines are too long
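(`aes.js` bundles the AES routines the challenge script depends on; `scraper.py` prepends its contents to the extracted challenge before handing it to `node`.)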
72 vanwanet_scrape/scraper.py Normal file
@@ -0,0 +1,72 @@
import os
import re
import subprocess
from http.cookiejar import CookieJar

import requests
from bs4 import BeautifulSoup
from hexlib.web import cookie_from_string

# Load the bundled AES implementation relative to this file, so the
# package works regardless of the current working directory
with open(os.path.join(os.path.dirname(__file__), "aes.js"), "r") as f:
    AES = f.read()

# Matches the challenge script's final statement, which sets the
# mitigation cookie and reloads the page
SUB_PATTERN = re.compile(r'document\.cookie="(.+)";location.+$')


class Scraper:

    def __init__(self, domains: list, headers=None, proxies=None):
        self._session = requests.Session()
        self._domains = domains
        self._session.cookies = CookieJar()

        if headers:
            self._session.headers = headers
        if proxies:
            self._session.proxies = proxies

    def _get(self, url, **kwargs):
        return self._session.get(url, **kwargs)

    def get(self, url, **kwargs):
        r = self._get(url, **kwargs)

        if Scraper._is_challenge_page(r):
            # Solve the JavaScript challenge, then set the resulting
            # cookie for every configured domain
            cookie = Scraper._execute_challenge(Scraper._transform_js(Scraper._get_js(r)))

            for domain in self._domains:
                self._session.cookies.set_cookie(cookie_from_string(cookie, domain))

            # Retry the request with the mitigation cookie in place
            return self.get(url, **kwargs)
        return r

    @staticmethod
    def _is_challenge_page(r):
        return r.text.startswith("<iframe ") and "VanwaNetDDoSMitigation=" in r.text

    @staticmethod
    def _get_js(r):
        # The challenge is the only inline (src-less) script on the page
        soup = BeautifulSoup(r.text, "html.parser")
        return soup.find("script", src=lambda x: not x).text

    @staticmethod
    def _transform_js(js):
        # Rewrite the final statement to print the cookie to the console
        # instead of setting it and reloading the page
        challenge = SUB_PATTERN.sub(r'console.log("\1");', js)
        return AES + challenge

    @staticmethod
    def _execute_challenge(js):
        # TODO: run in some kind of sandbox
        node = subprocess.Popen(
            ["node", "-e", js], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            universal_newlines=True
        )
        result, stderr = node.communicate()

        if stderr:
            raise ValueError(stderr)

        # console.log() appends a newline; strip it from the cookie string
        return result.strip()
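To make the `_transform_js` step concrete, here is a self-contained sketch of the substitution it performs; the cookie value is hypothetical:

```python
import re

SUB_PATTERN = re.compile(r'document\.cookie="(.+)";location.+$')

# hypothetical tail of a VanwaNet challenge script
js = 'document.cookie="VanwaNetDDoSMitigation=abc123";location.reload();'

# rewrite the final statement so node prints the cookie instead of
# setting it and reloading the page
print(SUB_PATTERN.sub(r'console.log("\1");', js))
# -> console.log("VanwaNetDDoSMitigation=abc123");
```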