initial commit

This commit is contained in:
simon 2019-12-17 10:35:14 -05:00
parent d1bd0a6bb4
commit b8d1c2bcc3
6 changed files with 225 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
.idea/
*.iml

15
README.md Normal file
View File

@ -0,0 +1,15 @@
*In early development*
Python `requests` wrapper with VanwaNet DDoS protection bypass
`node` environment is required
### Example usage
```python
s = vanwanet_scrape.Scraper(domains=["8kun.top"])  # `domains` is required: the bypass cookie is set for each one
r = s.get("https://8kun.top/index.html", timeout=10)
```
```
git+https://github.com/simon987/vanwanet_scrape.git
```

13
setup.py Normal file
View File

@ -0,0 +1,13 @@
from setuptools import setup
# Packaging metadata for the vanwanet_scrape distribution.
setup(
    name="vanwanet_scrape",
    version="1.0",
    description="VanwaTech DDoS protection bypass",
    author="simon987",
    author_email="me@simon987.net",
    packages=["vanwanet_scrape"],
    # aes.js is read by the scraper module at import time, so it has to be
    # installed alongside the Python sources.
    package_data={"vanwanet_scrape": ["aes.js"]},
    install_requires=[
        "requests",
        "bs4",
        # A bare VCS URL is not a valid requirement specifier; a direct
        # reference needs the PEP 508 "name @ url" form. https is used because
        # GitHub no longer serves the unauthenticated git:// protocol.
        "hexlib @ git+https://github.com/simon987/hexlib.git",
    ],
)

View File

123
vanwanet_scrape/aes.js Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,72 @@
import re
import subprocess
from http.cookiejar import CookieJar
from pathlib import Path

import requests
from bs4 import BeautifulSoup
from hexlib.web import cookie_from_string
# Source of the bundled AES implementation, prepended to every challenge
# script. The path is resolved relative to this module instead of the current
# working directory so that importing works from any directory.
# NOTE(review): assumes aes.js is installed next to this module -- confirm.
with open(Path(__file__).with_name("aes.js"), "r", encoding="utf-8") as f:
    AES = f.read()

# Matches the challenge statement `document.cookie="...";location...` up to the
# end of the script; group 1 captures the cookie string.
SUB_PATTRN = re.compile(r'document\.cookie="(.+)";location.+$')
class Scraper:
    """``requests`` session wrapper that solves the VanwaNet JavaScript challenge.

    When a response is a challenge page, its inline script is executed with
    ``node`` to compute the mitigation cookie, the cookie is stored for every
    configured domain, and the request is issued again.
    """

    # Upper bound on challenge solves per ``get`` call, so a cookie that keeps
    # being rejected cannot cause endless retries.
    MAX_CHALLENGE_ATTEMPTS = 3

    # Seconds the ``node`` subprocess may run before it is killed.
    NODE_TIMEOUT = 30

    def __init__(self, domains: list, headers=None, proxies=None):
        """Create the underlying session.

        :param domains: domains the solved challenge cookie is registered for.
        :param headers: optional value that replaces the session's headers.
        :param proxies: optional value that replaces the session's proxies.
        """
        self._session = requests.session()
        self._domains = domains
        # The session's default jar is swapped for a plain CookieJar.
        self._session.cookies = CookieJar()

        if headers:
            self._session.headers = headers
        if proxies:
            self._session.proxies = proxies

    def _get(self, url, **kwargs):
        """Issue a plain GET through the session, without challenge handling."""
        return self._session.get(url, **kwargs)

    def get(self, url, **kwargs):
        """GET ``url``, solving challenge pages along the way.

        :param url: URL to fetch.
        :param kwargs: passed through unchanged to the session's ``get``.
        :return: the first response that is not a challenge page.
        :raises ValueError: if the challenge script cannot be found or
            executed, or a challenge page is still served after
            ``MAX_CHALLENGE_ATTEMPTS`` solves.
        """
        r = self._get(url, **kwargs)
        attempts = 0

        # Iterative and bounded: the previous unbounded recursion would keep
        # hitting the server until the interpreter's recursion limit.
        while Scraper._is_challenge_page(r):
            if attempts >= self.MAX_CHALLENGE_ATTEMPTS:
                raise ValueError(
                    "Challenge page still served after %d attempts" % attempts
                )
            attempts += 1

            cookie = Scraper._execute_challenge(
                Scraper._transform_js(Scraper._get_js(r))
            )
            for domain in self._domains:
                self._session.cookies.set_cookie(cookie_from_string(cookie, domain))

            r = self._get(url, **kwargs)

        return r

    @staticmethod
    def _is_challenge_page(r):
        """Return True when the response body looks like a challenge page.

        :param r: response object exposing a ``text`` attribute.
        """
        return r.text.startswith("<iframe ") and "VanwaNetDDoSMitigation=" in r.text

    @staticmethod
    def _get_js(r):
        """Extract the source of the first ``<script>`` without a ``src``.

        :raises ValueError: if the page has no inline script.
        """
        soup = BeautifulSoup(r.text, "html.parser")
        script = soup.find("script", src=lambda x: not x)
        if script is None:
            raise ValueError("Challenge page contains no inline <script>")
        return script.text

    @staticmethod
    def _transform_js(js):
        """Rewrite the challenge script so it prints the cookie.

        The ``document.cookie="...";location...`` statement is replaced by a
        ``console.log`` of the cookie string, and the module-level ``AES``
        source is prepended.
        """
        # Print cookie to console instead
        challenge = SUB_PATTRN.sub(r'console.log("\1");', js)
        return AES + challenge

    @staticmethod
    def _execute_challenge(js):
        """Run ``js`` with ``node`` and return what it printed, stripped.

        :raises ValueError: if node times out, exits with a non-zero status,
            or prints nothing.
        """
        # TODO: run in some kind of sandbox
        # SECURITY: ``js`` comes from the remote server and is executed with
        # the privileges of this process.
        try:
            node = subprocess.run(
                ["node", "-e", js],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                universal_newlines=True, timeout=Scraper.NODE_TIMEOUT,
            )
        except subprocess.TimeoutExpired as e:
            raise ValueError(
                "Challenge script timed out after %ss" % Scraper.NODE_TIMEOUT
            ) from e

        # Judge success by exit status: node may write harmless warnings to
        # stderr even when the script ran correctly.
        if node.returncode != 0:
            raise ValueError(
                node.stderr or "node exited with status %d" % node.returncode
            )

        # console.log appends a newline that is not part of the cookie.
        cookie = node.stdout.strip()
        if not cookie:
            raise ValueError("Challenge script produced no cookie")
        return cookie