Mirror of https://github.com/simon987/vanwanet_scrape.git (synced 2025-04-10 14:06:43 +00:00)
initial commit
commit b8d1c2bcc3, parent d1bd0a6bb4
2 .gitignore vendored Normal file
@@ -0,0 +1,2 @@
.idea/
*.iml
15 README.md Normal file
@@ -0,0 +1,15 @@
*In early development*

Python `requests` wrapper with VanwaNet DDoS protection bypass

A `node` environment is required.

### Example usage

```python
from vanwanet_scrape.scraper import Scraper

s = Scraper(domains=["8kun.top"])
r = s.get("https://8kun.top/index.html", timeout=10)
```

### Installation

```
pip install git+git://github.com/simon987/vanwanet_scrape.git
```
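Beyond the minimal example, `Scraper.__init__` (see scraper.py below) also accepts optional `headers` and `proxies`, and `domains` lists every host that should receive the solved challenge cookie. A short sketch; the proxy URL and user agent are made-up placeholders:

```python
from vanwanet_scrape.scraper import Scraper

s = Scraper(
    domains=["8kun.top"],  # hosts that get the solved challenge cookie
    headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"},  # hypothetical UA
    proxies={"https": "socks5://localhost:9050"},  # hypothetical proxy
)
r = s.get("https://8kun.top/index.html", timeout=10)
```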
13 setup.py Normal file
@@ -0,0 +1,13 @@
from setuptools import setup

setup(
    name="vanwanet_scrape",
    version="1.0",
    description="VanwaTech DDoS protection bypass",
    author="simon987",
    author_email="me@simon987.net",
    packages=["vanwanet_scrape"],
    install_requires=[
        "requests",
        "bs4",
        # setuptools does not accept a bare VCS URL in install_requires;
        # use a PEP 508 direct reference instead
        "hexlib @ git+https://github.com/simon987/hexlib.git",
    ]
)
0 vanwanet_scrape/__init__.py Normal file
123 vanwanet_scrape/aes.js Normal file
File diff suppressed because one or more lines are too long
72 vanwanet_scrape/scraper.py Normal file
@@ -0,0 +1,72 @@
import os
import re
import subprocess
from http.cookiejar import CookieJar

import requests
from bs4 import BeautifulSoup
from hexlib.web import cookie_from_string

# Load the bundled AES implementation relative to this module,
# not the current working directory
with open(os.path.join(os.path.dirname(__file__), "aes.js"), "r") as f:
    AES = f.read()

# Matches the challenge script's final statement, which sets the
# mitigation cookie and reloads the page
SUB_PATTERN = re.compile(r'document\.cookie="(.+)";location.+$')


class Scraper:

    def __init__(self, domains: list, headers=None, proxies=None):
        self._session = requests.session()
        self._domains = domains
        self._session.cookies = CookieJar()

        if headers:
            self._session.headers = headers
        if proxies:
            self._session.proxies = proxies

    def _get(self, url, **kwargs):
        return self._session.get(url, **kwargs)

    def get(self, url, **kwargs):
        r = self._get(url, **kwargs)

        if Scraper._is_challenge_page(r):
            # Solve the JavaScript challenge, attach the resulting cookie
            # to every configured domain, then retry the request
            cookie = Scraper._execute_challenge(Scraper._transform_js(Scraper._get_js(r)))

            for domain in self._domains:
                self._session.cookies.set_cookie(cookie_from_string(cookie, domain))

            return self.get(url, **kwargs)
        return r

    @staticmethod
    def _is_challenge_page(r):
        return r.text.startswith("<iframe ") and "VanwaNetDDoSMitigation=" in r.text

    @staticmethod
    def _get_js(r):
        # The challenge is the only inline (src-less) <script> on the page
        soup = BeautifulSoup(r.text, "html.parser")
        return soup.find("script", src=lambda x: not x).text

    @staticmethod
    def _transform_js(js):
        # Print the cookie to the console instead of setting it and reloading
        challenge = SUB_PATTERN.sub(r'console.log("\1");', js)
        return AES + challenge

    @staticmethod
    def _execute_challenge(js):
        # TODO: run in some kind of sandbox
        node = subprocess.Popen(
            ["node", "-e", js], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            universal_newlines=True
        )
        result, stderr = node.communicate()

        if stderr != "":
            raise ValueError(stderr)

        # Strip the trailing newline added by console.log
        return result.strip()
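To make the `_transform_js` step concrete: the regex rewrites the challenge script's closing `document.cookie=...;location...` statement into a `console.log(...)` call, so that when node evaluates the AES payload plus the rewritten challenge, the solved cookie lands on stdout. A standalone sketch with a made-up challenge tail:

```python
import re

SUB_PATTERN = re.compile(r'document\.cookie="(.+)";location.+$')

# Hypothetical final line of a VanwaNet challenge script (illustrative only)
tail = 'document.cookie="VanwaNetDDoSMitigation=abc123; path=/";location.reload();'

print(SUB_PATTERN.sub(r'console.log("\1");', tail))
# console.log("VanwaNetDDoSMitigation=abc123; path=/");
```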
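On the `TODO: run in some kind of sandbox` note in `_execute_challenge`: a minimal hardening sketch, assuming only that `node` is on `PATH`. A `subprocess.run` call with a `timeout` is not a sandbox (the script still runs with full node privileges), but it does stop a non-terminating challenge from hanging the scraper:

```python
import subprocess

def execute_challenge(js: str, timeout: float = 10.0) -> str:
    # Run the (AES + rewritten challenge) script under node with a time limit.
    # Raises subprocess.TimeoutExpired if the script does not finish in time.
    node = subprocess.run(
        ["node", "-e", js],
        capture_output=True, text=True, timeout=timeout,
    )
    if node.stderr:
        raise ValueError(node.stderr)
    return node.stdout.strip()
```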