# Mirror of https://github.com/simon987/od-database.git
# (synced 2025-11-03 22:46:52 +00:00)
# 128 lines, 3.2 KiB, Python
#! /usr/bin/env python
 | 
						|
 | 
						|
from threading import Thread
 | 
						|
from queue import Queue
 | 
						|
import os
 | 
						|
import time
 | 
						|
import ftputil
 | 
						|
import random
 | 
						|
 | 
						|
 | 
						|
class File:
    """One entry (file or directory) discovered in an FTP directory listing.

    The constructor stores its arguments unchanged:

    * ``name``   -- entry name, without its parent directory
    * ``size``   -- size value supplied by the caller
    * ``mtime``  -- modification time supplied by the caller
    * ``path``   -- directory that contains the entry
    * ``is_dir`` -- True when the entry is a directory
    """

    def __init__(self, name: str, size: int, mtime: str, path: str, is_dir: bool):
        self.name = name
        self.size = size
        # NOTE(review): annotated as str, but FTPConnection.list passes
        # stat.st_mtime through unchanged -- confirm the intended type.
        self.mtime = mtime
        self.path = path
        self.is_dir = is_dir

    def __str__(self):
        """Return ``"DIR <path>/<name>"`` or ``"FILE <path>/<name>"``."""
        kind = "DIR " if self.is_dir else "FILE "
        # Strip trailing slashes so entries directly under "/" are shown as
        # "/name" rather than "//name".
        return kind + self.path.rstrip("/") + "/" + self.name
 | 
						|
 | 
						|
 | 
						|
class FTPConnection(object):
    """Anonymous FTP connection that reconnects and retries failed listings.

    Constructing an instance blocks until a connection to ``host`` has been
    established (see ``stop_when_connected``).
    """

    def __init__(self, host):
        """Store ``host`` and connect to it.

        :param host: address passed to ``ftputil.FTPHost``
        """
        self.host = host
        self.failed_attempts = 0
        self.max_attempts = 5
        self._list_fn = None
        # Set before connecting so the attribute always exists.
        self.ftp = None
        self.stop_when_connected()

    def _connect(self):
        """Open an anonymous FTP session and store it in ``self.ftp``."""
        # print() does not interpolate logging-style arguments, so the
        # message is formatted explicitly.
        print("CONNECT %s ATTEMPT" % self.host)
        self.ftp = ftputil.FTPHost(self.host, "anonymous", "od-database")
        print("CONNECT %s SUCCESS" % self.host)

    def stop_when_connected(self):
        """Block until ``_connect`` succeeds, retrying indefinitely.

        Sleeps 5 seconds scaled by a random factor in [0.5, 1.5] between
        attempts.
        """
        # TODO: Max retries
        # A loop rather than recursion: a long outage must not end in
        # RecursionError.
        while True:
            try:
                self._connect()
                return
            except Exception:
                print("CONNECT %s FAILED; trying again..." % self.host)
                time.sleep(5 * random.uniform(0.5, 1.5))

    def list(self, path) -> list:
        """Return a ``File`` object for every entry of the directory ``path``.

        Directories are reported with ``size=-1``.  Errors raised by the
        underlying FTP calls propagate to the caller.
        """
        results = []
        self.ftp.chdir(path)
        file_names = self.ftp.listdir(path)

        for file_name in file_names:
            # Relative name: the working directory was just changed to path.
            stat = self.ftp.stat(file_name)
            # Join with the FTP host's own path helper instead of os.path so
            # the remote path does not depend on the local platform.
            is_dir = self.ftp.path.isdir(self.ftp.path.join(path, file_name))

            results.append(File(
                name=file_name,
                mtime=stat.st_mtime,
                size=-1 if is_dir else stat.st_size,
                is_dir=is_dir,
                path=path
            ))

        return results

    def process_path(self, path):
        """List ``path``, reconnecting and retrying on failure.

        :return: the result of ``list(path)``, or ``[]`` once
                 ``max_attempts`` consecutive attempts have failed.
        """
        while self.failed_attempts < self.max_attempts:
            try:
                results = self.list(path)
                self.failed_attempts = 0
                return results
            except Exception as e:
                print(e)
                self.failed_attempts += 1
                try:
                    self.ftp.close()
                except Exception as close_error:
                    # Closing a broken session may fail too; that must not
                    # abort the retry loop.
                    print(close_error)
                print("LIST FAILED; reconnecting...")
                time.sleep(2 * random.uniform(0.5, 1.5))
                self.stop_when_connected()

        # if I get here, I never succeeded in getting the data
        print("LIST ABANDONED %s" % path)
        self.failed_attempts = 0
        return []
 | 
						|
 | 
						|
 | 
						|
def process_and_queue(host, q: Queue):
    """Worker loop: take entries from ``q`` and enqueue directory contents.

    Opens its own ``FTPConnection`` to ``host`` and then runs forever.  For
    every directory taken from the queue, its listing is fetched and each
    child is put back on the queue; non-directory entries are only marked
    as done.

    :param host: FTP host address handed to ``FTPConnection``
    :param q: queue of ``File`` objects shared between the workers
    """
    # FTP paths always use "/" separators, independent of the local OS.
    import posixpath

    ftp = FTPConnection(host)

    while True:
        file = q.get()

        try:
            if file.is_dir:
                print(file)
                listing = ftp.process_path(posixpath.join(file.path, file.name))
                for f in listing:
                    q.put(f)
        except Exception as e:
            # Keep the worker alive; a dead worker would leave queued items
            # unprocessed and q.join() waiting forever.
            print(e)
        finally:
            # Always balance q.get(), otherwise q.join() never returns.
            q.task_done()
 | 
						|
 | 
						|
 | 
						|
def do_the_thing(host="80.252.155.68", num_threads=10):
    """Crawl an FTP server with a pool of worker threads.

    Lists the root directory over a temporary connection, starts
    ``num_threads`` daemon workers running ``process_and_queue``, seeds the
    shared queue with the root entries and blocks until every queued entry
    has been marked as done.

    :param host: FTP host to crawl (defaults to the previously hard-coded
                 address)
    :param num_threads: number of worker threads to start
    """
    ftp = FTPConnection(host)
    root_listing = ftp.process_path("/")
    # Each worker opens its own connection; this one is no longer needed.
    ftp.ftp.close()

    q = Queue(maxsize=0)

    for _ in range(num_threads):
        # daemon=True replaces the deprecated Thread.setDaemon(True); the
        # workers loop forever and must not keep the process alive.
        worker = Thread(target=process_and_queue, args=(host, q), daemon=True)
        worker.start()

    for file in root_listing:
        q.put(file)

    q.join()
 | 
						|
 | 
						|
 | 
						|
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    do_the_thing()
 |