mirror of
				https://github.com/simon987/od-database.git
				synced 2025-11-04 06:56:52 +00:00 
			
		
		
		
	File upload is made in small chunks
This commit is contained in:
		
							parent
							
								
									73afebec28
								
							
						
					
					
						commit
						898ffcf410
					
				
							
								
								
									
										35
									
								
								app.py
									
									
									
									
									
								
							
							
						
						
									
										35
									
								
								app.py
									
									
									
									
									
								
							@ -562,15 +562,7 @@ def api_complete_task():
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        if task:
 | 
					        if task:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if "file_list" in request.files:
 | 
					            filename = "./tmp/" + str(task_result.website_id) + ".json"
 | 
				
			||||||
                file = request.files['file_list']
 | 
					 | 
				
			||||||
                filename = "./tmp/" + str(task_result.website_id) + ".json"
 | 
					 | 
				
			||||||
                print("Saving temp file " + filename + " ...")
 | 
					 | 
				
			||||||
                file.save(filename)
 | 
					 | 
				
			||||||
                print("Done")
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                filename = None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            taskManager.complete_task(filename, task, task_result, name)
 | 
					            taskManager.complete_task(filename, task, task_result, name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if filename and os.path.exists(filename):
 | 
					            if filename and os.path.exists(filename):
 | 
				
			||||||
@ -585,6 +577,31 @@ def api_complete_task():
 | 
				
			|||||||
            return "No such task"
 | 
					            return "No such task"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@app.route("/api/task/upload", methods=["POST"])
 | 
				
			||||||
 | 
					def api_upload():
 | 
				
			||||||
 | 
					    token = request.form.get("token")
 | 
				
			||||||
 | 
					    website_id = request.form.get("website_id")
 | 
				
			||||||
 | 
					    name = db.check_api_token(token)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if name:
 | 
				
			||||||
 | 
					        if "file_list" in request.files:
 | 
				
			||||||
 | 
					            file = request.files['file_list']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            filename = "./tmp/" + str(website_id) + ".json"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if os.path.exists(filename):
 | 
				
			||||||
 | 
					                print("Appending chunk to existing file...")
 | 
				
			||||||
 | 
					                with open(filename, "ab") as f:
 | 
				
			||||||
 | 
					                    f.write(file.stream.read())
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                print("Saving temp file " + filename + " ...")
 | 
				
			||||||
 | 
					                file.save(filename)
 | 
				
			||||||
 | 
					                print("Done")
 | 
				
			||||||
 | 
					        return "ok"
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        return abort(403)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@app.route("/api/website/by_url", methods=["GET"])
 | 
					@app.route("/api/website/by_url", methods=["GET"])
 | 
				
			||||||
def api_website_by_url():
 | 
					def api_website_by_url():
 | 
				
			||||||
    token = request.args.get("token")
 | 
					    token = request.args.get("token")
 | 
				
			||||||
 | 
				
			|||||||
@ -45,21 +45,31 @@ class TaskManager:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            logger.info("Uploading file list in small chunks")
 | 
				
			||||||
 | 
					            filename = "./crawled/" + str(task_result.website_id) + ".json"
 | 
				
			||||||
 | 
					            CHUNK_SIZE = 1000000 * 10
 | 
				
			||||||
 | 
					            with open(filename) as f:
 | 
				
			||||||
 | 
					                chunk = f.read(CHUNK_SIZE)
 | 
				
			||||||
 | 
					                while chunk:
 | 
				
			||||||
 | 
					                    payload = {
 | 
				
			||||||
 | 
					                        "token": config.API_TOKEN,
 | 
				
			||||||
 | 
					                        "website_id": task_result.website_id
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    files = {
 | 
				
			||||||
 | 
					                        "file_list": chunk
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files)
 | 
				
			||||||
 | 
					                    logger.info("RESPONSE: " + r.text)
 | 
				
			||||||
 | 
					                    chunk = f.read(CHUNK_SIZE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            payload = {
 | 
					            payload = {
 | 
				
			||||||
                "token": config.API_TOKEN,
 | 
					                "token": config.API_TOKEN,
 | 
				
			||||||
                "result": json.dumps(task_result.to_json())
 | 
					                "result": json.dumps(task_result.to_json())
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            filename = "./crawled/" + str(task_result.website_id) + ".json"
 | 
					            r = requests.post(config.SERVER_URL + "/task/complete", data=payload)
 | 
				
			||||||
            if os.path.exists(filename):
 | 
					 | 
				
			||||||
                files = {
 | 
					 | 
				
			||||||
                    "file_list": open(filename)
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                files = None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            logger.info("RESPONSE: " + r.text)
 | 
					            logger.info("RESPONSE: " + r.text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if os.path.exists(filename):
 | 
					            if os.path.exists(filename):
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user