Decentralised crawling should work in theory + temporary fix for paging past the 10k-result maximum that Elasticsearch allows by default

Simon
2018-06-21 19:44:27 -04:00
parent 098ad2be72
commit 14d384e366
9 changed files with 275 additions and 84 deletions

@@ -168,7 +168,7 @@ class ElasticSearchEngine(SearchEngine):
                     "path": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]}
                 }
             },
-            "size": per_page, "from": page * per_page}, index=self.index_name)
+            "size": per_page, "from": min(page * per_page, 10000 - per_page)}, index=self.index_name)
         return page
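
For context, a minimal sketch of what this clamp does, assuming an elasticsearch-py client; the function name, index handling, and query shape below are illustrative assumptions, not the project's actual code:

# Hypothetical, simplified sketch of the clamp in the diff above.
# Client construction, index name and query shape are assumptions.
from elasticsearch import Elasticsearch

MAX_RESULT_WINDOW = 10000  # Elasticsearch's default index.max_result_window

def search_page(es: Elasticsearch, index_name: str, query: str, page: int, per_page: int):
    # Requesting from + size beyond 10,000 normally fails with a
    # search_phase_execution_exception, so the offset is capped instead.
    offset = min(page * per_page, MAX_RESULT_WINDOW - per_page)
    return es.search(index=index_name, body={
        "query": {"match": {"path": query}},  # assumed query; the real one differs
        "size": per_page,
        "from": offset,
    })

# Example: page 2000 at 50 results per page would ask for offset 100,000;
# the clamp caps it at 9,950 so the request stays inside the window.

Because the offset is capped rather than rejected, every page past the window returns the same last slice of results, which is why the commit message calls this a temporary fix; the usual longer-term options in Elasticsearch are the search_after parameter, the scroll API, or raising index.max_result_window.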