mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-16 08:56:44 +00:00
Performance improvements
This commit is contained in:
parent
a12bca01c8
commit
ed5e35f005
21
crawl.go
21
crawl.go
@ -2,14 +2,14 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/terorie/oddb-go/ds/redblackhash"
|
||||
"github.com/terorie/oddb-go/fasturl"
|
||||
"github.com/valyala/fasthttp"
|
||||
"golang.org/x/crypto/blake2b"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
"net/url"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -18,12 +18,12 @@ import (
|
||||
|
||||
var client fasthttp.Client
|
||||
|
||||
func GetDir(j *Job, f *File) (links []url.URL, err error) {
|
||||
func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
||||
f.IsDir = true
|
||||
f.Name = path.Base(j.Uri.Path)
|
||||
|
||||
req := fasthttp.AcquireRequest()
|
||||
req.SetRequestURI(j.Uri.String())
|
||||
req.SetRequestURI(j.UriStr)
|
||||
|
||||
res := fasthttp.AcquireResponse()
|
||||
defer fasthttp.ReleaseResponse(res)
|
||||
@ -94,11 +94,10 @@ func GetDir(j *Job, f *File) (links []url.URL, err error) {
|
||||
}
|
||||
}
|
||||
|
||||
subref, err := url.Parse(href)
|
||||
var link fasturl.URL
|
||||
err = j.Uri.ParseRel(&link, href)
|
||||
if err != nil { continue }
|
||||
|
||||
link := *j.Uri.ResolveReference(subref)
|
||||
|
||||
if link.Scheme != j.Uri.Scheme ||
|
||||
link.Host != j.Uri.Host ||
|
||||
link.Path == j.Uri.Path ||
|
||||
@ -116,7 +115,7 @@ func GetDir(j *Job, f *File) (links []url.URL, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func GetFile(u url.URL, f *File) (err error) {
|
||||
func GetFile(u fasturl.URL, f *File) (err error) {
|
||||
f.IsDir = false
|
||||
u.Path = path.Clean(u.Path)
|
||||
f.Name = path.Base(u.Path)
|
||||
@ -145,7 +144,7 @@ func GetFile(u url.URL, f *File) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *File) HashDir(links []url.URL) string {
|
||||
func (f *File) HashDir(links []fasturl.URL) (o redblackhash.Key) {
|
||||
h, _ := blake2b.New256(nil)
|
||||
h.Write([]byte(f.Name))
|
||||
for _, link := range links {
|
||||
@ -153,8 +152,8 @@ func (f *File) HashDir(links []url.URL) string {
|
||||
h.Write([]byte(fileName))
|
||||
}
|
||||
sum := h.Sum(nil)
|
||||
b64sum := base64.StdEncoding.EncodeToString(sum)
|
||||
return b64sum
|
||||
copy(o[:redblackhash.KeySize], sum)
|
||||
return
|
||||
}
|
||||
|
||||
func (f *File) ParseHeader(h []byte) {
|
||||
|
521
ds/redblackhash/redblack.go
Normal file
521
ds/redblackhash/redblack.go
Normal file
@ -0,0 +1,521 @@
|
||||
// Copyright (c) 2015, Emir Pasic. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Modifications by terorie
|
||||
|
||||
// Package redblackhash implements a red-black tree keyed by fixed-size byte arrays.
|
||||
//
|
||||
// Used by TreeSet and TreeMap.
|
||||
//
|
||||
// Structure is not thread safe.
|
||||
//
|
||||
// References: http://en.wikipedia.org/wiki/Red%E2%80%93black_tree
|
||||
package redblackhash
|
||||
|
||||
import (
	"bytes"
	"fmt"
)
|
||||
|
||||
// color is the colour tag carried by every node: true means black,
// false means red.
type color bool

const (
	black color = true
	red   color = false
)

// KeySize is the length of a Key in bytes.
const KeySize = 64

// Key is the fixed-size byte array stored in each node of the tree.
type Key [KeySize]byte
|
||||
|
||||
// Tree holds elements of the red-black tree
|
||||
type Tree struct {
|
||||
Root *Node
|
||||
size int
|
||||
}
|
||||
|
||||
// Node is a single element within the tree
|
||||
type Node struct {
|
||||
Key Key
|
||||
color color
|
||||
Left *Node
|
||||
Right *Node
|
||||
Parent *Node
|
||||
}
|
||||
|
||||
func (k *Key) Compare(o *Key) int {
|
||||
// TODO Assembly
|
||||
/*for i := 0; i < KeySize / 8; i++ {
|
||||
a := uint64(k[i+0] ) |
|
||||
uint64(k[i+1] >> 8) |
|
||||
uint64(k[i+2] >> 16) |
|
||||
uint64(k[i+3] >> 24) |
|
||||
uint64(k[i+4] >> 32) |
|
||||
uint64(k[i+5] >> 40) |
|
||||
uint64(k[i+6] >> 48) |
|
||||
uint64(k[i+7] >> 56)
|
||||
|
||||
b := uint64(o[i+0] ) |
|
||||
uint64(o[i+1] >> 8) |
|
||||
uint64(o[i+2] >> 16) |
|
||||
uint64(o[i+3] >> 24) |
|
||||
uint64(o[i+4] >> 32) |
|
||||
uint64(o[i+5] >> 40) |
|
||||
uint64(o[i+6] >> 48) |
|
||||
uint64(o[i+7] >> 56)
|
||||
|
||||
switch {
|
||||
case a < b:
|
||||
return -1
|
||||
case a > b:
|
||||
return 1
|
||||
}
|
||||
}*/
|
||||
for i := 0; i < KeySize; i++ {
|
||||
switch {
|
||||
case k[i] < o[i]:
|
||||
return -1
|
||||
case k[i] > o[i]:
|
||||
return 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Put inserts node into the tree.
|
||||
// Key should adhere to the comparator's type assertion, otherwise method panics.
|
||||
func (tree *Tree) Put(key *Key) {
|
||||
var insertedNode *Node
|
||||
if tree.Root == nil {
|
||||
// Assert key is of comparator's type for initial tree
|
||||
tree.Root = &Node{Key: *key, color: red}
|
||||
insertedNode = tree.Root
|
||||
} else {
|
||||
node := tree.Root
|
||||
loop := true
|
||||
for loop {
|
||||
compare := key.Compare(&node.Key)
|
||||
switch {
|
||||
case compare == 0:
|
||||
node.Key = *key
|
||||
return
|
||||
case compare < 0:
|
||||
if node.Left == nil {
|
||||
node.Left = &Node{Key: *key, color: red}
|
||||
insertedNode = node.Left
|
||||
loop = false
|
||||
} else {
|
||||
node = node.Left
|
||||
}
|
||||
case compare > 0:
|
||||
if node.Right == nil {
|
||||
node.Right = &Node{Key: *key, color: red}
|
||||
insertedNode = node.Right
|
||||
loop = false
|
||||
} else {
|
||||
node = node.Right
|
||||
}
|
||||
}
|
||||
}
|
||||
insertedNode.Parent = node
|
||||
}
|
||||
tree.insertCase1(insertedNode)
|
||||
tree.size++
|
||||
}
|
||||
|
||||
// Get searches the node in the tree by key and returns its value or nil if key is not found in tree.
|
||||
// Second return parameter is true if key was found, otherwise false.
|
||||
// Key should adhere to the comparator's type assertion, otherwise method panics.
|
||||
func (tree *Tree) Get(key *Key) (found bool) {
|
||||
node := tree.lookup(key)
|
||||
return node != nil
|
||||
}
|
||||
|
||||
// Remove remove the node from the tree by key.
|
||||
// Key should adhere to the comparator's type assertion, otherwise method panics.
|
||||
func (tree *Tree) Remove(key *Key) {
|
||||
var child *Node
|
||||
node := tree.lookup(key)
|
||||
if node == nil {
|
||||
return
|
||||
}
|
||||
if node.Left != nil && node.Right != nil {
|
||||
pred := node.Left.maximumNode()
|
||||
node.Key = pred.Key
|
||||
node = pred
|
||||
}
|
||||
if node.Left == nil || node.Right == nil {
|
||||
if node.Right == nil {
|
||||
child = node.Left
|
||||
} else {
|
||||
child = node.Right
|
||||
}
|
||||
if node.color == black {
|
||||
node.color = nodeColor(child)
|
||||
tree.deleteCase1(node)
|
||||
}
|
||||
tree.replaceNode(node, child)
|
||||
if node.Parent == nil && child != nil {
|
||||
child.color = black
|
||||
}
|
||||
}
|
||||
tree.size--
|
||||
}
|
||||
|
||||
// Empty returns true if tree does not contain any nodes
|
||||
func (tree *Tree) Empty() bool {
|
||||
return tree.size == 0
|
||||
}
|
||||
|
||||
// Size returns number of nodes in the tree.
|
||||
func (tree *Tree) Size() int {
|
||||
return tree.size
|
||||
}
|
||||
|
||||
// Left returns the left-most (min) node or nil if tree is empty.
|
||||
func (tree *Tree) Left() *Node {
|
||||
var parent *Node
|
||||
current := tree.Root
|
||||
for current != nil {
|
||||
parent = current
|
||||
current = current.Left
|
||||
}
|
||||
return parent
|
||||
}
|
||||
|
||||
// Right returns the right-most (max) node or nil if tree is empty.
|
||||
func (tree *Tree) Right() *Node {
|
||||
var parent *Node
|
||||
current := tree.Root
|
||||
for current != nil {
|
||||
parent = current
|
||||
current = current.Right
|
||||
}
|
||||
return parent
|
||||
}
|
||||
|
||||
// Floor Finds floor node of the input key, return the floor node or nil if no floor is found.
|
||||
// Second return parameter is true if floor was found, otherwise false.
|
||||
//
|
||||
// Floor node is defined as the largest node that is smaller than or equal to the given node.
|
||||
// A floor node may not be found, either because the tree is empty, or because
|
||||
// all nodes in the tree are larger than the given node.
|
||||
//
|
||||
// Key should adhere to the comparator's type assertion, otherwise method panics.
|
||||
func (tree *Tree) Floor(key *Key) (floor *Node, found bool) {
|
||||
found = false
|
||||
node := tree.Root
|
||||
for node != nil {
|
||||
compare := key.Compare(&node.Key)
|
||||
switch {
|
||||
case compare == 0:
|
||||
return node, true
|
||||
case compare < 0:
|
||||
node = node.Left
|
||||
case compare > 0:
|
||||
floor, found = node, true
|
||||
node = node.Right
|
||||
}
|
||||
}
|
||||
if found {
|
||||
return floor, true
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// Ceiling finds ceiling node of the input key, return the ceiling node or nil if no ceiling is found.
|
||||
// Second return parameter is true if ceiling was found, otherwise false.
|
||||
//
|
||||
// Ceiling node is defined as the smallest node that is larger than or equal to the given node.
|
||||
// A ceiling node may not be found, either because the tree is empty, or because
|
||||
// all nodes in the tree are smaller than the given node.
|
||||
//
|
||||
// Key should adhere to the comparator's type assertion, otherwise method panics.
|
||||
func (tree *Tree) Ceiling(key *Key) (ceiling *Node, found bool) {
|
||||
found = false
|
||||
node := tree.Root
|
||||
for node != nil {
|
||||
compare := key.Compare(&node.Key)
|
||||
switch {
|
||||
case compare == 0:
|
||||
return node, true
|
||||
case compare < 0:
|
||||
ceiling, found = node, true
|
||||
node = node.Left
|
||||
case compare > 0:
|
||||
node = node.Right
|
||||
}
|
||||
}
|
||||
if found {
|
||||
return ceiling, true
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// Clear removes all nodes from the tree.
|
||||
func (tree *Tree) Clear() {
|
||||
tree.Root = nil
|
||||
tree.size = 0
|
||||
}
|
||||
|
||||
// String returns a string representation of container
|
||||
func (tree *Tree) String() string {
|
||||
str := "RedBlackTree\n"
|
||||
if !tree.Empty() {
|
||||
output(tree.Root, "", true, &str)
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
func (node *Node) String() string {
|
||||
return fmt.Sprintf("%v", node.Key)
|
||||
}
|
||||
|
||||
func output(node *Node, prefix string, isTail bool, str *string) {
|
||||
if node.Right != nil {
|
||||
newPrefix := prefix
|
||||
if isTail {
|
||||
newPrefix += "│ "
|
||||
} else {
|
||||
newPrefix += " "
|
||||
}
|
||||
output(node.Right, newPrefix, false, str)
|
||||
}
|
||||
*str += prefix
|
||||
if isTail {
|
||||
*str += "└── "
|
||||
} else {
|
||||
*str += "┌── "
|
||||
}
|
||||
*str += node.String() + "\n"
|
||||
if node.Left != nil {
|
||||
newPrefix := prefix
|
||||
if isTail {
|
||||
newPrefix += " "
|
||||
} else {
|
||||
newPrefix += "│ "
|
||||
}
|
||||
output(node.Left, newPrefix, true, str)
|
||||
}
|
||||
}
|
||||
|
||||
func (tree *Tree) lookup(key *Key) *Node {
|
||||
node := tree.Root
|
||||
for node != nil {
|
||||
compare := key.Compare(&node.Key)
|
||||
switch {
|
||||
case compare == 0:
|
||||
return node
|
||||
case compare < 0:
|
||||
node = node.Left
|
||||
case compare > 0:
|
||||
node = node.Right
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (node *Node) grandparent() *Node {
|
||||
if node != nil && node.Parent != nil {
|
||||
return node.Parent.Parent
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (node *Node) uncle() *Node {
|
||||
if node == nil || node.Parent == nil || node.Parent.Parent == nil {
|
||||
return nil
|
||||
}
|
||||
return node.Parent.sibling()
|
||||
}
|
||||
|
||||
func (node *Node) sibling() *Node {
|
||||
if node == nil || node.Parent == nil {
|
||||
return nil
|
||||
}
|
||||
if node == node.Parent.Left {
|
||||
return node.Parent.Right
|
||||
}
|
||||
return node.Parent.Left
|
||||
}
|
||||
|
||||
func (tree *Tree) rotateLeft(node *Node) {
|
||||
right := node.Right
|
||||
tree.replaceNode(node, right)
|
||||
node.Right = right.Left
|
||||
if right.Left != nil {
|
||||
right.Left.Parent = node
|
||||
}
|
||||
right.Left = node
|
||||
node.Parent = right
|
||||
}
|
||||
|
||||
func (tree *Tree) rotateRight(node *Node) {
|
||||
left := node.Left
|
||||
tree.replaceNode(node, left)
|
||||
node.Left = left.Right
|
||||
if left.Right != nil {
|
||||
left.Right.Parent = node
|
||||
}
|
||||
left.Right = node
|
||||
node.Parent = left
|
||||
}
|
||||
|
||||
func (tree *Tree) replaceNode(old *Node, new *Node) {
|
||||
if old.Parent == nil {
|
||||
tree.Root = new
|
||||
} else {
|
||||
if old == old.Parent.Left {
|
||||
old.Parent.Left = new
|
||||
} else {
|
||||
old.Parent.Right = new
|
||||
}
|
||||
}
|
||||
if new != nil {
|
||||
new.Parent = old.Parent
|
||||
}
|
||||
}
|
||||
|
||||
func (tree *Tree) insertCase1(node *Node) {
|
||||
if node.Parent == nil {
|
||||
node.color = black
|
||||
} else {
|
||||
tree.insertCase2(node)
|
||||
}
|
||||
}
|
||||
|
||||
func (tree *Tree) insertCase2(node *Node) {
|
||||
if nodeColor(node.Parent) == black {
|
||||
return
|
||||
}
|
||||
tree.insertCase3(node)
|
||||
}
|
||||
|
||||
func (tree *Tree) insertCase3(node *Node) {
|
||||
uncle := node.uncle()
|
||||
if nodeColor(uncle) == red {
|
||||
node.Parent.color = black
|
||||
uncle.color = black
|
||||
node.grandparent().color = red
|
||||
tree.insertCase1(node.grandparent())
|
||||
} else {
|
||||
tree.insertCase4(node)
|
||||
}
|
||||
}
|
||||
|
||||
func (tree *Tree) insertCase4(node *Node) {
|
||||
grandparent := node.grandparent()
|
||||
if node == node.Parent.Right && node.Parent == grandparent.Left {
|
||||
tree.rotateLeft(node.Parent)
|
||||
node = node.Left
|
||||
} else if node == node.Parent.Left && node.Parent == grandparent.Right {
|
||||
tree.rotateRight(node.Parent)
|
||||
node = node.Right
|
||||
}
|
||||
tree.insertCase5(node)
|
||||
}
|
||||
|
||||
func (tree *Tree) insertCase5(node *Node) {
|
||||
node.Parent.color = black
|
||||
grandparent := node.grandparent()
|
||||
grandparent.color = red
|
||||
if node == node.Parent.Left && node.Parent == grandparent.Left {
|
||||
tree.rotateRight(grandparent)
|
||||
} else if node == node.Parent.Right && node.Parent == grandparent.Right {
|
||||
tree.rotateLeft(grandparent)
|
||||
}
|
||||
}
|
||||
|
||||
func (node *Node) maximumNode() *Node {
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
for node.Right != nil {
|
||||
node = node.Right
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
func (tree *Tree) deleteCase1(node *Node) {
|
||||
if node.Parent == nil {
|
||||
return
|
||||
}
|
||||
tree.deleteCase2(node)
|
||||
}
|
||||
|
||||
func (tree *Tree) deleteCase2(node *Node) {
|
||||
sibling := node.sibling()
|
||||
if nodeColor(sibling) == red {
|
||||
node.Parent.color = red
|
||||
sibling.color = black
|
||||
if node == node.Parent.Left {
|
||||
tree.rotateLeft(node.Parent)
|
||||
} else {
|
||||
tree.rotateRight(node.Parent)
|
||||
}
|
||||
}
|
||||
tree.deleteCase3(node)
|
||||
}
|
||||
|
||||
func (tree *Tree) deleteCase3(node *Node) {
|
||||
sibling := node.sibling()
|
||||
if nodeColor(node.Parent) == black &&
|
||||
nodeColor(sibling) == black &&
|
||||
nodeColor(sibling.Left) == black &&
|
||||
nodeColor(sibling.Right) == black {
|
||||
sibling.color = red
|
||||
tree.deleteCase1(node.Parent)
|
||||
} else {
|
||||
tree.deleteCase4(node)
|
||||
}
|
||||
}
|
||||
|
||||
func (tree *Tree) deleteCase4(node *Node) {
|
||||
sibling := node.sibling()
|
||||
if nodeColor(node.Parent) == red &&
|
||||
nodeColor(sibling) == black &&
|
||||
nodeColor(sibling.Left) == black &&
|
||||
nodeColor(sibling.Right) == black {
|
||||
sibling.color = red
|
||||
node.Parent.color = black
|
||||
} else {
|
||||
tree.deleteCase5(node)
|
||||
}
|
||||
}
|
||||
|
||||
func (tree *Tree) deleteCase5(node *Node) {
|
||||
sibling := node.sibling()
|
||||
if node == node.Parent.Left &&
|
||||
nodeColor(sibling) == black &&
|
||||
nodeColor(sibling.Left) == red &&
|
||||
nodeColor(sibling.Right) == black {
|
||||
sibling.color = red
|
||||
sibling.Left.color = black
|
||||
tree.rotateRight(sibling)
|
||||
} else if node == node.Parent.Right &&
|
||||
nodeColor(sibling) == black &&
|
||||
nodeColor(sibling.Right) == red &&
|
||||
nodeColor(sibling.Left) == black {
|
||||
sibling.color = red
|
||||
sibling.Right.color = black
|
||||
tree.rotateLeft(sibling)
|
||||
}
|
||||
tree.deleteCase6(node)
|
||||
}
|
||||
|
||||
func (tree *Tree) deleteCase6(node *Node) {
|
||||
sibling := node.sibling()
|
||||
sibling.color = nodeColor(node.Parent)
|
||||
node.Parent.color = black
|
||||
if node == node.Parent.Left && nodeColor(sibling.Right) == red {
|
||||
sibling.Right.color = black
|
||||
tree.rotateLeft(node.Parent)
|
||||
} else if nodeColor(sibling.Left) == red {
|
||||
sibling.Left.color = black
|
||||
tree.rotateRight(node.Parent)
|
||||
}
|
||||
}
|
||||
|
||||
func nodeColor(node *Node) color {
|
||||
if node == nil {
|
||||
return black
|
||||
}
|
||||
return node.color
|
||||
}
|
7
main.go
7
main.go
@ -3,11 +3,11 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/terorie/oddb-go/fasturl"
|
||||
"github.com/urfave/cli"
|
||||
"log"
|
||||
"net/http"
|
||||
_ "net/http/pprof"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
@ -55,12 +55,13 @@ func cmdCrawler(clic *cli.Context) error {
|
||||
if !strings.Contains(arg, "://") {
|
||||
arg = "http://" + arg
|
||||
}
|
||||
u, err := url.Parse(arg)
|
||||
var u fasturl.URL
|
||||
err := u.Parse(arg)
|
||||
if !strings.HasSuffix(u.Path, "/") {
|
||||
u.Path += "/"
|
||||
}
|
||||
if err != nil { return err }
|
||||
remotes[i] = &OD{ BaseUri: *u }
|
||||
remotes[i] = &OD{ BaseUri: u }
|
||||
}
|
||||
|
||||
c := context.Background()
|
||||
|
23
model.go
23
model.go
@ -1,14 +1,15 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"github.com/terorie/oddb-go/ds/redblackhash"
|
||||
"github.com/terorie/oddb-go/fasturl"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Job struct {
|
||||
OD *OD
|
||||
Uri url.URL
|
||||
Uri fasturl.URL
|
||||
UriStr string
|
||||
Fails int
|
||||
LastError error
|
||||
@ -16,11 +17,12 @@ type Job struct {
|
||||
|
||||
type OD struct {
|
||||
Wait sync.WaitGroup
|
||||
BaseUri url.URL
|
||||
lock sync.Mutex
|
||||
BaseUri fasturl.URL
|
||||
Files []File
|
||||
WCtx WorkerContext
|
||||
Scanned sync.Map
|
||||
Scanned redblackhash.Tree
|
||||
|
||||
lock sync.Mutex
|
||||
}
|
||||
|
||||
type File struct {
|
||||
@ -30,3 +32,14 @@ type File struct {
|
||||
Path string `json:"path"`
|
||||
IsDir bool `json:"-"`
|
||||
}
|
||||
|
||||
func (o *OD) LoadOrStoreKey(k *redblackhash.Key) (exists bool) {
|
||||
o.lock.Lock()
|
||||
defer o.lock.Unlock()
|
||||
|
||||
exists = o.Scanned.Get(k)
|
||||
if exists { return true }
|
||||
|
||||
o.Scanned.Put(k)
|
||||
return false
|
||||
}
|
||||
|
26
worker.go
26
worker.go
@ -3,7 +3,6 @@ package main
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
"math"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@ -67,12 +66,13 @@ func (w WorkerContext) step(job Job) {
|
||||
}
|
||||
|
||||
func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
||||
if strings.HasSuffix(job.Uri.Path, "/") {
|
||||
if len(job.Uri.Path) == 0 { return }
|
||||
if job.Uri.Path[len(job.Uri.Path)-1] == '/' {
|
||||
// Load directory
|
||||
links, err := GetDir(job, f)
|
||||
if err != nil {
|
||||
logrus.WithError(err).
|
||||
WithField("url", job.Uri.String()).
|
||||
WithField("url", job.UriStr).
|
||||
Error("Failed getting dir")
|
||||
return nil, err
|
||||
}
|
||||
@ -81,15 +81,15 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
||||
hash := f.HashDir(links)
|
||||
|
||||
// Skip symlinked dirs
|
||||
if _, old := job.OD.Scanned.LoadOrStore(hash, true); old {
|
||||
if job.OD.LoadOrStoreKey(&hash) {
|
||||
return nil, ErrKnown
|
||||
}
|
||||
|
||||
for _, link := range links {
|
||||
// Skip already queued links
|
||||
if _, old := job.OD.Scanned.LoadOrStore(link, true); old {
|
||||
continue
|
||||
}
|
||||
//if _, old := job.OD.Scanned.LoadOrStore(link, true); old {
|
||||
// continue
|
||||
//}
|
||||
job.OD.Wait.Add(1)
|
||||
newJobs = append(newJobs, Job{
|
||||
OD: job.OD,
|
||||
@ -98,16 +98,18 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
||||
Fails: 0,
|
||||
})
|
||||
}
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"url": job.UriStr,
|
||||
"files": len(links),
|
||||
}).Debug("Listed")
|
||||
if config.Verbose {
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"url": job.UriStr,
|
||||
"files": len(links),
|
||||
}).Debug("Listed")
|
||||
}
|
||||
} else {
|
||||
// Load file
|
||||
err := GetFile(job.Uri, f)
|
||||
if err != nil {
|
||||
logrus.WithError(err).
|
||||
WithField("url", job.Uri.String()).
|
||||
WithField("url", job.UriStr).
|
||||
Error("Failed getting file")
|
||||
return nil, err
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user