Exclude dups in dir instead of keeping hashes of links

This commit is contained in:
Richard Patel 2018-11-11 23:11:30 +01:00
parent 9c8174dd8d
commit 4c071171eb
No known key found for this signature in database
GPG Key ID: C268B2BBDA2ABECB
2 changed files with 17 additions and 12 deletions

View File

@ -155,14 +155,6 @@ func (f *File) HashDir(links []fasturl.URL) (o redblackhash.Key) {
return
}
func HashString(s string) (o redblackhash.Key) {
h, _ := blake2b.New256(nil)
h.Write([]byte(s))
sum := h.Sum(nil)
copy(o[:redblackhash.KeySize], sum)
return
}
func (f *File) applyContentLength(v string) {
if v == "" { return }
size, err := strconv.ParseInt(v, 10, 64)

View File

@ -3,6 +3,8 @@ package main
import (
"github.com/sirupsen/logrus"
"math"
"sort"
"strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
@ -85,13 +87,22 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
return nil, ErrKnown
}
// Sort by path
sort.Slice(links, func(i, j int) bool {
return strings.Compare(links[i].Path, links[j].Path) < 0
})
var newJobCount int
var lastLink string
for _, link := range links {
uriStr := link.String()
// Skip already queued links
linkHash := HashString(uriStr) // Ignore dupes
if job.OD.LoadOrStoreKey(&linkHash) { if uriStr == lastLink {
continue
}
lastLink = uriStr
job.OD.Wait.Add(1)
newJobs = append(newJobs, Job{
OD: job.OD,
@ -99,11 +110,13 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
UriStr: uriStr,
Fails: 0,
})
newJobCount++
}
if config.Verbose {
logrus.WithFields(logrus.Fields{
"url": job.UriStr,
"files": len(links), "files": newJobCount,
}).Debug("Listed") }).Debug("Listed")
} }
} else { } else {