File HEAD requests

This commit is contained in:
Richard Patel 2018-10-27 16:22:01 +02:00
parent 2844d344ec
commit d748be72cd
No known key found for this signature in database
GPG Key ID: C268B2BBDA2ABECB

View File

@ -1,12 +1,14 @@
package main
import (
"bytes"
"github.com/sirupsen/logrus"
"github.com/valyala/fasthttp"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"net/http"
"net/url"
"os"
"strconv"
"strings"
"sync"
"sync/atomic"
@ -17,7 +19,7 @@ const (
nConns = 100
)
var client = http.DefaultClient
var client = fasthttp.Client{}
var wait sync.WaitGroup
var visited int64
@ -25,6 +27,14 @@ var visited int64
var in chan<- url.URL
var out <-chan url.URL
type File struct {
Name string `json:"name"`
Size int64 `json:"size"`
MTime time.Time `json:"mtime"`
Path string `json:"path"`
IsDir bool `json:"-"`
}
func main() {
if len(os.Args) != 2 {
println("Usage: ./crawl <url>")
@ -62,27 +72,38 @@ func worker() {
for _, sub := range links {
subrefi, err := url.Parse(sub)
subref := *subrefi
// TODO Print errors
if err != nil { continue }
abs := *u.ResolveReference(&subref)
in <- abs
}
} else {
// File
// TODO check file
var fil File
err := fileInfo(u, &fil)
// TODO Print errors
if err != nil { continue }
}
}
wait.Done()
}
func listDir(u url.URL) (links []string) {
res, err := client.Get(u.String())
req := fasthttp.AcquireRequest()
req.SetRequestURI(u.String())
res := fasthttp.AcquireResponse()
defer fasthttp.ReleaseResponse(res)
err := client.Do(req, res)
fasthttp.ReleaseRequest(req)
if err != nil {
logrus.Error(err)
return
}
defer res.Body.Close()
doc := html.NewTokenizer(res.Body)
doc := html.NewTokenizer(bytes.NewReader(res.Body()))
var linkHref string
var linkTexts []string
@ -148,6 +169,55 @@ func listDir(u url.URL) (links []string) {
return
}
func fileInfo(u url.URL, f *File) (err error) {
req := fasthttp.AcquireRequest()
req.Header.SetMethod("HEAD")
req.SetRequestURI(u.String())
res := fasthttp.AcquireResponse()
res.SkipBody = true
defer fasthttp.ReleaseResponse(res)
err = client.Do(req, res)
fasthttp.ReleaseRequest(req)
if err != nil { return }
// TODO Inefficient af
header := res.Header.String()
for _, line := range strings.Split(header, "\r\n") {
if line == "" { continue }
if strings.HasPrefix(line, "HTTP/1") { continue }
parts := strings.SplitN(line, ": ", 2)
if len(parts) != 2 { continue }
k, v := parts[0], parts[1]
k = strings.ToLower(k)
switch k {
case "content-length":
size, err := strconv.ParseInt(v, 10, 64)
if err != nil { break }
if size < 0 { break }
f.Size = size
case "last-modified":
var err error
f.MTime, err = time.Parse(time.RFC1123, v)
if err == nil { break }
f.MTime, err = time.Parse(time.RFC850, v)
if err == nil { break }
// TODO Parse asctime
f.MTime, err = time.Parse("2006-01-02", v[:10])
if err == nil { break }
}
}
return nil
}
func makeInfinite() (chan<- url.URL, <-chan url.URL) {
in := make(chan url.URL)
out := make(chan url.URL)