From 76c8c13d499d395a73351f3e2679564ac082dfc1 Mon Sep 17 00:00:00 2001 From: Richard Patel Date: Sat, 27 Oct 2018 16:55:00 +0200 Subject: [PATCH] Use finite state machine --- crawl.go | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/crawl.go b/crawl.go index 6f5c2ab..9f1b0ae 100644 --- a/crawl.go +++ b/crawl.go @@ -8,7 +8,6 @@ import ( "golang.org/x/net/html/atom" "net/url" "os" - "regexp" "strconv" "strings" "sync" @@ -28,8 +27,6 @@ var visited int64 var in chan<- url.URL var out <-chan url.URL -var matchHeader = regexp.MustCompile("([\\w-]+): (.*)") - type File struct { Name string `json:"name"` Size int64 `json:"size"` @@ -188,24 +185,12 @@ func fileInfo(u url.URL, f *File) (err error) { // TODO Inefficient af header := res.Header.Header() - s := time.Now() - for i := 0; i < 10000; i++ { - f.ParseHeaderRegex(header) - } - println(time.Since(s).String()) + f.ParseHeader(header) return nil } -func (f *File) ParseHeaderRegex(h []byte) { - for _, parts := range matchHeader.FindAllSubmatch(h, -1) { - k := string(parts[1]) - v := string(parts[2]) - f.applyHeader(k, v) - } -} - -func (f *File) ParseHeaderMachine(h []byte) { +func (f *File) ParseHeader(h []byte) { var k1, k2 int var v1, v2 int