Compare finite state machine and Regex

This commit is contained in:
Richard Patel 2018-10-27 16:53:45 +02:00
parent 9e090d109d
commit 442a2cf8a7
No known key found for this signature in database
GPG Key ID: C268B2BBDA2ABECB

103
crawl.go
View File

@ -2,13 +2,13 @@ package main
import ( import (
"bytes" "bytes"
"errors"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/valyala/fasthttp" "github.com/valyala/fasthttp"
"golang.org/x/net/html" "golang.org/x/net/html"
"golang.org/x/net/html/atom" "golang.org/x/net/html/atom"
"net/url" "net/url"
"os" "os"
"regexp"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@ -28,6 +28,8 @@ var visited int64
var in chan<- url.URL var in chan<- url.URL
var out <-chan url.URL var out <-chan url.URL
var matchHeader = regexp.MustCompile("([\\w-]+): (.*)")
type File struct { type File struct {
Name string `json:"name"` Name string `json:"name"`
Size int64 `json:"size"` Size int64 `json:"size"`
@ -186,68 +188,81 @@ func fileInfo(u url.URL, f *File) (err error) {
// TODO Inefficient af // TODO Inefficient af
header := res.Header.Header() header := res.Header.Header()
s := time.Now()
for i := 0; i < 10000; i++ {
f.ParseHeaderRegex(header)
}
println(time.Since(s).String())
var k []byte return nil
var v []byte }
func (f *File) ParseHeaderRegex(h []byte) {
for _, parts := range matchHeader.FindAllSubmatch(h, -1) {
k := string(parts[1])
v := string(parts[2])
f.applyHeader(k, v)
}
}
func (f *File) ParseHeaderMachine(h []byte) {
var k1, k2 int
var v1, v2 int
// Simple finite state machine // Simple finite state machine
state := 0 state := 0
for _, b := range header { for i, b := range h {
switch state { switch state {
case 0: case 0:
if b == byte(':') { if b == byte(':') {
state = 1 state = 1
} else { k2 = i
k = append(k, b)
} }
case 1: case 1:
if b == byte(' ') { state = 2
state = 2
} else {
return errors.New("bad request")
}
case 2: case 2:
if b == byte('\r') { state = 3
state = 3 v1 = i
} else {
v = append(v, b)
}
case 3: case 3:
if b == byte('\n') { if b == byte('\r') {
state = 0 state = 4
key := strings.ToLower(string(k))
val := string(v)
switch key {
case "content-length":
size, err := strconv.ParseInt(val, 10, 64)
if err != nil { break }
if size < 0 { break }
f.Size = size
case "last-modified":
var err error
f.MTime, err = time.Parse(time.RFC1123, val)
if err == nil { break }
f.MTime, err = time.Parse(time.RFC850, val)
if err == nil { break }
// TODO Parse asctime
f.MTime, err = time.Parse("2006-01-02", val[:10])
if err == nil { break }
}
k = k[:0]
v = v[:0]
} else {
return errors.New("bad request")
} }
case 4:
state = 0
v2 = i - 1
key := string(h[k1:k2])
val := string(h[v1:v2])
k1 = i
f.applyHeader(key, val)
} }
} }
return nil }
func (f *File) applyHeader(k, v string) {
switch k {
case "content-length":
size, err := strconv.ParseInt(v, 10, 64)
if err != nil { break }
if size < 0 { break }
f.Size = size
case "last-modified":
var err error
f.MTime, err = time.Parse(time.RFC1123, v)
if err == nil { break }
f.MTime, err = time.Parse(time.RFC850, v)
if err == nil { break }
// TODO Parse asctime
f.MTime, err = time.Parse("2006-01-02", v[:10])
if err == nil { break }
}
} }
func makeInfinite() (chan<- url.URL, <-chan url.URL) { func makeInfinite() (chan<- url.URL, <-chan url.URL) {