Skip to content

Commit 1988d1d

Browse files
author
Artyom Pervukhin
committed
Process html using low-level tokenizer
name                     old time/op    new time/op    delta
OpenGraph_ProcessHTML-4    54.2µs ± 1%    31.5µs ± 2%  -41.84%  (p=0.008 n=5+5)

name                     old speed      new speed      delta
OpenGraph_ProcessHTML-4  24.3MB/s ± 1%  41.8MB/s ± 2%  +71.96%  (p=0.008 n=5+5)

name                     old alloc/op   new alloc/op   delta
OpenGraph_ProcessHTML-4    13.6kB ± 0%     5.9kB ± 0%  -57.05%  (p=0.008 n=5+5)

name                     old allocs/op  new allocs/op  delta
OpenGraph_ProcessHTML-4       175 ± 0%        52 ± 0%  -70.29%  (p=0.008 n=5+5)
1 parent 73eb1ff commit 1988d1d

File tree

1 file changed

+23
-31
lines changed

1 file changed

+23
-31
lines changed

opengraph/opengraph.go

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"time"
88

99
"golang.org/x/net/html"
10+
"golang.org/x/net/html/atom"
1011
)
1112

1213
// Image defines Open Graph Image type
@@ -104,41 +105,32 @@ func (og *OpenGraph) String() string {
104105

105106
// ProcessHTML parses given html from Reader interface and fills up OpenGraph structure
106107
func (og *OpenGraph) ProcessHTML(buffer io.Reader) error {
107-
doc, err := html.Parse(buffer)
108-
if err != nil {
109-
return err
110-
}
111-
112-
var parseHead func(*html.Node)
113-
parseHead = func(n *html.Node) {
114-
for c := n.FirstChild; c != nil; c = c.NextSibling {
115-
if c.Type == html.ElementNode && c.Data == "meta" {
116-
m := make(map[string]string)
117-
for _, a := range c.Attr {
118-
m[a.Key] = a.Val
119-
}
120-
121-
og.ProcessMeta(m)
108+
z := html.NewTokenizer(buffer)
109+
for {
110+
tt := z.Next()
111+
switch tt {
112+
case html.ErrorToken:
113+
if z.Err() == io.EOF {
114+
return nil
122115
}
123-
}
124-
}
125-
126-
var f func(*html.Node)
127-
f = func(n *html.Node) {
128-
for c := n.FirstChild; c != nil; c = c.NextSibling {
129-
if c.Type == html.ElementNode {
130-
if c.Data == "head" {
131-
parseHead(c)
132-
continue
133-
} else if c.Data == "body" { // OpenGraph is only in head, so we don't need body
134-
break
135-
}
116+
return z.Err()
117+
case html.StartTagToken, html.SelfClosingTagToken, html.EndTagToken:
118+
name, hasAttr := z.TagName()
119+
if atom.Lookup(name) == atom.Body {
120+
return nil // OpenGraph is only in head, so we don't need body
121+
}
122+
if atom.Lookup(name) != atom.Meta || !hasAttr {
123+
continue
136124
}
137-
f(c)
125+
m := make(map[string]string)
126+
var key, val []byte
127+
for hasAttr {
128+
key, val, hasAttr = z.TagAttr()
129+
m[atom.String(key)] = string(val)
130+
}
131+
og.ProcessMeta(m)
138132
}
139133
}
140-
f(doc)
141-
142134
return nil
143135
}
144136

0 commit comments

Comments
 (0)