Skip to content

Commit 41a3523

Browse files
committed
Merge pull request #1 from artyom/master
Reduce memory allocations and improve speed of html parsing
2 parents: 8b5150f + 1988d1d; commit: 41a3523

File tree

2 files changed

+37
-33
lines changed

2 files changed

+37
-33
lines changed

opengraph/opengraph.go

Lines changed: 23 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import (
77
"time"
88

99
"golang.org/x/net/html"
10+
"golang.org/x/net/html/atom"
1011
)
1112

1213
// Image defines Open Graph Image type
@@ -104,41 +105,32 @@ func (og *OpenGraph) String() string {
104105

105106
// ProcessHTML parses given html from Reader interface and fills up OpenGraph structure
106107
func (og *OpenGraph) ProcessHTML(buffer io.Reader) error {
107-
doc, err := html.Parse(buffer)
108-
if err != nil {
109-
return err
110-
}
111-
112-
var parseHead func(*html.Node)
113-
parseHead = func(n *html.Node) {
114-
for c := n.FirstChild; c != nil; c = c.NextSibling {
115-
if c.Type == html.ElementNode && c.Data == "meta" {
116-
m := make(map[string]string)
117-
for _, a := range c.Attr {
118-
m[a.Key] = a.Val
119-
}
120-
121-
og.ProcessMeta(m)
108+
z := html.NewTokenizer(buffer)
109+
for {
110+
tt := z.Next()
111+
switch tt {
112+
case html.ErrorToken:
113+
if z.Err() == io.EOF {
114+
return nil
122115
}
123-
}
124-
}
125-
126-
var f func(*html.Node)
127-
f = func(n *html.Node) {
128-
for c := n.FirstChild; c != nil; c = c.NextSibling {
129-
if c.Type == html.ElementNode {
130-
if c.Data == "head" {
131-
parseHead(c)
132-
continue
133-
} else if c.Data == "body" { // OpenGraph is only in head, so we don't need body
134-
break
135-
}
116+
return z.Err()
117+
case html.StartTagToken, html.SelfClosingTagToken, html.EndTagToken:
118+
name, hasAttr := z.TagName()
119+
if atom.Lookup(name) == atom.Body {
120+
return nil // OpenGraph is only in head, so we don't need body
121+
}
122+
if atom.Lookup(name) != atom.Meta || !hasAttr {
123+
continue
136124
}
137-
f(c)
125+
m := make(map[string]string)
126+
var key, val []byte
127+
for hasAttr {
128+
key, val, hasAttr = z.TagAttr()
129+
m[atom.String(key)] = string(val)
130+
}
131+
og.ProcessMeta(m)
138132
}
139133
}
140-
f(doc)
141-
142134
return nil
143135
}
144136

opengraph/opengraph_test.go

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,7 @@ import (
88
"github.com/dyatlov/go-opengraph/opengraph"
99
)
1010

11-
func TestOpenGraphProcessHTML(t *testing.T) {
12-
html := `
11+
const html = `
1312
<!DOCTYPE html>
1413
<html xmlns="http://www.w3.org/1999/xhtml" dir="ltr" lang="en-US">
1514
<head profile="http://gmpg.org/xfn/11">
@@ -31,6 +30,19 @@ func TestOpenGraphProcessHTML(t *testing.T) {
3130
<meta name="twitter:card" content="summary" />
3231
<meta name="twitter:creator" content="@WordPress" />
3332
`
33+
34+
func BenchmarkOpenGraph_ProcessHTML(b *testing.B) {
35+
og := opengraph.NewOpenGraph()
36+
b.ReportAllocs()
37+
b.SetBytes(int64(len(html)))
38+
for i := 0; i < b.N; i++ {
39+
if err := og.ProcessHTML(strings.NewReader(html)); err != nil {
40+
b.Fatal(err)
41+
}
42+
}
43+
}
44+
45+
func TestOpenGraphProcessHTML(t *testing.T) {
3446
og := opengraph.NewOpenGraph()
3547
err := og.ProcessHTML(strings.NewReader(html))
3648

0 commit comments

Comments
 (0)