|
7 | 7 | "time" |
8 | 8 |
|
9 | 9 | "golang.org/x/net/html" |
| 10 | + "golang.org/x/net/html/atom" |
10 | 11 | ) |
11 | 12 |
|
12 | 13 | // Image defines Open Graph Image type |
@@ -104,41 +105,32 @@ func (og *OpenGraph) String() string { |
104 | 105 |
|
105 | 106 | // ProcessHTML reads HTML from buffer and passes the attributes of every <meta> tag found before the <body> tag to og.ProcessMeta; it returns a non-nil error only when the HTML cannot be read or parsed. |
106 | 107 | func (og *OpenGraph) ProcessHTML(buffer io.Reader) error { |
107 | | - doc, err := html.Parse(buffer) |
108 | | - if err != nil { |
109 | | - return err |
110 | | - } |
111 | | - |
112 | | - var parseHead func(*html.Node) |
113 | | - parseHead = func(n *html.Node) { |
114 | | - for c := n.FirstChild; c != nil; c = c.NextSibling { |
115 | | - if c.Type == html.ElementNode && c.Data == "meta" { |
116 | | - m := make(map[string]string) |
117 | | - for _, a := range c.Attr { |
118 | | - m[a.Key] = a.Val |
119 | | - } |
120 | | - |
121 | | - og.ProcessMeta(m) |
| 108 | + z := html.NewTokenizer(buffer) |
| 109 | + for { |
| 110 | + tt := z.Next() |
| 111 | + switch tt { |
| 112 | + case html.ErrorToken: |
| 113 | + if z.Err() == io.EOF { |
| 114 | + return nil |
122 | 115 | } |
123 | | - } |
124 | | - } |
125 | | - |
126 | | - var f func(*html.Node) |
127 | | - f = func(n *html.Node) { |
128 | | - for c := n.FirstChild; c != nil; c = c.NextSibling { |
129 | | - if c.Type == html.ElementNode { |
130 | | - if c.Data == "head" { |
131 | | - parseHead(c) |
132 | | - continue |
133 | | - } else if c.Data == "body" { // OpenGraph is only in head, so we don't need body |
134 | | - break |
135 | | - } |
| 116 | + return z.Err() |
| 117 | + case html.StartTagToken, html.SelfClosingTagToken, html.EndTagToken: |
| 118 | + name, hasAttr := z.TagName() |
| 119 | + if atom.Lookup(name) == atom.Body { |
| 120 | + return nil // Open Graph meta tags are expected only in <head>, so stop at the first <body> tag (start, self-closing, or end) without reading the rest of the input |
| 121 | + } |
| 122 | + if atom.Lookup(name) != atom.Meta || !hasAttr { |
| 123 | + continue |
136 | 124 | } |
137 | | - f(c) |
| 125 | + m := make(map[string]string) |
| 126 | + var key, val []byte |
| 127 | + for hasAttr { |
| 128 | + key, val, hasAttr = z.TagAttr() |
| 129 | + m[atom.String(key)] = string(val) |
| 130 | + } |
| 131 | + og.ProcessMeta(m) |
138 | 132 | } |
139 | 133 | } |
140 | | - f(doc) |
141 | | - |
142 | 134 | return nil |
143 | 135 | } |
144 | 136 |
|
|
0 commit comments