Skip to content

Commit 62755a7

Browse files
committed
Documented the Tokenizer
1 parent 599645e commit 62755a7

File tree

6 files changed

+43
-6
lines changed

6 files changed

+43
-6
lines changed

FUTURE-CHANGELOG.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,5 @@
33
- Serializer bug fix
44

55
## TODO
6-
* Document Combinator and CombinatorEl struct.
7-
* Document TokenizeSelectorsAndCombinators and show an example for it.
8-
* Document Tokenizers and Selectors
96
* Update the readme example and change the CHANGELOG
107

parser.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ func Decode(r io.Reader) (*Node, error) {
2020
nodeTreeBuilder.WriteNodeTree(t.GetCurrentNode(), tt)
2121
}
2222
return nodeTreeBuilder.GetRootNode(), nil
23-
2423
}
2524

2625
// HTMLToNodeTree returns the html code as a node tree. If an error occurs, it is a SyntaxError.

selectors.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ const (
1313
Tag
1414
)
1515

16+
//Selector represents a single CSS selector.
17+
//Ex: .my-class, #video, div
1618
type Selector struct {
1719
selector string
1820
selectorName string
@@ -40,6 +42,8 @@ func matchNode(node *Node, basicSelectorName string, basicSelectorType BasicSele
4042
return false
4143
}
4244

45+
//NewSelector takes a single css selector and returns a Selector struct.
46+
//The selector string should contain only a basic selector.
4347
func NewSelector(selector string) Selector {
4448
selector = strings.TrimSpace(html.EscapeString(selector))
4549
selectorStruct := Selector{}
@@ -76,12 +80,14 @@ const (
7680
NoneCombinator
7781
)
7882

83+
//CombinatorEl is used to represent selectors that are around a combinator.
7984
type CombinatorEl struct {
8085
Type Combinator
8186
Selector1 Selector
8287
Selector2 Selector
8388
}
8489

90+
//TokenizeSelectorsAndCombinators takes a single selector, or several selectors joined by combinators, and returns a slice of CombinatorEl.
8591
func TokenizeSelectorsAndCombinators(selector string) []CombinatorEl {
8692
iter := func(yield func(string) bool) {
8793
currentStr := ""

tokenizer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ func NewNodeTreeBuilder() NodeTreeBuilder {
7171
}
7272
}
7373

74-
// WriteNodeTree append the node given html.TokenType
74+
// WriteNodeTree appends the node, given its html.TokenType.
7575
func (ntb *NodeTreeBuilder) WriteNodeTree(node *Node, tt html.TokenType) {
7676
switch tt {
7777
case html.EndTagToken:

tokenizer_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package GoHtml_test
2+
3+
import (
4+
"fmt"
5+
"net/http"
6+
7+
GoHtml "github.com/udan-jayanith/GoHTML"
8+
"golang.org/x/net/html"
9+
)
10+
11+
func ExampleTokenizer() {
12+
//Request the html
13+
res, err := http.Get("https://go.dev/")
14+
if err != nil || res.StatusCode != http.StatusOK {
15+
return
16+
}
17+
defer res.Body.Close()
18+
19+
//NewTokenizer takes an io.Reader that supplies UTF-8 encoded html code and returns a Tokenizer.
20+
t := GoHtml.NewTokenizer(res.Body)
21+
//NewNodeTreeBuilder returns a new NodeTreeBuilder that can be used to build a node tree.
22+
nodeTreeBuilder := GoHtml.NewNodeTreeBuilder()
23+
for {
24+
//Advanced scans the next token and returns its type.
25+
tt := t.Advanced()
26+
if tt == html.ErrorToken {
27+
break
28+
}
29+
30+
//WriteNodeTree takes a node and a token type. The node can be nil if the token type is EndTagToken.
31+
nodeTreeBuilder.WriteNodeTree(t.GetCurrentNode(), tt)
32+
}
33+
34+
//Prints the root node of the node tree in the nodeTreeBuilder.
35+
fmt.Println(nodeTreeBuilder.GetRootNode())
36+
}

traverser_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ func TestWalkthrough(t *testing.T) {
3636
t.Fatal("Expected ", testList[i], "but got ", resList[i], "in index ", i)
3737
}
3838
}
39-
t.Log(GoHtml.NodeTreeToHTML(body))
4039
}
4140

4241
func ExampleTraverser_Walkthrough() {

0 commit comments

Comments
 (0)