Skip to content

Commit 1a79a08

Browse files
committed
Half finished selectors
1 parent caaab5f commit 1a79a08

File tree

8 files changed

+204
-97
lines changed

8 files changed

+204
-97
lines changed

benchmarks/benchmark_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ import(
66
"net/http"
77
"time"
88
)
9-
9+
/*
10+
Adapted from [GoQuery example](https://github.com/PuerkitoBio/goquery?tab=readme-ov-file#examples)
11+
*/
1012
func TestFetchPostCovers(t *testing.T){
1113
res, err := http.Get("https://www.metalsucks.net/")
1214
if err != nil {

node-tree.go

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -219,18 +219,7 @@ But this is not the case for QuerySearch, QuerySelector and QuerySelectorAll.
219219
*/
220220
// Adapted from [MDN Element: closest() method](https://developer.mozilla.org/en-US/docs/Web/API/Element/closest)
221221
func (node *Node) Closest(query string) *Node {
222-
queryTokens := TokenizeQuery(query)
223222
traverser := NewTraverser(node)
224-
for traverser.GetCurrentNode() != nil {
225-
if matchQueryTokens(traverser.GetCurrentNode(), queryTokens) {
226-
break
227-
}
228-
229-
if traverser.GetCurrentNode().GetPreviousNode() == nil {
230-
traverser.SetCurrentNodeTo(traverser.GetCurrentNode().GetParent())
231-
}else{
232-
traverser.Previous()
233-
}
234-
}
223+
235224
return traverser.GetCurrentNode()
236225
}

node-tree_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ func TestRemoveNode(t *testing.T){
114114
//t.Log(GoHtml.NodeTreeToHTML(article))
115115
}
116116

117+
/*
117118
func TestClosest(t *testing.T){
118119
node, err := testFile4NodeTree()
119120
if err != nil{
@@ -132,4 +133,5 @@ func TestClosest(t *testing.T){
132133
}
133134
134135
135-
}
136+
}
137+
*/

querying.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package GoHtml
22

33
import (
4-
"iter"
4+
//"iter"
55
"strings"
66
)
77

@@ -103,6 +103,7 @@ func (node *Node) GetElementsById(idName string) NodeList {
103103
/*
104104
QuerySearch tokenizes the query string and searches for nodes that match the rightmost query token. After matching the rightmost token, it proceeds to match the node's parent nodes against the leftover tokens and then passes that node to (yield/range). QuerySearch searches the whole node tree for matches unless the yield is canceled or the range iterator is canceled.
105105
*/
106+
/*
106107
func QuerySearch(node *Node, query string) iter.Seq[*Node] {
107108
traverser := NewTraverser(node)
108109
return func(yield func(node *Node) bool) {
@@ -180,3 +181,5 @@ func (node *Node) QuerySelectorAll(query string) NodeList {
180181
}
181182
return nodeList
182183
}
184+
185+
*/

querying_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ func TestGetElementsById(t *testing.T) {
138138
}
139139
}
140140

141+
/*
141142
func TestSelectorTokenizer(t *testing.T) {
142143
stack := linkedliststack.New()
143144
stack.Push("article .content")
@@ -209,3 +210,5 @@ func TestQuerySelectorAll(t *testing.T) {
209210
}
210211
}
211212
}
213+
214+
*/

selectors.go

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
package GoHtml
2+
3+
import (
4+
"strings"
5+
)
6+
7+
// BasicSelector enumerates the kinds of basic selector a Selector can hold.
type BasicSelector int

// Basic selector kinds, numbered from zero in declaration order.
const (
	// Id is the kind NewSelector assigns to selectors that start with '#'.
	// It is also the zero value of BasicSelector.
	Id BasicSelector = iota
	// Class is the kind NewSelector assigns to selectors that start with '.'.
	Class
	// Tag is the kind NewSelector assigns to every other non-empty selector.
	Tag
)
14+
15+
type Selector struct {
16+
selector string
17+
selectorName string
18+
selectorType BasicSelector
19+
}
20+
21+
func matchNode(node *Node, basicSelectorName string, basicSelectorType BasicSelector) bool {
22+
if basicSelectorName == ""{
23+
return true
24+
}else if node == nil {
25+
return false
26+
}
27+
28+
switch basicSelectorType {
29+
case Id:
30+
idName, _ := node.GetAttribute("id")
31+
return idName == basicSelectorName
32+
case Class:
33+
classList := NewClassList()
34+
classList.DecodeFrom(node)
35+
return classList.Contains(basicSelectorName)
36+
case Tag:
37+
return node.GetTagName() == basicSelectorName
38+
}
39+
return false
40+
}
41+
42+
func NewSelector(selector string) Selector {
43+
selector = strings.TrimSpace(selector)
44+
selectorStruct := Selector{}
45+
if len(selector) == 0 || (selector[0] == '.' || selector[0] == '#') && len(selector) <= 1 {
46+
return selectorStruct
47+
}
48+
49+
switch selector[0] {
50+
case '.':
51+
selectorStruct.selectorType = Class
52+
case '#':
53+
selectorStruct.selectorType = Id
54+
default:
55+
selectorStruct.selectorType = Tag
56+
}
57+
58+
selectorStruct.selector = selector
59+
if selectorStruct.selectorType != Tag {
60+
selectorStruct.selectorName = selector[1:]
61+
} else {
62+
selectorStruct.selectorName = selector
63+
}
64+
return selectorStruct
65+
}
66+
67+
// Combinator enumerates the relationships a CombinatorEl can require between
// its two selectors.
type Combinator int

// Combinator kinds, numbered from zero in declaration order.
const (
	// Descendant is the zero value, so it is what a CombinatorEl carries when
	// no combinator symbol was seen before its second selector.
	Descendant Combinator = iota
	// Child is set by the ">" token.
	Child
	// NextSibling is set by the "+" token.
	NextSibling
	// SubsequentSibling is set by the "~" token.
	SubsequentSibling
	// NoneCombinator marks an element that has no combinator at all; only
	// Selector2 is checked for it.
	NoneCombinator
)
77+
78+
type CombinatorEl struct {
79+
Type Combinator
80+
Selector1 Selector
81+
Selector2 Selector
82+
}
83+
84+
func TokenizeSelectorsAndCombinators(selector string) []CombinatorEl {
85+
list := make([]CombinatorEl, 0, 1)
86+
slice := strings.SplitSeq(selector, " ")
87+
currentCombinator := *new(CombinatorEl)
88+
currentCombinator.Selector1 = NewSelector("")
89+
for str := range slice {
90+
if strings.TrimSpace(str) == "" {
91+
continue
92+
}
93+
94+
switch str {
95+
case "+":
96+
currentCombinator.Type = NextSibling
97+
case ">":
98+
currentCombinator.Type = Child
99+
case "~":
100+
currentCombinator.Type = SubsequentSibling
101+
default:
102+
newSelector := NewSelector(str)
103+
currentCombinator.Selector2 = newSelector
104+
list = append(list, currentCombinator)
105+
currentCombinator = *new(CombinatorEl)
106+
currentCombinator.Selector1 = newSelector
107+
}
108+
109+
}
110+
111+
if len(list) == 1 {
112+
list[0].Type = NoneCombinator
113+
}
114+
115+
return list
116+
}
117+
118+
func (ce *CombinatorEl) IsMatchingNode(node *Node) bool {
119+
switch ce.Type {
120+
case Descendant:
121+
return ce.isDescended(node)
122+
case Child:
123+
return ce.isDirectChild(node)
124+
case NextSibling:
125+
return ce.isNextSibling(node)
126+
case SubsequentSibling:
127+
return ce.isSubsequentSibling(node)
128+
case NoneCombinator:
129+
return matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType)
130+
}
131+
return false
132+
}
133+
134+
// isDescended returns wether the given node is a ce.Selector2 and descended of ce.Selector1.
135+
func (ce *CombinatorEl) isDescended(node *Node) bool {
136+
if !matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) {
137+
return false
138+
}
139+
140+
parentNode := node.GetParent()
141+
for parentNode != nil && !matchNode(parentNode, ce.Selector1.selectorName, ce.Selector1.selectorType) {
142+
parentNode = parentNode.GetParent()
143+
}
144+
return parentNode != nil
145+
}
146+
147+
// isDirectChild returns whether the given node is a direct child of ce.Selector1 and node is of ce.Selector2
148+
func (ce *CombinatorEl) isDirectChild(node *Node) bool {
149+
if node == nil {
150+
return false
151+
}
152+
153+
return matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) && matchNode(node.GetParent(), ce.Selector1.selectorName, ce.Selector1.selectorType)
154+
}
155+
156+
// isNextSibling return whether the given node is of ce.Selector2 and next sibling of ce.Selector1
157+
func (ce *CombinatorEl) isNextSibling(node *Node) bool {
158+
if node == nil {
159+
return false
160+
}
161+
162+
return matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) && matchNode(node.GetPreviousNode(), ce.Selector1.selectorName, ce.Selector1.selectorType)
163+
}
164+
165+
func (ce *CombinatorEl) isSubsequentSibling(node *Node) bool {
166+
if !matchNode(node, ce.Selector2.selector, ce.Selector2.selectorType) {
167+
return false
168+
}
169+
170+
traverser := NewTraverser(node)
171+
for traverser.GetCurrentNode() != nil && !matchNode(traverser.GetCurrentNode(), ce.Selector1.selector, ce.Selector1.selectorType) {
172+
traverser.Previous()
173+
}
174+
return matchNode(traverser.GetCurrentNode(), ce.Selector1.selector, ce.Selector1.selectorType)
175+
}

selectors_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package GoHtml_test
2+
3+
import(
4+
"testing"
5+
"github.com/udan-jayanith/GoHTML"
6+
)
7+
8+
func TestTokenizeSelector(t *testing.T){
9+
slice := GoHtml.TokenizeSelectorsAndCombinators(".class-1 > .class-2 + .class-3 a")
10+
for _, el := range slice{
11+
t.Log(el)
12+
}
13+
}

tokenizer.go

Lines changed: 2 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
"golang.org/x/net/html"
99
)
1010

11-
1211
// Tokenizer contains a *html.Tokenizer.
1312
type Tokenizer struct {
1413
z *html.Tokenizer
@@ -26,7 +25,7 @@ func (t *Tokenizer) Advanced() html.TokenType {
2625
return t.z.Next()
2726
}
2827

29-
// CurrentNode returns the current node.
28+
// CurrentNode returns the current node.
3029
// Returned value can be nil regardless of tt.
3130
func (t *Tokenizer) GetCurrentNode() *Node {
3231
currentToken := t.z.Token()
@@ -85,7 +84,7 @@ func (ntb *NodeTreeBuilder) WriteNodeTree(node *Node, tt html.TokenType) {
8584
if node == nil {
8685
return
8786
}
88-
87+
8988
if isTopNode(ntb.currentNode, ntb.stack) {
9089
ntb.currentNode.AppendChild(node)
9190
} else {
@@ -120,83 +119,4 @@ func isTopNode(node *Node, stack *linkedliststack.Stack) bool {
120119

121120
topNode := val.(*Node)
122121
return topNode == node
123-
}
124-
125-
// QueryToken types
126-
const (
127-
Id int = iota
128-
Tag
129-
Class
130-
)
131-
132-
// QueryToken store data about basic css selectors(ids, classes, tags).
133-
type QueryToken struct {
134-
Type int
135-
SelectorName string
136-
Selector string
137-
}
138-
139-
/*
140-
TokenizeQuery tokenizes the query and returns a list of QueryToken.
141-
142-
query should be of only consists of class, tag and/or id. This applies to every function that accepts a parameter name query.
143-
query should not consists of css selectors, Combinators and separators.
144-
*/
145-
func TokenizeQuery(query string) []QueryToken {
146-
slice := make([]QueryToken, 0, 1)
147-
if strings.TrimSpace(query) == "" {
148-
return slice
149-
}
150-
151-
iter := strings.SplitSeq(query, " ")
152-
for sec := range iter {
153-
token := QueryToken{}
154-
switch sec {
155-
case "", " ", ".", "#":
156-
continue
157-
}
158-
159-
switch string(sec[0]) {
160-
case ".":
161-
token.Type = Class
162-
token.SelectorName = sec[1:]
163-
case "#":
164-
token.Type = Id
165-
token.SelectorName = sec[1:]
166-
default:
167-
token.Type = Tag
168-
token.SelectorName = sec
169-
}
170-
token.Selector = sec
171-
slice = append(slice, token)
172-
}
173-
174-
return slice
175-
}
176-
177-
// matchQueryTokens returns wether the queryTokens match given the node.
178-
func matchQueryTokens(node *Node, queryTokens []QueryToken) bool {
179-
if len(queryTokens) == 0 {
180-
return false
181-
}
182-
classList := NewClassList()
183-
classList.DecodeFrom(node)
184-
for _, token := range queryTokens {
185-
switch token.Type {
186-
case Id:
187-
idName, _ := node.GetAttribute("id")
188-
if token.SelectorName != idName {
189-
return false
190-
}
191-
case Tag:
192-
if node.GetTagName() != token.SelectorName {
193-
return false
194-
}
195-
case Class:
196-
if !classList.Contains(token.SelectorName) {
197-
return false
198-
}
199-
}
200-
}
201-
return true
202122
}

0 commit comments

Comments
 (0)