Skip to content

Commit c16f5fc

Browse files
committed
Add new selectors. Needs manual testing. Closest is temporarily commented out for development of selectors and combinators. Once the selectors and combinators are done being tested, Closest will work again.
1 parent 985b84d commit c16f5fc

File tree

3 files changed

+54
-99
lines changed

3 files changed

+54
-99
lines changed

querying.go

Lines changed: 11 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package GoHtml
22

33
import (
4-
//"iter"
4+
"iter"
55
"strings"
66
)
77

@@ -103,23 +103,14 @@ func (node *Node) GetElementsById(idName string) NodeList {
103103
/*
104104
QuerySearch tokenizes the query string and searches for nodes that match the rightmost query token. After matching the rightmost token, it proceeds to match the node's parent nodes against the leftover tokens and then passes that node to (yield/range). QuerySearch searches the whole node tree for matches unless yield is canceled or the range iterator is canceled.
105105
*/
106-
/*
107-
func QuerySearch(node *Node, query string) iter.Seq[*Node] {
106+
107+
func QuerySearch(node *Node, selector string) iter.Seq[*Node] {
108108
traverser := NewTraverser(node)
109109
return func(yield func(node *Node) bool) {
110-
queryTokens := TokenizeQuery(query)
110+
selectorTokens := TokenizeSelectorsAndCombinators(selector)
111111
iter := traverser.Walkthrough
112112
for node := range iter {
113-
i := matchFromRightMostQueryToken(node, queryTokens, len(queryTokens)-1)
114-
if i == len(queryTokens)-1{
115-
continue
116-
}
117-
parentNode := node.GetParent()
118-
for parentNode != nil && i>=0 {
119-
i = matchFromRightMostQueryToken(parentNode, queryTokens, i)
120-
parentNode = parentNode.GetParent()
121-
}
122-
if i < 0 && !yield(node){
113+
if matchFromRightMostSelectors(node, selectorTokens) && !yield(node) {
123114
return
124115
}
125116
}
@@ -128,40 +119,17 @@ func QuerySearch(node *Node, query string) iter.Seq[*Node] {
128119
}
129120

130121
// matchFromRightMostQueryToken tries to match query tokens from right to left and returns the index of the query token that was matched last.
131-
func matchFromRightMostQueryToken(node *Node, queryTokens []QueryToken, i int) int {
132-
classList := NewClassList()
133-
classList.DecodeFrom(node)
134-
checked := make(map[string]struct{})
135-
outer:
136-
for i >= 0 {
137-
token := queryTokens[i]
138-
_, ok := checked[token.Selector]
139-
if ok {
122+
func matchFromRightMostSelectors(node *Node, selectorTokens []CombinatorEl) bool {
123+
for i := len(selectorTokens) - 1; i >= 0; i-- {
124+
if node == nil {
140125
break
141-
} else {
142-
checked[token.Selector] = struct{}{}
143-
}
144-
145-
switch token.Type {
146-
case Id:
147-
idName, _ := node.GetAttribute("id")
148-
if token.SelectorName != idName {
149-
break outer
150-
}
151-
case Class:
152-
if !classList.Contains(token.SelectorName) {
153-
break outer
154-
}
155-
case Tag:
156-
if node.GetTagName() != token.SelectorName {
157-
break outer
158-
}
159126
}
160-
i--
127+
node = selectorTokens[i].getMatchingNode(node)
161128
}
162-
return i
129+
return node != nil
163130
}
164131

132+
165133
// QuerySelector returns only the first node matched by QuerySearch.
166134
func (node *Node) QuerySelector(query string) *Node {
167135
iter := QuerySearch(node, query)
@@ -182,4 +150,3 @@ func (node *Node) QuerySelectorAll(query string) NodeList {
182150
return nodeList
183151
}
184152

185-
*/

querying_test.go

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -138,45 +138,17 @@ func TestGetElementsById(t *testing.T) {
138138
}
139139
}
140140

141-
/*
142-
func TestSelectorTokenizer(t *testing.T) {
143-
stack := linkedliststack.New()
144-
stack.Push("article .content")
145-
stack.Push("article p h1")
146-
stack.Push("article p")
147-
stack.Push(".title #user")
148-
stack.Push("#user title .title-1")
149-
150-
for stack.Size() > 0 {
151-
val, _ := stack.Pop()
152-
selector := val.(string)
153-
154-
tokens := GoHtml.TokenizeQuery(selector)
155-
s := ""
156-
for _, token := range tokens {
157-
if s == "" {
158-
s += token.Selector
159-
} else {
160-
s += " " + token.Selector
161-
}
162-
}
163-
164-
if s != selector {
165-
t.Fatal("Expected ", selector, "but got", s)
166-
}
167-
}
168-
}
169-
170141
func TestQuerySelector(t *testing.T) {
171142
node, err := testFile4NodeTree()
172143
if err != nil {
173144
t.Fatal(err)
174145
return
175146
}
176-
node = node.QuerySelector("html .ordered-list ol li .ordered-item")
147+
node = node.QuerySelector("html ol li")
177148
if node == nil {
178149
t.Fatal("Node is nill after QuerySelector")
179150
} else if node.GetInnerText() != "Apple" {
151+
t.Log(node)
180152
t.Fatal("Unexpected text")
181153
}
182154
}
@@ -211,4 +183,3 @@ func TestQuerySelectorAll(t *testing.T) {
211183
}
212184
}
213185

214-
*/

selectors.go

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ type Selector struct {
1919
}
2020

2121
func matchNode(node *Node, basicSelectorName string, basicSelectorType BasicSelector) bool {
22-
if basicSelectorName == ""{
22+
if basicSelectorName == "" {
2323
return true
24-
}else if node == nil {
24+
} else if node == nil {
2525
return false
2626
}
2727

@@ -86,6 +86,7 @@ func TokenizeSelectorsAndCombinators(selector string) []CombinatorEl {
8686
slice := strings.SplitSeq(selector, " ")
8787
currentCombinator := *new(CombinatorEl)
8888
currentCombinator.Selector1 = NewSelector("")
89+
currentCombinator.Type = NoneCombinator
8990
for str := range slice {
9091
if strings.TrimSpace(str) == "" {
9192
continue
@@ -115,61 +116,77 @@ func TokenizeSelectorsAndCombinators(selector string) []CombinatorEl {
115116
return list
116117
}
117118

118-
func (ce *CombinatorEl) IsMatchingNode(node *Node) bool {
119+
func (ce *CombinatorEl) getMatchingNode(node *Node) *Node {
119120
switch ce.Type {
120121
case Descendant:
121-
return ce.isDescended(node)
122+
return ce.getDescended(node)
122123
case Child:
123-
return ce.isDirectChild(node)
124+
return ce.getDirectChild(node)
124125
case NextSibling:
125-
return ce.isNextSibling(node)
126+
return ce.getNextSibling(node)
126127
case SubsequentSibling:
127-
return ce.isSubsequentSibling(node)
128+
return ce.getSubsequentSibling(node)
128129
case NoneCombinator:
129-
return matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType)
130+
if matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) {
131+
return node
132+
}
130133
}
131-
return false
134+
return nil
132135
}
133136

134137
// isDescended returns whether the given node matches ce.Selector2 and is a descendant of ce.Selector1.
135-
func (ce *CombinatorEl) isDescended(node *Node) bool {
138+
func (ce *CombinatorEl) getDescended(node *Node) *Node {
136139
if !matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) {
137-
return false
140+
return nil
138141
}
139142

140143
parentNode := node.GetParent()
141-
for parentNode != nil && !matchNode(parentNode, ce.Selector1.selectorName, ce.Selector1.selectorType) {
144+
for parentNode != nil {
145+
if matchNode(parentNode, ce.Selector1.selectorName, ce.Selector1.selectorType) {
146+
return parentNode
147+
}
142148
parentNode = parentNode.GetParent()
143149
}
144-
return parentNode != nil
150+
return nil
145151
}
146152

147153
// isDirectChild returns whether the given node matches ce.Selector2 and is a direct child of ce.Selector1.
148-
func (ce *CombinatorEl) isDirectChild(node *Node) bool {
154+
func (ce *CombinatorEl) getDirectChild(node *Node) *Node {
149155
if node == nil {
150-
return false
156+
return nil
151157
}
152158

153-
return matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) && matchNode(node.GetParent(), ce.Selector1.selectorName, ce.Selector1.selectorType)
159+
if matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) &&
160+
matchNode(node.GetParent(), ce.Selector1.selectorName, ce.Selector1.selectorType) {
161+
return node.GetParent()
162+
}
163+
return nil
154164
}
155165

156166
// isNextSibling returns whether the given node matches ce.Selector2 and is the next sibling of ce.Selector1.
157-
func (ce *CombinatorEl) isNextSibling(node *Node) bool {
167+
func (ce *CombinatorEl) getNextSibling(node *Node) *Node {
158168
if node == nil {
159-
return false
169+
return nil
160170
}
161171

162-
return matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) && matchNode(node.GetPreviousNode(), ce.Selector1.selectorName, ce.Selector1.selectorType)
172+
if matchNode(node, ce.Selector2.selectorName, ce.Selector2.selectorType) &&
173+
matchNode(node.GetPreviousNode(), ce.Selector1.selectorName, ce.Selector1.selectorType) {
174+
return node.GetPreviousNode()
175+
}
176+
return nil
163177
}
164178

165-
func (ce *CombinatorEl) isSubsequentSibling(node *Node) bool {
166-
if !matchNode(node, ce.Selector2.selector, ce.Selector2.selectorType) {
167-
return false
179+
func (ce *CombinatorEl) getSubsequentSibling(node *Node) *Node {
180+
if node == nil || !matchNode(node, ce.Selector2.selector, ce.Selector2.selectorType) {
181+
return nil
168182
}
169183

170184
traverser := NewTraverser(node)
171-
for traverser.GetCurrentNode() != nil && !matchNode(traverser.GetCurrentNode(), ce.Selector1.selector, ce.Selector1.selectorType) {
185+
for traverser.GetCurrentNode() != nil {
186+
if matchNode(traverser.GetCurrentNode(), ce.Selector1.selector, ce.Selector1.selectorType){
187+
return traverser.GetCurrentNode()
188+
}
172189
traverser.Previous()
173190
}
174-
return matchNode(traverser.GetCurrentNode(), ce.Selector1.selector, ce.Selector1.selectorType)
191+
return nil
175192
}

0 commit comments

Comments
 (0)