Skip to content

Commit d3bbf16

Browse files
committed
exp pipeline v7 - added depth to node
1 parent cf2218f commit d3bbf16

File tree

7 files changed

+12
-8
lines changed

7 files changed

+12
-8
lines changed

experimental/PIPELINE_ZDESIGN.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11

22
Pipeline
33

4-
url => structure => remove duplicates => validate urls => make request => parse page for urls ||
5-
^ => output adapter ||
6-
^===================================================================================
4+
url => structure => absolutify links => remove duplicates => validate urls => make request => parse page for urls ||
5+
^ => output adapter ||
6+
^=====================================================================================================
77

88

99
1. Composition

experimental/model.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ type Node struct {
1010
type ReqProp struct {
1111
ParentUrl string
1212
UrlStr string
13+
Depth int
1314
}
1415

1516
type Monster struct {

experimental/pipeline.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ func (m *Monster) BuildSystem(opAdapterPipe chan<- *Node) {
1212
parsePipe, compPipeChan := MakeParsingPipe()
1313
var reqPipe chan<- *Node
1414
if opAdapterPipe == nil {
15-
reqPipe = MakeRequistionPipe(parsePipe, nil)
15+
reqPipe = MakeRequisitionPipe(parsePipe, nil)
1616
} else {
17-
reqPipe = MakeRequistionPipe(parsePipe, opAdapterPipe)
17+
reqPipe = MakeRequisitionPipe(parsePipe, opAdapterPipe)
1818
}
1919
validationPipe := MakeUrlValidationPipe(reqPipe)
2020
unduplPipe := MakeUnduplicationPipe(validationPipe)
@@ -34,6 +34,7 @@ func (m *Monster) StartCrawling(baseUrlString string) {
3434
m.compPipe <- &ReqProp{
3535
"",
3636
urlStr,
37+
0,
3738
}
3839
}
3940
}

experimental/pipeline_cleaning.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ func Test_makeLinksAbsolute() {
4141
&ReqProp{
4242
"https://en.wikipedia.org/wiki/Main_Page",
4343
"/wiki/Caijia_language",
44+
1,
4445
},
4546
nil,
4647
}

experimental/pipeline_parsing.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ func parsePage(node *Node, compositionPipe chan<- *ReqProp) {
3535
compositionPipe <- &ReqProp{
3636
ParentUrl: node.UrlStr,
3737
UrlStr: attr.Val,
38+
Depth: node.Depth + 1,
3839
}
3940
}
4041
}

experimental/pipeline_requisition.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package experimental
22

33
import "net/http"
44

5-
func MakeRequistionPipe(parsePipe chan<- *Node, opAdapterPipe chan<- *Node) chan<- *Node {
5+
func MakeRequisitionPipe(parsePipe chan<- *Node, opAdapterPipe chan<- *Node) chan<- *Node {
66
requisitionPipe := make(chan *Node)
77
go func() {
88
for {

octopus/models.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ type Node struct {
1212
Depth int
1313
}
1414

15-
// octopus is a concurrent version of webSpider.
16-
// It has an inbuilt parser based of htmlparser.Parser to collect all links in a web-page.
15+
// octopus is a concurrent web crawler.
16+
// It has an inbuilt parser based on html.NewTokenizer to collect all links in a web-page.
1717
// It also has a CrawlOptions structure to initialize settings specific
1818
// to an instance of the crawler.
1919
type octopus struct {

0 commit comments

Comments
 (0)