Skip to content

Commit 79ae385

Browse files
committed
exp pipeline v8 - depth limited crawling via options
1 parent d3bbf16 commit 79ae385

File tree

10 files changed

+63
-19
lines changed

10 files changed

+63
-19
lines changed

experimental/model.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,12 @@ type ReqProp struct {
1313
Depth int
1414
}
1515

16+
// Options carries the optional crawl settings that a Monster embeds.
type Options struct {
	// MaxDepth is the crawl depth limit. The composition pipe only applies
	// it when it is greater than zero; zero or a negative value (the
	// constructors' default is -1) leaves the crawl unbounded.
	MaxDepth int
}
19+
1620
type Monster struct {
21+
*Options
1722
listenPipe chan string
1823
compPipe chan<- *ReqProp
1924
}

experimental/pipeline.go

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,43 @@
11
package experimental
22

3+
func NewMonsterWithOptions(options *Options) *Monster {
4+
listenChan := make(chan string)
5+
var opt *Options
6+
if options != nil {
7+
opt = options
8+
} else {
9+
opt = &Options{
10+
-1,
11+
}
12+
}
13+
return &Monster{
14+
Options: opt,
15+
listenPipe: listenChan,
16+
compPipe: nil,
17+
}
18+
}
19+
320
func NewMonster() *Monster {
421
listenChan := make(chan string)
522
return &Monster{
6-
listenChan,
7-
nil,
23+
Options: nil,
24+
listenPipe: listenChan,
25+
compPipe: nil,
826
}
927
}
1028

1129
// BuildSystem wires the pipeline stages together. Each stage is constructed
// with the send side of the stage it feeds: parsing first, then requisition,
// URL validation, unduplication, link cleaning and finally composition.
// The composition pipe is then sent over the channel returned by
// MakeParsingPipe and stored on the Monster.
//
// opAdapterPipe is handed to the requisition stage unchanged; it may be nil.
func (m *Monster) BuildSystem(opAdapterPipe chan<- *Node) {
	parseStage, compHandoff := m.MakeParsingPipe()

	// A nil opAdapterPipe is simply forwarded as nil, so no branching is
	// needed here.
	reqStage := m.MakeRequisitionPipe(parseStage, opAdapterPipe)
	validStage := m.MakeUrlValidationPipe(reqStage)
	dedupStage := m.MakeUnduplicationPipe(validStage)
	cleanStage := m.MakeLinkCleaningPipe(dedupStage)
	composition := m.MakeCompositionPipe(cleanStage)

	compHandoff <- composition
	m.compPipe = composition
}

experimental/pipeline_adapter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@ func HandleOutput(opAdapterPipe <-chan *Node) {
1313
for {
1414
opNode := <-opAdapterPipe
1515
count++
16-
fmt.Printf("%d - %v\n", count, opNode.UrlStr)
16+
fmt.Printf("%d - %d - %v\n", count, opNode.Depth, opNode.UrlStr)
1717
}
1818
}

experimental/pipeline_cleaning.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import (
55
"net/url"
66
)
77

8-
func MakeLinkCleaningPipe(undupPipe chan<- *Node) chan<- *Node {
8+
func (m *Monster) MakeLinkCleaningPipe(undupPipe chan<- *Node) chan<- *Node {
99
cleanPipe := make(chan *Node)
1010
go func() {
1111
for {
Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,32 @@
11
package experimental
22

3-
func MakeCompositionPipe(cleanPipe chan<- *Node) chan<- *ReqProp {
3+
func (m *Monster) MakeCompositionPipe(cleanPipe chan<- *Node) chan<- *ReqProp {
44
compositionPipe := make(chan *ReqProp)
5+
structFunc := structurize
6+
if m.MaxDepth > 0 {
7+
structFunc = structurizeWithDepth
8+
}
59
go func() {
610
for {
711
select {
812
case req := <-compositionPipe:
9-
go structurize(req, cleanPipe)
13+
go structFunc(m.MaxDepth, req, cleanPipe)
1014
}
1115
}
1216
}()
1317
return compositionPipe
1418
}
1519

16-
func structurize(reqProp *ReqProp, cleanPipe chan<- *Node) {
20+
func structurize(maxDepth int, reqProp *ReqProp, cleanPipe chan<- *Node) {
1721
node := &Node{
1822
reqProp,
1923
nil,
2024
}
2125
cleanPipe <- node
2226
}
27+
28+
func structurizeWithDepth(maxDepth int, reqProp *ReqProp, cleanPipe chan<- *Node) {
29+
if reqProp.Depth <= maxDepth {
30+
structurize(maxDepth, reqProp, cleanPipe)
31+
}
32+
}

experimental/pipeline_parsing.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package experimental
22

33
import "golang.org/x/net/html"
44

5-
func MakeParsingPipe() (chan<- *Node, chan<- chan<- *ReqProp) {
5+
func (m *Monster) MakeParsingPipe() (chan<- *Node, chan<- chan<- *ReqProp) {
66
parsePipe := make(chan *Node)
77
compPipeChan := make(chan chan<- *ReqProp)
88
go func() {

experimental/pipeline_requisition.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package experimental
22

33
import "net/http"
44

5-
func MakeRequisitionPipe(parsePipe chan<- *Node, opAdapterPipe chan<- *Node) chan<- *Node {
5+
func (m *Monster) MakeRequisitionPipe(parsePipe chan<- *Node, opAdapterPipe chan<- *Node) chan<- *Node {
66
requisitionPipe := make(chan *Node)
77
go func() {
88
for {

experimental/pipeline_unduplication.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package experimental
22

33
import "sync"
44

5-
func MakeUnduplicationPipe(validationPipe chan<- *Node) chan<- *Node {
5+
func (m *Monster) MakeUnduplicationPipe(validationPipe chan<- *Node) chan<- *Node {
66
var visitMap sync.Map
77
undupPipe := make(chan *Node)
88
go func() {

experimental/pipeline_urlvalidation.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import (
55
"net/http"
66
)
77

8-
func MakeUrlValidationPipe(requisitionPipe chan<- *Node) chan<- *Node {
8+
func (m *Monster) MakeUrlValidationPipe(requisitionPipe chan<- *Node) chan<- *Node {
99
validationPipe := make(chan *Node)
1010
go func() {
1111
for {

main.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ const (
1717

1818
func main() {
1919
//exp.Test_makeLinksAbsolute()
20-
runPipeline()
20+
//runPipeline()
21+
runPipelineWithOptions()
2122
}
2223

2324
func checkPipelineA() {
@@ -63,3 +64,13 @@ func runPipeline() {
6364
crawler.BuildSystem(opAdapterPipe)
6465
crawler.StartCrawling(LessLinkUrl)
6566
}
67+
68+
func runPipelineWithOptions() {
69+
opt := &exp.Options{
70+
MaxDepth: 3,
71+
}
72+
crawler := exp.NewMonsterWithOptions(opt)
73+
opAdapterPipe := exp.GetOutputAdapterPipe()
74+
crawler.BuildSystem(opAdapterPipe)
75+
crawler.StartCrawling(LessLinkUrl)
76+
}

0 commit comments

Comments
 (0)