Skip to content

Commit 09d931b

Browse files
committed
design octopus v1.3 - working design
1 parent 622da72 commit 09d931b

File tree

4 files changed

+7
-48
lines changed

4 files changed

+7
-48
lines changed

main.go

Lines changed: 1 addition & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -22,35 +22,14 @@ func main() {
2222
// exp.Test_makeLinksAbsolute()
2323
// runPipeline()
2424
// runPipelineWithOptions()
25-
// octopusTest()
26-
stupid()
25+
octopusTest()
2726
}
2827

29-
func stupid() {
30-
// resp, err := http.Head(HomeUrl)
31-
// if err == nil && resp == nil {
32-
// log.Fatal("WOW resp is nill although err is not")
33-
// }
34-
// if err == nil && resp != nil && resp.StatusCode == 200 {
35-
// fmt.Printf("%s\n", resp.Status)
36-
// }
37-
resp, err := http.Head("https://en.wikipedia.org/wiki/Main_Page")
38-
fmt.Println("A")
39-
if err == nil && resp == nil {
40-
log.Fatal("WOW resp is nill although err is not")
41-
}
42-
fmt.Println("B")
43-
if err == nil && resp.StatusCode == 200 {
44-
fmt.Printf("\nXX%s\n", resp.Status)
45-
}
46-
fmt.Println("C")
47-
}
4828
func octopusTest() {
4929
outputAdapter := &adapter.StdOpAdapter{}
5030
// outputAdapter := &adapter.FileWriterAdapter{"crawl_output.txt"}
5131

5232
crawlOpt := oct.GetDefaultCrawlOptions()
53-
crawlOpt.MaxCrawlDepth = 3
5433
crawlOpt.OpAdapter = outputAdapter
5534

5635
octopus := oct.New(crawlOpt)

octopus/core.go

Lines changed: 5 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -41,25 +41,23 @@ func (o *octopus) SetupSystem() {
4141
ingestQuitCh,
4242
}
4343

44+
o.inputUrlStrChan = ingestStrCh
45+
o.masterQuitCh = make(chan int, 1)
46+
4447
outAdapterChSet := o.OpAdapter.Consume()
4548

4649
pageParseChSet := o.makeParseNodeFromHtmlPipe(ingestChSet)
4750
depthLimitChSet := o.makeCrawlDepthFilterPipe(pageParseChSet)
48-
// maxDelayChSet := o.makeMaxDelayPipe(depthLimitChSet)
49-
// distributorChSet := o.makeDistributorPipe(maxDelayChSet, outAdapterChSet)
50-
distributorChSet := o.makeDistributorPipe(depthLimitChSet, outAdapterChSet)
51+
maxDelayChSet := o.makeMaxDelayPipe(depthLimitChSet)
52+
distributorChSet := o.makeDistributorPipe(maxDelayChSet, outAdapterChSet)
5153
pageReqChSet := o.makePageRequisitionPipe(distributorChSet)
5254
invUrlFilterChSet := o.makeInvalidUrlFilterPipe(pageReqChSet)
5355
dupFilterChSet := o.makeDuplicateUrlFilterPipe(invUrlFilterChSet)
5456
protoFilterChSet := o.makeUrlProtocolFilterPipe(dupFilterChSet)
5557
linkAbsChSet := o.makeLinkAbsolutionPipe(protoFilterChSet)
5658

57-
5859
o.makeIngestPipe(inPipeChSet, linkAbsChSet)
5960

60-
o.inputUrlStrChan = ingestStrCh
61-
o.masterQuitCh = make(chan int, 1)
62-
6361
<-time.After(500 * time.Millisecond)
6462
o.isReady = true
6563
}
@@ -71,19 +69,6 @@ func (o *octopus) BeginCrawling(baseUrlStr string) {
7169
go func() {
7270
o.inputUrlStrChan <- baseUrlStr
7371
}()
74-
// for {
75-
// select {
76-
// // case urlStr := <-o.inputUrlStrChan:
77-
// // {
78-
// // o.inputUrlStrChan <- urlStr
79-
// // }
80-
// case <-o.masterQuitCh:
81-
// {
82-
// fmt.Println("Master Kill Switch Activated")
83-
// return
84-
// }
85-
// }
86-
// }
8772
<-o.masterQuitCh
8873
fmt.Println("Master Kill Switch Activated")
8974
}

octopus/modelfactory.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ const (
66
defaultMaxDepth int64 = 2
77
anchorTag = "a"
88
anchorAttrb = "href"
9-
defaultTimeToQuit = 10
9+
defaultTimeToQuit = 5
1010
)
1111

1212
// NewWithDefaultOptions - Create an Instance of the Octopus with the default CrawlOptions.
Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
package octopus
22

33
import (
4-
"log"
54
"net/http"
65
)
76

@@ -11,11 +10,7 @@ func (o *octopus) makeInvalidUrlFilterPipe(outChSet *NodeChSet) *NodeChSet {
1110

1211
func validateUrl(node *Node, outChSet *NodeChSet) {
1312
resp, err := http.Head(node.UrlString)
14-
if err == nil && resp == nil {
15-
log.Fatal("WOW resp is nill although err is not")
16-
}
1713
if err == nil && resp != nil && resp.StatusCode == 200 {
1814
outChSet.NodeCh <- node
1915
}
20-
// log.Printf("%v\n", err)
2116
}

0 commit comments

Comments
 (0)