Skip to content

Commit 622da72

Browse files
committed
recovering from the git fiasco
1 parent fe83c08 commit 622da72

File tree

9 files changed

+115
-47
lines changed

9 files changed

+115
-47
lines changed

adapter/basicadapters.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,6 @@ func (fw *FileWriterAdapter) writeToFile(listenCh chan *oct.Node,
7373
}
7474

7575
func (fw *FileWriterAdapter) getFilePointer() (w io.WriteCloser, err error) {
76-
w, err = os.OpenFile(fw.FilePath, os.O_RDWR|os.O_CREATE, 0755)
76+
w, err = os.OpenFile(fw.FilePath, os.O_RDWR|os.O_CREATE, 0644)
7777
return
7878
}

main.go

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
"net/http"
77
"time"
88

9-
adapter2 "github.com/rapidclock/web-octopus/adapter"
9+
"github.com/rapidclock/web-octopus/adapter"
1010
exp "github.com/rapidclock/web-octopus/experimental"
1111
oct "github.com/rapidclock/web-octopus/octopus"
1212
)
@@ -22,18 +22,40 @@ func main() {
2222
// exp.Test_makeLinksAbsolute()
2323
// runPipeline()
2424
// runPipelineWithOptions()
25-
octopusTest()
25+
// octopusTest()
26+
stupid()
2627
}
2728

29+
func stupid() {
30+
// resp, err := http.Head(HomeUrl)
31+
// if err == nil && resp == nil {
32+
// log.Fatal("WOW resp is nill although err is not")
33+
// }
34+
// if err == nil && resp != nil && resp.StatusCode == 200 {
35+
// fmt.Printf("%s\n", resp.Status)
36+
// }
37+
resp, err := http.Head("https://en.wikipedia.org/wiki/Main_Page")
38+
fmt.Println("A")
39+
if err == nil && resp == nil {
40+
log.Fatal("WOW resp is nill although err is not")
41+
}
42+
fmt.Println("B")
43+
if err == nil && resp.StatusCode == 200 {
44+
fmt.Printf("\nXX%s\n", resp.Status)
45+
}
46+
fmt.Println("C")
47+
}
2848
func octopusTest() {
29-
adapter := &adapter2.StdOpAdapter{}
49+
outputAdapter := &adapter.StdOpAdapter{}
50+
// outputAdapter := &adapter.FileWriterAdapter{"crawl_output.txt"}
3051

3152
crawlOpt := oct.GetDefaultCrawlOptions()
32-
crawlOpt.OpAdapter = adapter
53+
crawlOpt.MaxCrawlDepth = 3
54+
crawlOpt.OpAdapter = outputAdapter
3355

3456
octopus := oct.New(crawlOpt)
3557
octopus.SetupSystem()
36-
octopus.BeginCrawling(Url2)
58+
octopus.BeginCrawling(Url1)
3759
}
3860

3961
func checkPipelineA() {

octopus/core.go

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,23 @@ func (o *octopus) setupValidProtocolMap() {
2020

2121
func (o *octopus) setupTimeToQuit() {
2222
if o.TimeToQuit > 0 {
23-
o.timeToQuit = time.Duration(o.TimeToQuit)
23+
o.timeToQuit = time.Duration(o.TimeToQuit) * time.Second
2424
} else {
2525
log.Fatalln("TimeToQuit is not greater than 0")
2626
}
2727
}
2828

2929
func (o *octopus) SetupSystem() {
30+
o.isReady = false
3031
o.setupOctopus()
3132

32-
ingestCh := make(chan *Node)
33+
ingestNodeCh := make(chan *Node)
3334
ingestQuitCh := make(chan int, 1)
34-
ingestChSet := MakeNodeChSet(ingestCh, ingestQuitCh)
3535
ingestStrCh := make(chan string)
36+
37+
ingestChSet := MakeNodeChSet(ingestNodeCh, ingestQuitCh)
3638
inPipeChSet := &ingestPipeChSet{
37-
ingestCh,
39+
ingestNodeCh,
3840
ingestStrCh,
3941
ingestQuitCh,
4042
}
@@ -43,17 +45,22 @@ func (o *octopus) SetupSystem() {
4345

4446
pageParseChSet := o.makeParseNodeFromHtmlPipe(ingestChSet)
4547
depthLimitChSet := o.makeCrawlDepthFilterPipe(pageParseChSet)
48+
// maxDelayChSet := o.makeMaxDelayPipe(depthLimitChSet)
49+
// distributorChSet := o.makeDistributorPipe(maxDelayChSet, outAdapterChSet)
4650
distributorChSet := o.makeDistributorPipe(depthLimitChSet, outAdapterChSet)
4751
pageReqChSet := o.makePageRequisitionPipe(distributorChSet)
4852
invUrlFilterChSet := o.makeInvalidUrlFilterPipe(pageReqChSet)
4953
dupFilterChSet := o.makeDuplicateUrlFilterPipe(invUrlFilterChSet)
5054
protoFilterChSet := o.makeUrlProtocolFilterPipe(dupFilterChSet)
5155
linkAbsChSet := o.makeLinkAbsolutionPipe(protoFilterChSet)
5256

57+
5358
o.makeIngestPipe(inPipeChSet, linkAbsChSet)
5459

55-
o.inpUrlStrChan = ingestStrCh
60+
o.inputUrlStrChan = ingestStrCh
5661
o.masterQuitCh = make(chan int, 1)
62+
63+
<-time.After(500 * time.Millisecond)
5764
o.isReady = true
5865
}
5966

@@ -62,19 +69,37 @@ func (o *octopus) BeginCrawling(baseUrlStr string) {
6269
log.Fatal("Call BuildSystem first to setup Octopus")
6370
}
6471
go func() {
65-
o.inpUrlStrChan <- baseUrlStr
72+
o.inputUrlStrChan <- baseUrlStr
6673
}()
67-
for {
68-
select {
69-
case urlStr := <-o.inpUrlStrChan:
70-
{
71-
o.inpUrlStrChan <- urlStr
72-
}
73-
case <-o.masterQuitCh:
74-
{
75-
fmt.Println("Master Kill Switch Activated")
76-
return
77-
}
78-
}
74+
// for {
75+
// select {
76+
// // case urlStr := <-o.inputUrlStrChan:
77+
// // {
78+
// // o.inputUrlStrChan <- urlStr
79+
// // }
80+
// case <-o.masterQuitCh:
81+
// {
82+
// fmt.Println("Master Kill Switch Activated")
83+
// return
84+
// }
85+
// }
86+
// }
87+
<-o.masterQuitCh
88+
fmt.Println("Master Kill Switch Activated")
89+
}
90+
91+
func (o *octopus) GetInputUrlStrChan() chan<- string {
92+
if o.isReady {
93+
return o.inputUrlStrChan
94+
} else {
95+
return nil
96+
}
97+
}
98+
99+
func (o *octopus) GetMasterQuitChan() chan<- int {
100+
if o.isReady {
101+
return o.masterQuitCh
102+
} else {
103+
return nil
79104
}
80105
}

octopus/modelfactory.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ const (
66
defaultMaxDepth int64 = 2
77
anchorTag = "a"
88
anchorAttrb = "href"
9-
defaultTimeToQuit = 5
9+
defaultTimeToQuit = 10
1010
)
1111

1212
// NewWithDefaultOptions - Create an Instance of the Octopus with the default CrawlOptions.

octopus/models.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ type octopus struct {
1717
adapterChSet *NodeChSet
1818
isValidProtocol map[string]bool
1919
timeToQuit time.Duration
20-
inpUrlStrChan chan string
20+
inputUrlStrChan chan string
2121
masterQuitCh chan int
2222
}
2323

octopus/pipe_augment_linkabsolution.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package octopus
22

33
import (
4+
"log"
45
"net/url"
56
)
67

@@ -9,6 +10,10 @@ func (o *octopus) makeLinkAbsolutionPipe(outChSet *NodeChSet) *NodeChSet {
910
}
1011

1112
func makeLinkAbsolute(node *Node, outChSet *NodeChSet) {
13+
if node == nil || outChSet == nil {
14+
log.Fatal("NIL ERROR")
15+
return
16+
}
1217
if node.ParentUrlString != "" {
1318
linkUrl, err := url.Parse(node.UrlString)
1419
if err != nil {

octopus/pipe_filter_urlvalidation.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package octopus
22

33
import (
4+
"log"
45
"net/http"
56
)
67

@@ -10,7 +11,10 @@ func (o *octopus) makeInvalidUrlFilterPipe(outChSet *NodeChSet) *NodeChSet {
1011

1112
func validateUrl(node *Node, outChSet *NodeChSet) {
1213
resp, err := http.Head(node.UrlString)
13-
if err == nil && resp.StatusCode == 200 {
14+
if err == nil && resp == nil {
15+
log.Fatal("WOW resp is nill although err is not")
16+
}
17+
if err == nil && resp != nil && resp.StatusCode == 200 {
1418
outChSet.NodeCh <- node
1519
}
1620
// log.Printf("%v\n", err)

octopus/pipe_spl_ingest.go

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package octopus
22

33
import (
4-
"fmt"
54
"time"
65
)
76

@@ -18,33 +17,45 @@ func setupStringIngestPipe(inChSet *ingestPipeChSet, nodeOpChSet *NodeChSet,
1817
{
1918
nodeOpChSet.NodeCh <- createNode("", str, 1)
2019
}
21-
// case i := <-inChSet.QuitCh:
22-
// {
23-
// nodeOpChSet.QuitCh <- i
24-
// masterQuitCh <- i
25-
// }
20+
case i := <-inChSet.QuitCh:
21+
{
22+
nodeOpChSet.QuitCh <- i
23+
masterQuitCh <- i
24+
}
2625
}
2726
}
2827
}
2928

3029
func channelConnector(inChSet *ingestPipeChSet, opChSet *NodeChSet,
3130
timeOut time.Duration, masterQuitCh chan int) {
31+
// timeOutTimer := time.NewTimer(timeOut)
3232
for {
33+
// timeOutCh = time.After(timeOut * time.Second)
34+
// timeOutCh = time.NewTimer(timeOut)
3335
select {
3436
case node := <-inChSet.NodeCh:
3537
opChSet.NodeCh <- node
36-
case i := <-inChSet.QuitCh:
37-
{
38-
fmt.Println("Quit Received on Ingest Channel")
39-
opChSet.QuitCh <- i
40-
masterQuitCh <- i
41-
}
42-
case <-time.After(timeOut * time.Second):
43-
{
44-
fmt.Println("Timeout Triggered in Ingest Channel")
45-
opChSet.QuitCh <- 1
46-
return
47-
}
38+
// if !timeOutTimer.Stop() {
39+
// <-timeOutTimer.C
40+
// }
41+
// log.Println("abc")
42+
// timeOutTimer.Reset(timeOut)
43+
// case i := <-inChSet.QuitCh:
44+
// {
45+
// fmt.Println("Quit Received on Ingest Channel")
46+
// opChSet.QuitCh <- i
47+
// masterQuitCh <- i
48+
// if !timeOutTimer.Stop() {
49+
// <-timeOutTimer.C
50+
// }
51+
// timeOutTimer.Reset(timeOut)
52+
// }
53+
// case <-timeOutTimer.C:
54+
// fmt.Println("Timeout Triggered in Ingest Channel")
55+
// opChSet.QuitCh <- 1
56+
// masterQuitCh <- 1
57+
// return
58+
4859
}
4960
}
5061
}

octopus/stdpipefunc.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@ func stdLinearNodeFunc(stdFn stdFunc, outChSet *NodeChSet) *NodeChSet {
1515
QuitCh: listenQuitCh,
1616
},
1717
}
18+
// listenChSet := MakeNodeChSet(listenCh, listenQuitCh)
1819
go func() {
19-
defer close(listenCh)
20-
defer close(listenQuitCh)
20+
// defer close(listenCh)
21+
// defer close(listenQuitCh)
2122
for {
2223
select {
2324
case node := <-listenCh:

0 commit comments

Comments
 (0)