File tree 8 files changed +89
-24
lines changed
8 files changed +89
-24
lines changed Original file line number Diff line number Diff line change 1
1
package main
2
2
3
3
import (
4
+ "flag"
4
5
"polaris/crawler-distributed/config"
5
6
itemsaver "polaris/crawler-distributed/persist/client"
7
+ "polaris/crawler-distributed/pool"
6
8
worker "polaris/crawler-distributed/worker/client"
7
9
"polaris/crawler/engine"
8
10
"polaris/crawler/scheduler"
9
11
"polaris/crawler/zhenai/parser"
12
+ "strings"
10
13
)
11
14
15
// itemSaverHost is the address handed to itemsaver.ItemSaver in main.
var itemSaverHost = flag.String("itemsaver_host", "", "itemsaver_host")

// workerHosts is a comma-separated list of worker addresses; main splits it
// on "," and passes the pieces to pool.CreateClientPool.
var workerHosts = flag.String("worker_host", "", "worker_host(comma separated)")
12
22
func main () {
13
23
// run condition:
14
24
// run itemsaver.go first
15
25
// then to run worker.go
16
- itemChan , err := itemsaver .ItemSaver (config .ItemSaver0Host )
26
+ flag .Parse ()
27
+ itemChan , err := itemsaver .ItemSaver (* itemSaverHost )
17
28
if err != nil {
18
29
panic (err )
19
30
}
20
31
21
- processor , err := worker .CreateProcessor ()
32
+ clientPool := pool .CreateClientPool (strings .Split (* workerHosts , "," ))
33
+ processor , err := worker .CreateProcessor (clientPool )
22
34
if err != nil {
23
35
panic (err )
24
36
}
@@ -34,3 +46,4 @@ func main() {
34
46
Parser : engine .NewFuncParser (parser .ParseCityList , config .ParseCityList ),
35
47
})
36
48
}
49
+
Original file line number Diff line number Diff line change @@ -11,15 +11,14 @@ type ItemSaverService struct {
11
11
Index string
12
12
}
13
13
14
-
15
14
func (s * ItemSaverService ) Save (item engine.Item , result * string ) error {
16
- const index = "polaris"
15
+ const index = "polaris"
17
16
err := persist .Save (item , index )
18
17
if err == nil {
19
- fmt .Println ("saving item success" )
18
+ fmt .Printf ("saving item#%v \n " , item . Url )
20
19
* result = "ok"
21
20
} else {
22
- log .Error ("saving item fail" )
21
+ log .Error ("saving item#%v fail" , item )
23
22
* result = "failed"
24
23
}
25
24
return err
Original file line number Diff line number Diff line change 1
1
package main
2
2
3
3
import (
4
- "polaris/crawler-distributed/config"
4
+ "flag"
5
+ "fmt"
6
+ "log"
5
7
"polaris/crawler-distributed/persist"
6
8
"polaris/crawler-distributed/rpc"
7
9
)
@@ -13,6 +15,14 @@ func serveRpc(host, index string) error {
13
15
})
14
16
}
15
17
18
// port is the TCP port the item saver RPC server listens on. The zero
// default means "not set"; main refuses to start in that case.
var port = flag.Int(
	"port",
	0,
	"The port for me to listen on",
)
20
+
16
21
func main () {
17
- serveRpc (config .ItemSaver0Host , "polaris" )
22
+ flag .Parse ()
23
+ if * port == 0 {
24
+ log .Printf ("Must specify a port " )
25
+ return
26
+ }
27
+ serveRpc (fmt .Sprintf (":%d" , * port ), "polaris" )
18
28
}
Original file line number Diff line number Diff line change
1
+ package pool
2
+
3
+ import (
4
+ "log"
5
+ "net/rpc"
6
+ "polaris/crawler-distributed/rpc"
7
+ )
8
+
9
+ func CreateClientPool (hosts []string ) chan * rpc.Client {
10
+
11
+ var clients []* rpc.Client
12
+ for _ , h := range hosts {
13
+ client , err := rpcsupport .NewClient (h )
14
+ if err == nil {
15
+ clients = append (clients , client )
16
+ log .Printf ("Connected to %s" , h )
17
+ } else {
18
+ log .Printf ("Error connecting to %s: %v" ,
19
+ h , err )
20
+ }
21
+ }
22
+
23
+
24
+ out := make (chan * rpc.Client )
25
+ go func () {
26
+ for {
27
+ for _ , client := range clients {
28
+ out <- client
29
+ }
30
+ }
31
+ }()
32
+ return out
33
+ }
Original file line number Diff line number Diff line change @@ -13,7 +13,7 @@ func ServeRpc(host string, service interface{}) error {
13
13
if err != nil {
14
14
return err
15
15
}
16
-
16
+ log . Printf ( "Listen on %s" , host )
17
17
for {
18
18
conn , err := listener .Accept ()
19
19
if err != nil {
Original file line number Diff line number Diff line change 1
1
package client
2
2
3
3
import (
4
- "fmt "
4
+ "net/rpc "
5
5
"polaris/crawler-distributed/config"
6
- "polaris/crawler-distributed/rpc"
7
6
"polaris/crawler-distributed/worker"
8
7
"polaris/crawler/engine"
9
8
)
10
9
11
- func CreateProcessor () (engine.Processor , error ) {
12
- client , err := rpcsupport .NewClient (fmt .Sprintf ("%s" , config .Worker0Host ))
13
- if err != nil {
14
- return nil , err
15
- }
10
+ func CreateProcessor (
11
+ pool chan * rpc.Client ) (engine.Processor , error ) {
16
12
17
13
return func (request engine.Request ) (engine.ParseResult , error ) {
18
14
sReq := worker .SerializedRequest (request )
19
15
var sResult worker.ParseResult
20
- err := client .Call (config .CrawlServiceRpc , sReq , & sResult )
16
+
17
+ c := <- pool
18
+ err := c .Call (config .CrawlServiceRpc , sReq , & sResult )
21
19
if err != nil {
22
20
return engine.ParseResult {}, err
23
21
}
Original file line number Diff line number Diff line change 1
1
package main
2
2
3
3
import (
4
- "polaris/crawler-distributed/config"
4
+ "flag"
5
+ "fmt"
6
+ "log"
5
7
"polaris/crawler-distributed/rpc"
6
8
"polaris/crawler-distributed/worker"
7
9
)
8
10
11
// port is the TCP port the worker RPC server listens on. The zero default
// means "not set"; main refuses to start in that case.
var port = flag.Int(
	"port",
	0,
	"The port for me to listen on",
)
13
+
9
14
func main () {
10
- err := rpcsupport .ServeRpc (config .Worker0Host , worker.CrawlService {})
11
- if err != nil {
15
+ flag .Parse ()
16
+ if * port == 0 {
17
+ log .Printf ("Must specify a port " )
18
+ return
19
+ }
20
+ err := rpcsupport .ServeRpc (fmt .Sprintf (
21
+ ":%d" , * port ), worker.CrawlService {})
22
+ if err != nil {
12
23
panic (err )
13
24
}
14
25
}
Original file line number Diff line number Diff line change 1
1
package main
2
2
3
3
import (
4
+ "polaris/crawler-distributed/config"
4
5
"polaris/crawler/engine"
5
6
"polaris/crawler/persist"
6
7
"polaris/crawler/scheduler"
@@ -19,16 +20,16 @@ func main() {
19
20
RequestProcessor : engine .Worker ,
20
21
}
21
22
22
- /* e.Run(engine.Request{
23
+ e .Run (engine.Request {
23
24
Url : seed ,
24
25
Parser : engine .NewFuncParser (
25
26
parser .ParseCityList ,
26
27
config .ParseCityList ),
27
- })*/
28
- e .Run (engine.Request {
28
+ })
29
+ /* e.Run(engine.Request{
29
30
Url: shanghai,
30
31
Parser: engine.NewFuncParser(
31
32
parser.ParseCity,
32
33
"ParseCity"),
33
- })
34
+ })*/
34
35
}
You can’t perform that action at this time.
0 commit comments