delete extra stuff, formatting, removed crawl mode
This commit is contained in:
parent
1f26aeeb5c
commit
1ee831dbf2
9 changed files with 59 additions and 293 deletions
|
@ -1,78 +0,0 @@
|
|||
![](https://raw.githubusercontent.com/shiyanhui/dht/master/doc/screen-shot.png)
|
||||
|
||||
在这个视频上你可以看到爬取效果[Youtube](https://www.youtube.com/watch?v=AIpeQtw22kc).
|
||||
|
||||
## Introduction
|
||||
|
||||
DHT实现了BitTorrent DHT协议,主要包括:
|
||||
|
||||
- [BEP-3 (部分)](http://www.bittorrent.org/beps/bep_0003.html)
|
||||
- [BEP-5](http://www.bittorrent.org/beps/bep_0005.html)
|
||||
- [BEP-9](http://www.bittorrent.org/beps/bep_0009.html)
|
||||
- [BEP-10](http://www.bittorrent.org/beps/bep_0010.html)
|
||||
|
||||
它包含两种模式,标准模式和爬虫模式。标准模式遵循DHT协议,你可以把它当做一个标准
|
||||
的DHT组件。爬虫模式是为了嗅探到更多torrent文件信息,它在某些方面不遵循DHT协议。
|
||||
基于爬虫模式,你可以打造你自己的[BTDigg](http://btdigg.org/)。
|
||||
|
||||
[bthub.io](http://bthub.io)是一个基于这个爬虫而建的BT搜索引擎,你可以把它当做
|
||||
BTDigg的替代品。
|
||||
|
||||
## Installation
|
||||
|
||||
go get github.com/shiyanhui/dht
|
||||
|
||||
## Example
|
||||
|
||||
下面是一个简单的爬虫例子,你可以到[这里](https://github.com/shiyanhui/dht/blob/master/sample)看完整的Demo。
|
||||
|
||||
```go
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/shiyanhui/dht"
|
||||
)
|
||||
|
||||
func main() {
|
||||
downloader := dht.NewWire(65536)
|
||||
go func() {
|
||||
// once we got the request result
|
||||
for resp := range downloader.Response() {
|
||||
fmt.Println(resp.InfoHash, resp.MetadataInfo)
|
||||
}
|
||||
}()
|
||||
go downloader.Run()
|
||||
|
||||
config := dht.NewCrawlConfig()
|
||||
config.OnAnnouncePeer = func(infoHash, ip string, port int) {
|
||||
// request to download the metadata info
|
||||
downloader.Request([]byte(infoHash), ip, port)
|
||||
}
|
||||
d := dht.New(config)
|
||||
|
||||
d.Run()
|
||||
}
|
||||
```
|
||||
|
||||
## Download
|
||||
|
||||
这个是已经编译好的Demo二进制文件,你可以到这里[下载](https://github.com/shiyanhui/dht/files/407021/spider.zip)。
|
||||
|
||||
## 注意
|
||||
|
||||
- 默认的爬虫配置需要300M左右内存,你可以根据你的服务器内存大小调整MaxNodes和
|
||||
BlackListMaxSize
|
||||
- 目前还不能穿透NAT,因此还不能在局域网运行
|
||||
|
||||
## TODO
|
||||
|
||||
- [ ] NAT穿透,在局域网内也能够运行
|
||||
- [ ] 完整地实现BEP-3,这样不但能够下载种子,也能够下载资源
|
||||
- [ ] 优化
|
||||
|
||||
## Blog
|
||||
|
||||
你可以在[这里](https://github.com/shiyanhui/dht/wiki)看到DHT Spider教程。
|
||||
|
||||
## License
|
||||
|
||||
[MIT](https://github.com/shiyanhui/dht/blob/master/LICENSE)
|
|
@ -21,8 +21,7 @@ func find(data []byte, start int, target rune) (index int) {
|
|||
|
||||
// DecodeString decodes a string in the data. It returns a tuple
|
||||
// (decoded result, the end position, error).
|
||||
func DecodeString(data []byte, start int) (
|
||||
result interface{}, index int, err error) {
|
||||
func DecodeString(data []byte, start int) (result interface{}, index int, err error) {
|
||||
|
||||
if start >= len(data) || data[start] < '0' || data[start] > '9' {
|
||||
err = errors.New("invalid string bencode")
|
||||
|
@ -57,8 +56,7 @@ func DecodeString(data []byte, start int) (
|
|||
}
|
||||
|
||||
// DecodeInt decodes int value in the data.
|
||||
func DecodeInt(data []byte, start int) (
|
||||
result interface{}, index int, err error) {
|
||||
func DecodeInt(data []byte, start int) (result interface{}, index int, err error) {
|
||||
|
||||
if start >= len(data) || data[start] != 'i' {
|
||||
err = errors.New("invalid int bencode")
|
||||
|
@ -82,9 +80,7 @@ func DecodeInt(data []byte, start int) (
|
|||
}
|
||||
|
||||
// decodeItem decodes an item of dict or list.
|
||||
func decodeItem(data []byte, i int) (
|
||||
result interface{}, index int, err error) {
|
||||
|
||||
func decodeItem(data []byte, i int) (result interface{}, index int, err error) {
|
||||
var decodeFunc = []func([]byte, int) (interface{}, int, error){
|
||||
DecodeString, DecodeInt, DecodeList, DecodeDict,
|
||||
}
|
||||
|
@ -101,8 +97,7 @@ func decodeItem(data []byte, i int) (
|
|||
}
|
||||
|
||||
// DecodeList decodes a list value.
|
||||
func DecodeList(data []byte, start int) (
|
||||
result interface{}, index int, err error) {
|
||||
func DecodeList(data []byte, start int) (result interface{}, index int, err error) {
|
||||
|
||||
if start >= len(data) || data[start] != 'l' {
|
||||
err = errors.New("invalid list bencode")
|
||||
|
@ -137,8 +132,7 @@ func DecodeList(data []byte, start int) (
|
|||
}
|
||||
|
||||
// DecodeDict decodes a map value.
|
||||
func DecodeDict(data []byte, start int) (
|
||||
result interface{}, index int, err error) {
|
||||
func DecodeDict(data []byte, start int) (result interface{}, index int, err error) {
|
||||
|
||||
if start >= len(data) || data[start] != 'd' {
|
||||
err = errors.New("invalid dict bencode")
|
||||
|
|
48
dht/dht.go
48
dht/dht.go
|
@ -5,18 +5,12 @@ package dht
|
|||
import (
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"math"
|
||||
"net"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
// StandardMode follows the standard protocol
|
||||
StandardMode = iota
|
||||
// CrawlMode for crawling the dht network.
|
||||
CrawlMode
|
||||
)
|
||||
|
||||
// Config represents the configuration of dht.
|
||||
type Config struct {
|
||||
// in mainline dht, k = 8
|
||||
|
@ -50,8 +44,6 @@ type Config struct {
|
|||
BlockedIPs []string
|
||||
// blacklist size
|
||||
BlackListMaxSize int
|
||||
// StandardMode or CrawlMode
|
||||
Mode int
|
||||
// the times it tries when send fails
|
||||
Try int
|
||||
// the size of packet need to be dealt with
|
||||
|
@ -83,26 +75,12 @@ func NewStandardConfig() *Config {
|
|||
BlockedIPs: make([]string, 0),
|
||||
BlackListMaxSize: 65536,
|
||||
Try: 2,
|
||||
Mode: StandardMode,
|
||||
PacketJobLimit: 1024,
|
||||
PacketWorkerLimit: 256,
|
||||
RefreshNodeNum: 8,
|
||||
}
|
||||
}
|
||||
|
||||
// NewCrawlConfig returns a config in crawling mode.
|
||||
func NewCrawlConfig() *Config {
|
||||
config := NewStandardConfig()
|
||||
config.NodeExpriedAfter = 0
|
||||
config.KBucketExpiredAfter = 0
|
||||
config.CheckKBucketPeriod = time.Second * 5
|
||||
config.KBucketSize = math.MaxInt32
|
||||
config.Mode = CrawlMode
|
||||
config.RefreshNodeNum = 256
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
// DHT represents a DHT node.
|
||||
type DHT struct {
|
||||
*Config
|
||||
|
@ -156,18 +134,9 @@ func New(config *Config) *DHT {
|
|||
return d
|
||||
}
|
||||
|
||||
// IsStandardMode returns whether mode is StandardMode.
|
||||
func (dht *DHT) IsStandardMode() bool {
|
||||
return dht.Mode == StandardMode
|
||||
}
|
||||
|
||||
// IsCrawlMode returns whether mode is CrawlMode.
|
||||
func (dht *DHT) IsCrawlMode() bool {
|
||||
return dht.Mode == CrawlMode
|
||||
}
|
||||
|
||||
// init initializes global varables.
|
||||
// init initializes global variables.
|
||||
func (dht *DHT) init() {
|
||||
log.Info("Initializing DHT")
|
||||
listener, err := net.ListenPacket(dht.Network, dht.Address)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
@ -210,6 +179,7 @@ func (dht *DHT) listen() {
|
|||
if err != nil {
|
||||
continue
|
||||
}
|
||||
log.Infof("Received %s", buff)
|
||||
|
||||
dht.packets <- packet{buff[:n], raddr}
|
||||
}
|
||||
|
@ -219,10 +189,7 @@ func (dht *DHT) listen() {
|
|||
// id returns an id near to target if target is not empty, otherwise it returns
|
||||
// the dht's node id.
|
||||
func (dht *DHT) id(target string) string {
|
||||
if dht.IsStandardMode() || target == "" {
|
||||
return dht.node.id.RawString()
|
||||
}
|
||||
return target[:15] + dht.node.id.RawString()[15:]
|
||||
return dht.node.id.RawString()
|
||||
}
|
||||
|
||||
// GetPeers returns peers who have announced having infoHash.
|
||||
|
@ -255,10 +222,10 @@ func (dht *DHT) GetPeers(infoHash string) ([]*Peer, error) {
|
|||
}
|
||||
|
||||
i := 0
|
||||
for _ = range time.Tick(time.Second * 1) {
|
||||
for range time.Tick(time.Second * 1) {
|
||||
i++
|
||||
peers = dht.peersManager.GetPeers(infoHash, dht.K)
|
||||
if len(peers) != 0 || i == 30 {
|
||||
if len(peers) != 0 || i >= 30 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -277,6 +244,7 @@ func (dht *DHT) Run() {
|
|||
dht.join()
|
||||
|
||||
dht.Ready = true
|
||||
log.Info("DHT ready")
|
||||
|
||||
var pkt packet
|
||||
tick := time.Tick(dht.CheckKBucketPeriod)
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 695 KiB |
99
dht/krpc.go
99
dht/krpc.go
|
@ -2,6 +2,8 @@ package dht
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"net"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -313,8 +315,7 @@ func (tm *transactionManager) run() {
|
|||
}
|
||||
|
||||
// sendQuery sends query-formed data to the chan.
|
||||
func (tm *transactionManager) sendQuery(
|
||||
no *node, queryType string, a map[string]interface{}) {
|
||||
func (tm *transactionManager) sendQuery(no *node, queryType string, a map[string]interface{}) {
|
||||
|
||||
// If the target is self, then stop.
|
||||
if no.id != nil && no.id.RawString() == tm.dht.node.id.RawString() ||
|
||||
|
@ -354,9 +355,7 @@ func (tm *transactionManager) getPeers(no *node, infoHash string) {
|
|||
}
|
||||
|
||||
// announcePeer sends announce_peer query to the chan.
|
||||
func (tm *transactionManager) announcePeer(
|
||||
no *node, infoHash string, impliedPort, port int, token string) {
|
||||
|
||||
func (tm *transactionManager) announcePeer(no *node, infoHash string, impliedPort, port int, token string) {
|
||||
tm.sendQuery(no, announcePeerType, map[string]interface{}{
|
||||
"id": tm.dht.id(no.id.RawString()),
|
||||
"info_hash": infoHash,
|
||||
|
@ -422,8 +421,7 @@ func parseMessage(data interface{}) (map[string]interface{}, error) {
|
|||
}
|
||||
|
||||
// handleRequest handles the requests received from udp.
|
||||
func handleRequest(dht *DHT, addr *net.UDPAddr,
|
||||
response map[string]interface{}) (success bool) {
|
||||
func handleRequest(dht *DHT, addr *net.UDPAddr, response map[string]interface{}) (success bool) {
|
||||
|
||||
t := response["t"].(string)
|
||||
|
||||
|
@ -469,36 +467,34 @@ func handleRequest(dht *DHT, addr *net.UDPAddr,
|
|||
"id": dht.id(id),
|
||||
}))
|
||||
case findNodeType:
|
||||
if dht.IsStandardMode() {
|
||||
if err := parseKey(a, "target", "string"); err != nil {
|
||||
send(dht, addr, makeError(t, protocolError, err.Error()))
|
||||
return
|
||||
}
|
||||
|
||||
target := a["target"].(string)
|
||||
if len(target) != 20 {
|
||||
send(dht, addr, makeError(t, protocolError, "invalid target"))
|
||||
return
|
||||
}
|
||||
|
||||
var nodes string
|
||||
targetID := newBitmapFromString(target)
|
||||
|
||||
no, _ := dht.routingTable.GetNodeKBucktByID(targetID)
|
||||
if no != nil {
|
||||
nodes = no.CompactNodeInfo()
|
||||
} else {
|
||||
nodes = strings.Join(
|
||||
dht.routingTable.GetNeighborCompactInfos(targetID, dht.K),
|
||||
"",
|
||||
)
|
||||
}
|
||||
|
||||
send(dht, addr, makeResponse(t, map[string]interface{}{
|
||||
"id": dht.id(target),
|
||||
"nodes": nodes,
|
||||
}))
|
||||
if err := parseKey(a, "target", "string"); err != nil {
|
||||
send(dht, addr, makeError(t, protocolError, err.Error()))
|
||||
return
|
||||
}
|
||||
|
||||
target := a["target"].(string)
|
||||
if len(target) != 20 {
|
||||
send(dht, addr, makeError(t, protocolError, "invalid target"))
|
||||
return
|
||||
}
|
||||
|
||||
var nodes string
|
||||
targetID := newBitmapFromString(target)
|
||||
|
||||
no, _ := dht.routingTable.GetNodeKBucktByID(targetID)
|
||||
if no != nil {
|
||||
nodes = no.CompactNodeInfo()
|
||||
} else {
|
||||
nodes = strings.Join(
|
||||
dht.routingTable.GetNeighborCompactInfos(targetID, dht.K),
|
||||
"",
|
||||
)
|
||||
}
|
||||
|
||||
send(dht, addr, makeResponse(t, map[string]interface{}{
|
||||
"id": dht.id(target),
|
||||
"nodes": nodes,
|
||||
}))
|
||||
case getPeersType:
|
||||
if err := parseKey(a, "info_hash", "string"); err != nil {
|
||||
send(dht, addr, makeError(t, protocolError, err.Error()))
|
||||
|
@ -512,13 +508,7 @@ func handleRequest(dht *DHT, addr *net.UDPAddr,
|
|||
return
|
||||
}
|
||||
|
||||
if dht.IsCrawlMode() {
|
||||
send(dht, addr, makeResponse(t, map[string]interface{}{
|
||||
"id": dht.id(infoHash),
|
||||
"token": dht.tokenManager.token(addr),
|
||||
"nodes": "",
|
||||
}))
|
||||
} else if peers := dht.peersManager.GetPeers(
|
||||
if peers := dht.peersManager.GetPeers(
|
||||
infoHash, dht.K); len(peers) > 0 {
|
||||
|
||||
values := make([]interface{}, len(peers))
|
||||
|
@ -568,13 +558,11 @@ func handleRequest(dht *DHT, addr *net.UDPAddr,
|
|||
port = addr.Port
|
||||
}
|
||||
|
||||
if dht.IsStandardMode() {
|
||||
dht.peersManager.Insert(infoHash, newPeer(addr.IP, port, token))
|
||||
dht.peersManager.Insert(infoHash, newPeer(addr.IP, port, token))
|
||||
|
||||
send(dht, addr, makeResponse(t, map[string]interface{}{
|
||||
"id": dht.id(id),
|
||||
}))
|
||||
}
|
||||
send(dht, addr, makeResponse(t, map[string]interface{}{
|
||||
"id": dht.id(id),
|
||||
}))
|
||||
|
||||
if dht.OnAnnouncePeer != nil {
|
||||
dht.OnAnnouncePeer(infoHash, addr.IP.String(), port)
|
||||
|
@ -592,8 +580,7 @@ func handleRequest(dht *DHT, addr *net.UDPAddr,
|
|||
// findOn puts nodes in the response to the routingTable, then if target is in
|
||||
// the nodes or all nodes are in the routingTable, it stops. Otherwise it
|
||||
// continues to findNode or getPeers.
|
||||
func findOn(dht *DHT, r map[string]interface{}, target *bitmap,
|
||||
queryType string) error {
|
||||
func findOn(dht *DHT, r map[string]interface{}, target *bitmap, queryType string) error {
|
||||
|
||||
if err := parseKey(r, "nodes", "string"); err != nil {
|
||||
return err
|
||||
|
@ -637,8 +624,7 @@ func findOn(dht *DHT, r map[string]interface{}, target *bitmap,
|
|||
}
|
||||
|
||||
// handleResponse handles responses received from udp.
|
||||
func handleResponse(dht *DHT, addr *net.UDPAddr,
|
||||
response map[string]interface{}) (success bool) {
|
||||
func handleResponse(dht *DHT, addr *net.UDPAddr, response map[string]interface{}) (success bool) {
|
||||
|
||||
t := response["t"].(string)
|
||||
|
||||
|
@ -722,8 +708,7 @@ func handleResponse(dht *DHT, addr *net.UDPAddr,
|
|||
}
|
||||
|
||||
// handleError handles errors received from udp.
|
||||
func handleError(dht *DHT, addr *net.UDPAddr,
|
||||
response map[string]interface{}) (success bool) {
|
||||
func handleError(dht *DHT, addr *net.UDPAddr, response map[string]interface{}) (success bool) {
|
||||
|
||||
if err := parseKey(response, "e", "list"); err != nil {
|
||||
return
|
||||
|
@ -750,6 +735,7 @@ var handlers = map[string]func(*DHT, *net.UDPAddr, map[string]interface{}) bool{
|
|||
|
||||
// handle handles packets received from udp.
|
||||
func handle(dht *DHT, pkt packet) {
|
||||
log.Infof("Packet from %s: %s", pkt.raddr.IP.String(), pkt.data)
|
||||
if len(dht.workerTokens) == dht.PacketWorkerLimit {
|
||||
return
|
||||
}
|
||||
|
@ -762,16 +748,19 @@ func handle(dht *DHT, pkt packet) {
|
|||
}()
|
||||
|
||||
if dht.blackList.in(pkt.raddr.IP.String(), pkt.raddr.Port) {
|
||||
log.Infof("%s blacklisted, ignoring packet", pkt.raddr.IP.String())
|
||||
return
|
||||
}
|
||||
|
||||
data, err := Decode(pkt.data)
|
||||
if err != nil {
|
||||
log.Errorf("Error decoding data: %s\n%s", err, pkt.data)
|
||||
return
|
||||
}
|
||||
|
||||
response, err := parseMessage(data)
|
||||
if err != nil {
|
||||
log.Errorf("Error parsing message: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ package dht
|
|||
|
||||
import (
|
||||
"container/heap"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -521,12 +521,6 @@ func (rt *routingTable) Fresh() {
|
|||
}
|
||||
}
|
||||
|
||||
if rt.dht.IsCrawlMode() {
|
||||
for e := range rt.clearQueue.Iter() {
|
||||
rt.Remove(e.Value.(*node).id)
|
||||
}
|
||||
}
|
||||
|
||||
rt.clearQueue.Clear()
|
||||
}
|
||||
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/shiyanhui/dht"
|
||||
"time"
|
||||
)
|
||||
|
||||
func main() {
|
||||
d := dht.New(nil)
|
||||
go d.Run()
|
||||
|
||||
for {
|
||||
// ubuntu-14.04.2-desktop-amd64.iso
|
||||
peers, err := d.GetPeers("546cf15f724d19c4319cc17b179d7e035f89c1f4")
|
||||
if err != nil {
|
||||
time.Sleep(time.Second * 1)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Println("Found peers:", peers)
|
||||
}
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/shiyanhui/dht"
|
||||
"net/http"
|
||||
_ "net/http/pprof"
|
||||
)
|
||||
|
||||
type file struct {
|
||||
Path []interface{} `json:"path"`
|
||||
Length int `json:"length"`
|
||||
}
|
||||
|
||||
type bitTorrent struct {
|
||||
InfoHash string `json:"infohash"`
|
||||
Name string `json:"name"`
|
||||
Files []file `json:"files,omitempty"`
|
||||
Length int `json:"length,omitempty"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
go func() {
|
||||
http.ListenAndServe(":6060", nil)
|
||||
}()
|
||||
|
||||
w := dht.NewWire(65536, 1024, 256)
|
||||
go func() {
|
||||
for resp := range w.Response() {
|
||||
metadata, err := dht.Decode(resp.MetadataInfo)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
info := metadata.(map[string]interface{})
|
||||
|
||||
if _, ok := info["name"]; !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
bt := bitTorrent{
|
||||
InfoHash: hex.EncodeToString(resp.InfoHash),
|
||||
Name: info["name"].(string),
|
||||
}
|
||||
|
||||
if v, ok := info["files"]; ok {
|
||||
files := v.([]interface{})
|
||||
bt.Files = make([]file, len(files))
|
||||
|
||||
for i, item := range files {
|
||||
f := item.(map[string]interface{})
|
||||
bt.Files[i] = file{
|
||||
Path: f["path"].([]interface{}),
|
||||
Length: f["length"].(int),
|
||||
}
|
||||
}
|
||||
} else if _, ok := info["length"]; ok {
|
||||
bt.Length = info["length"].(int)
|
||||
}
|
||||
|
||||
data, err := json.Marshal(bt)
|
||||
if err == nil {
|
||||
fmt.Printf("%s\n\n", data)
|
||||
}
|
||||
}
|
||||
}()
|
||||
go w.Run()
|
||||
|
||||
config := dht.NewCrawlConfig()
|
||||
config.OnAnnouncePeer = func(infoHash, ip string, port int) {
|
||||
w.Request([]byte(infoHash), ip, port)
|
||||
}
|
||||
d := dht.New(config)
|
||||
|
||||
d.Run()
|
||||
}
|
|
@ -66,8 +66,7 @@ func decodeCompactIPPortInfo(info string) (ip net.IP, port int, err error) {
|
|||
// compactIP-address/port info.
|
||||
func encodeCompactIPPortInfo(ip net.IP, port int) (info string, err error) {
|
||||
if port > 65535 || port < 0 {
|
||||
err = errors.New(
|
||||
"port should be no greater than 65535 and no less than 0")
|
||||
err = errors.New("port should be no greater than 65535 and no less than 0")
|
||||
return
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue