delete extra stuff, formatting, removed crawl mode

This commit is contained in:
Alex Grintsvayg 2017-08-16 11:56:35 -04:00
parent 1f26aeeb5c
commit 1ee831dbf2
9 changed files with 59 additions and 293 deletions

View file

@ -1,78 +0,0 @@
![](https://raw.githubusercontent.com/shiyanhui/dht/master/doc/screen-shot.png)
在这个视频上你可以看到爬取效果[Youtube](https://www.youtube.com/watch?v=AIpeQtw22kc).
## Introduction
DHT实现了BitTorrent DHT协议，主要包括：
- [BEP-3 (部分)](http://www.bittorrent.org/beps/bep_0003.html)
- [BEP-5](http://www.bittorrent.org/beps/bep_0005.html)
- [BEP-9](http://www.bittorrent.org/beps/bep_0009.html)
- [BEP-10](http://www.bittorrent.org/beps/bep_0010.html)
它包含两种模式：标准模式和爬虫模式。标准模式遵循DHT协议，你可以把它当做一个标准
的DHT组件。爬虫模式是为了嗅探到更多torrent文件信息，它在某些方面不遵循DHT协议。
基于爬虫模式,你可以打造你自己的[BTDigg](http://btdigg.org/)。
[bthub.io](http://bthub.io)是一个基于这个爬虫而建的BT搜索引擎，你可以把它当做
BTDigg的替代品。
## Installation
go get github.com/shiyanhui/dht
## Example
下面是一个简单的爬虫例子,你可以到[这里](https://github.com/shiyanhui/dht/blob/master/sample)看完整的Demo。
```go
import (
"fmt"
"github.com/shiyanhui/dht"
)
// main wires a metadata downloader to a DHT node configured for crawling and
// prints the info hash and metadata of every torrent the downloader fetches.
func main() {
	// NewWire is called with the same three arguments as the spider sample.
	downloader := dht.NewWire(65536, 1024, 256)
	go func() {
		// once we got the request result
		for resp := range downloader.Response() {
			fmt.Println(resp.InfoHash, resp.MetadataInfo)
		}
	}()
	go downloader.Run()

	config := dht.NewCrawlConfig()
	config.OnAnnouncePeer = func(infoHash, ip string, port int) {
		// request to download the metadata info
		downloader.Request([]byte(infoHash), ip, port)
	}
	d := dht.New(config)

	d.Run()
}
```
## Download
这个是已经编译好的Demo二进制文件，你可以到这里[下载](https://github.com/shiyanhui/dht/files/407021/spider.zip)。
## 注意
- 默认的爬虫配置需要300M左右内存，你可以根据你的服务器内存大小调整MaxNodes和
BlackListMaxSize
- 目前还不能穿透NAT，因此还不能在局域网运行
## TODO
- [ ] NAT穿透，在局域网内也能够运行
- [ ] 完整地实现BEP-3，这样不但能够下载种子，也能够下载资源
- [ ] 优化
## Blog
你可以在[这里](https://github.com/shiyanhui/dht/wiki)看到DHT Spider教程。
## License
[MIT](https://github.com/shiyanhui/dht/blob/master/LICENSE)

View file

@ -21,8 +21,7 @@ func find(data []byte, start int, target rune) (index int) {
// DecodeString decodes a string in the data. It returns a tuple
// (decoded result, the end position, error).
func DecodeString(data []byte, start int) (
result interface{}, index int, err error) {
func DecodeString(data []byte, start int) (result interface{}, index int, err error) {
if start >= len(data) || data[start] < '0' || data[start] > '9' {
err = errors.New("invalid string bencode")
@ -57,8 +56,7 @@ func DecodeString(data []byte, start int) (
}
// DecodeInt decodes int value in the data.
func DecodeInt(data []byte, start int) (
result interface{}, index int, err error) {
func DecodeInt(data []byte, start int) (result interface{}, index int, err error) {
if start >= len(data) || data[start] != 'i' {
err = errors.New("invalid int bencode")
@ -82,9 +80,7 @@ func DecodeInt(data []byte, start int) (
}
// decodeItem decodes an item of dict or list.
func decodeItem(data []byte, i int) (
result interface{}, index int, err error) {
func decodeItem(data []byte, i int) (result interface{}, index int, err error) {
var decodeFunc = []func([]byte, int) (interface{}, int, error){
DecodeString, DecodeInt, DecodeList, DecodeDict,
}
@ -101,8 +97,7 @@ func decodeItem(data []byte, i int) (
}
// DecodeList decodes a list value.
func DecodeList(data []byte, start int) (
result interface{}, index int, err error) {
func DecodeList(data []byte, start int) (result interface{}, index int, err error) {
if start >= len(data) || data[start] != 'l' {
err = errors.New("invalid list bencode")
@ -137,8 +132,7 @@ func DecodeList(data []byte, start int) (
}
// DecodeDict decodes a map value.
func DecodeDict(data []byte, start int) (
result interface{}, index int, err error) {
func DecodeDict(data []byte, start int) (result interface{}, index int, err error) {
if start >= len(data) || data[start] != 'd' {
err = errors.New("invalid dict bencode")

View file

@ -5,18 +5,12 @@ package dht
import (
"encoding/hex"
"errors"
log "github.com/sirupsen/logrus"
"math"
"net"
"time"
)
const (
// StandardMode follows the standard protocol
StandardMode = iota
// CrawlMode for crawling the dht network.
CrawlMode
)
// Config represents the configure of dht.
type Config struct {
// in mainline dht, k = 8
@ -50,8 +44,6 @@ type Config struct {
BlockedIPs []string
// blacklist size
BlackListMaxSize int
// StandardMode or CrawlMode
Mode int
// the times it tries when send fails
Try int
// the size of packet need to be dealt with
@ -83,26 +75,12 @@ func NewStandardConfig() *Config {
BlockedIPs: make([]string, 0),
BlackListMaxSize: 65536,
Try: 2,
Mode: StandardMode,
PacketJobLimit: 1024,
PacketWorkerLimit: 256,
RefreshNodeNum: 8,
}
}
// NewCrawlConfig builds a crawl-mode configuration. It takes the standard
// configuration as a base and overrides the mode, the expiry settings, the
// k-bucket size and check period, and the number of nodes refreshed.
func NewCrawlConfig() *Config {
	cfg := NewStandardConfig()

	cfg.Mode = CrawlMode

	// Both expiry durations are zeroed for crawling.
	cfg.NodeExpriedAfter = 0
	cfg.KBucketExpiredAfter = 0

	// Buckets are checked every five seconds and are effectively unbounded.
	cfg.CheckKBucketPeriod = 5 * time.Second
	cfg.KBucketSize = math.MaxInt32

	cfg.RefreshNodeNum = 256

	return cfg
}
// DHT represents a DHT node.
type DHT struct {
*Config
@ -156,18 +134,9 @@ func New(config *Config) *DHT {
return d
}
// IsStandardMode reports whether the node's configured Mode equals
// StandardMode.
func (dht *DHT) IsStandardMode() bool {
return dht.Mode == StandardMode
}
// IsCrawlMode reports whether the node's configured Mode equals CrawlMode.
func (dht *DHT) IsCrawlMode() bool {
return dht.Mode == CrawlMode
}
// init initializes global varables.
// init initializes global variables.
func (dht *DHT) init() {
log.Info("Initializing DHT")
listener, err := net.ListenPacket(dht.Network, dht.Address)
if err != nil {
panic(err)
@ -210,6 +179,7 @@ func (dht *DHT) listen() {
if err != nil {
continue
}
log.Infof("Received %s", buff)
dht.packets <- packet{buff[:n], raddr}
}
@ -219,10 +189,7 @@ func (dht *DHT) listen() {
// id returns a id near to target if target is not null, otherwise it returns
// the dht's node id.
func (dht *DHT) id(target string) string {
if dht.IsStandardMode() || target == "" {
return dht.node.id.RawString()
}
return target[:15] + dht.node.id.RawString()[15:]
return dht.node.id.RawString()
}
// GetPeers returns peers who have announced having infoHash.
@ -255,10 +222,10 @@ func (dht *DHT) GetPeers(infoHash string) ([]*Peer, error) {
}
i := 0
for _ = range time.Tick(time.Second * 1) {
for range time.Tick(time.Second * 1) {
i++
peers = dht.peersManager.GetPeers(infoHash, dht.K)
if len(peers) != 0 || i == 30 {
if len(peers) != 0 || i >= 30 {
break
}
}
@ -277,6 +244,7 @@ func (dht *DHT) Run() {
dht.join()
dht.Ready = true
log.Info("DHT ready")
var pkt packet
tick := time.Tick(dht.CheckKBucketPeriod)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 695 KiB

View file

@ -2,6 +2,8 @@ package dht
import (
"errors"
"fmt"
log "github.com/sirupsen/logrus"
"net"
"strings"
"sync"
@ -313,8 +315,7 @@ func (tm *transactionManager) run() {
}
// sendQuery send query-formed data to the chan.
func (tm *transactionManager) sendQuery(
no *node, queryType string, a map[string]interface{}) {
func (tm *transactionManager) sendQuery(no *node, queryType string, a map[string]interface{}) {
// If the target is self, then stop.
if no.id != nil && no.id.RawString() == tm.dht.node.id.RawString() ||
@ -354,9 +355,7 @@ func (tm *transactionManager) getPeers(no *node, infoHash string) {
}
// announcePeer sends announce_peer query to the chan.
func (tm *transactionManager) announcePeer(
no *node, infoHash string, impliedPort, port int, token string) {
func (tm *transactionManager) announcePeer(no *node, infoHash string, impliedPort, port int, token string) {
tm.sendQuery(no, announcePeerType, map[string]interface{}{
"id": tm.dht.id(no.id.RawString()),
"info_hash": infoHash,
@ -422,8 +421,7 @@ func parseMessage(data interface{}) (map[string]interface{}, error) {
}
// handleRequest handles the requests received from udp.
func handleRequest(dht *DHT, addr *net.UDPAddr,
response map[string]interface{}) (success bool) {
func handleRequest(dht *DHT, addr *net.UDPAddr, response map[string]interface{}) (success bool) {
t := response["t"].(string)
@ -469,36 +467,34 @@ func handleRequest(dht *DHT, addr *net.UDPAddr,
"id": dht.id(id),
}))
case findNodeType:
if dht.IsStandardMode() {
if err := parseKey(a, "target", "string"); err != nil {
send(dht, addr, makeError(t, protocolError, err.Error()))
return
}
target := a["target"].(string)
if len(target) != 20 {
send(dht, addr, makeError(t, protocolError, "invalid target"))
return
}
var nodes string
targetID := newBitmapFromString(target)
no, _ := dht.routingTable.GetNodeKBucktByID(targetID)
if no != nil {
nodes = no.CompactNodeInfo()
} else {
nodes = strings.Join(
dht.routingTable.GetNeighborCompactInfos(targetID, dht.K),
"",
)
}
send(dht, addr, makeResponse(t, map[string]interface{}{
"id": dht.id(target),
"nodes": nodes,
}))
if err := parseKey(a, "target", "string"); err != nil {
send(dht, addr, makeError(t, protocolError, err.Error()))
return
}
target := a["target"].(string)
if len(target) != 20 {
send(dht, addr, makeError(t, protocolError, "invalid target"))
return
}
var nodes string
targetID := newBitmapFromString(target)
no, _ := dht.routingTable.GetNodeKBucktByID(targetID)
if no != nil {
nodes = no.CompactNodeInfo()
} else {
nodes = strings.Join(
dht.routingTable.GetNeighborCompactInfos(targetID, dht.K),
"",
)
}
send(dht, addr, makeResponse(t, map[string]interface{}{
"id": dht.id(target),
"nodes": nodes,
}))
case getPeersType:
if err := parseKey(a, "info_hash", "string"); err != nil {
send(dht, addr, makeError(t, protocolError, err.Error()))
@ -512,13 +508,7 @@ func handleRequest(dht *DHT, addr *net.UDPAddr,
return
}
if dht.IsCrawlMode() {
send(dht, addr, makeResponse(t, map[string]interface{}{
"id": dht.id(infoHash),
"token": dht.tokenManager.token(addr),
"nodes": "",
}))
} else if peers := dht.peersManager.GetPeers(
if peers := dht.peersManager.GetPeers(
infoHash, dht.K); len(peers) > 0 {
values := make([]interface{}, len(peers))
@ -568,13 +558,11 @@ func handleRequest(dht *DHT, addr *net.UDPAddr,
port = addr.Port
}
if dht.IsStandardMode() {
dht.peersManager.Insert(infoHash, newPeer(addr.IP, port, token))
dht.peersManager.Insert(infoHash, newPeer(addr.IP, port, token))
send(dht, addr, makeResponse(t, map[string]interface{}{
"id": dht.id(id),
}))
}
send(dht, addr, makeResponse(t, map[string]interface{}{
"id": dht.id(id),
}))
if dht.OnAnnouncePeer != nil {
dht.OnAnnouncePeer(infoHash, addr.IP.String(), port)
@ -592,8 +580,7 @@ func handleRequest(dht *DHT, addr *net.UDPAddr,
// findOn puts nodes in the response to the routingTable, then if target is in
// the nodes or all nodes are in the routingTable, it stops. Otherwise it
// continues to findNode or getPeers.
func findOn(dht *DHT, r map[string]interface{}, target *bitmap,
queryType string) error {
func findOn(dht *DHT, r map[string]interface{}, target *bitmap, queryType string) error {
if err := parseKey(r, "nodes", "string"); err != nil {
return err
@ -637,8 +624,7 @@ func findOn(dht *DHT, r map[string]interface{}, target *bitmap,
}
// handleResponse handles responses received from udp.
func handleResponse(dht *DHT, addr *net.UDPAddr,
response map[string]interface{}) (success bool) {
func handleResponse(dht *DHT, addr *net.UDPAddr, response map[string]interface{}) (success bool) {
t := response["t"].(string)
@ -722,8 +708,7 @@ func handleResponse(dht *DHT, addr *net.UDPAddr,
}
// handleError handles errors received from udp.
func handleError(dht *DHT, addr *net.UDPAddr,
response map[string]interface{}) (success bool) {
func handleError(dht *DHT, addr *net.UDPAddr, response map[string]interface{}) (success bool) {
if err := parseKey(response, "e", "list"); err != nil {
return
@ -750,6 +735,7 @@ var handlers = map[string]func(*DHT, *net.UDPAddr, map[string]interface{}) bool{
// handle handles packets received from udp.
func handle(dht *DHT, pkt packet) {
log.Infof("Packet from %s: %s", pkt.raddr.IP.String(), pkt.data)
if len(dht.workerTokens) == dht.PacketWorkerLimit {
return
}
@ -762,16 +748,19 @@ func handle(dht *DHT, pkt packet) {
}()
if dht.blackList.in(pkt.raddr.IP.String(), pkt.raddr.Port) {
log.Infof("%s blacklisted, ignoring packet", pkt.raddr.IP.String())
return
}
data, err := Decode(pkt.data)
if err != nil {
log.Errorf("Error decoding data: %s\n%s", err, pkt.data)
return
}
response, err := parseMessage(data)
if err != nil {
log.Errorf("Error parsing message: %s", err)
return
}

View file

@ -2,7 +2,7 @@ package dht
import (
"container/heap"
"errors"
"fmt"
"net"
"strings"
"sync"
@ -521,12 +521,6 @@ func (rt *routingTable) Fresh() {
}
}
if rt.dht.IsCrawlMode() {
for e := range rt.clearQueue.Iter() {
rt.Remove(e.Value.(*node).id)
}
}
rt.clearQueue.Clear()
}

View file

@ -1,23 +0,0 @@
package main
import (
"fmt"
"github.com/shiyanhui/dht"
"time"
)
// main starts a DHT node with the default configuration and keeps asking it
// for the peers of a fixed info hash, printing each successful result.
func main() {
	node := dht.New(nil)
	go node.Run()

	// ubuntu-14.04.2-desktop-amd64.iso
	const infoHash = "546cf15f724d19c4319cc17b179d7e035f89c1f4"

	for {
		peers, err := node.GetPeers(infoHash)
		if err == nil {
			fmt.Println("Found peers:", peers)
			continue
		}

		// The lookup failed; wait a second before trying again.
		time.Sleep(time.Second)
	}
}

View file

@ -1,77 +0,0 @@
package main
import (
"encoding/hex"
"encoding/json"
"fmt"
"github.com/shiyanhui/dht"
"net/http"
_ "net/http/pprof"
)
// file is one entry of the metadata's "files" list as it appears in the JSON
// output.
type file struct {
// Path is the raw "path" list taken from the decoded metadata entry.
Path []interface{} `json:"path"`
// Length is the entry's "length" value.
Length int `json:"length"`
}
// bitTorrent is the record that main marshals to JSON and prints for every
// torrent whose metadata was decoded.
type bitTorrent struct {
// InfoHash is the hex-encoded info hash of the torrent.
InfoHash string `json:"infohash"`
// Name is the metadata's "name" field.
Name string `json:"name"`
// Files holds the entries of the metadata's "files" list; it is omitted
// from the JSON when empty.
Files []file `json:"files,omitempty"`
// Length is the metadata's top-level "length", used only when there is no
// "files" key; it is omitted from the JSON when zero.
Length int `json:"length,omitempty"`
}
func main() {
go func() {
http.ListenAndServe(":6060", nil)
}()
w := dht.NewWire(65536, 1024, 256)
go func() {
for resp := range w.Response() {
metadata, err := dht.Decode(resp.MetadataInfo)
if err != nil {
continue
}
info := metadata.(map[string]interface{})
if _, ok := info["name"]; !ok {
continue
}
bt := bitTorrent{
InfoHash: hex.EncodeToString(resp.InfoHash),
Name: info["name"].(string),
}
if v, ok := info["files"]; ok {
files := v.([]interface{})
bt.Files = make([]file, len(files))
for i, item := range files {
f := item.(map[string]interface{})
bt.Files[i] = file{
Path: f["path"].([]interface{}),
Length: f["length"].(int),
}
}
} else if _, ok := info["length"]; ok {
bt.Length = info["length"].(int)
}
data, err := json.Marshal(bt)
if err == nil {
fmt.Printf("%s\n\n", data)
}
}
}()
go w.Run()
config := dht.NewCrawlConfig()
config.OnAnnouncePeer = func(infoHash, ip string, port int) {
w.Request([]byte(infoHash), ip, port)
}
d := dht.New(config)
d.Run()
}

View file

@ -66,8 +66,7 @@ func decodeCompactIPPortInfo(info string) (ip net.IP, port int, err error) {
// compactIP-address/port info.
func encodeCompactIPPortInfo(ip net.IP, port int) (info string, err error) {
if port > 65535 || port < 0 {
err = errors.New(
"port should be no greater than 65535 and no less than 0")
err = errors.New("port should be no greater than 65535 and no less than 0")
return
}