From 44c03419c12a86b52dd5a6d08d1d643c41186039 Mon Sep 17 00:00:00 2001 From: r-romanov Date: Sun, 18 Jul 2021 16:16:58 +0300 Subject: [PATCH 1/3] improvement: reworked the Spyse integration to use API v4 and fetch more than 10 000 results --- v2/go.mod | 1 + v2/go.sum | 4 + v2/pkg/subscraping/sources/spyse/spyse.go | 107 ++++++++++++++-------- 3 files changed, 73 insertions(+), 39 deletions(-) diff --git a/v2/go.mod b/v2/go.mod index 543cf9ed9..9e324d5e2 100644 --- a/v2/go.mod +++ b/v2/go.mod @@ -11,6 +11,7 @@ require ( github.com/projectdiscovery/fdmax v0.0.3 github.com/projectdiscovery/gologger v1.1.4 github.com/rs/xid v1.3.0 + github.com/spyse-com/go-spyse v1.2.1 github.com/stretchr/testify v1.7.0 github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b diff --git a/v2/go.sum b/v2/go.sum index 0c27ad707..00d954317 100644 --- a/v2/go.sum +++ b/v2/go.sum @@ -51,6 +51,8 @@ github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/z github.com/miekg/dns v1.1.29/go.mod h1:KNUDUusw/aVsxyTYZM1oqvCicbwhgbNgztCETuNZ7xM= github.com/miekg/dns v1.1.41 h1:WMszZWJG0XmzbK9FEmzH2TVcqYzFesusSIB41b8KHxY= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= +github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag= +github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -87,6 +89,8 @@ github.com/projectdiscovery/retryabledns v1.0.12-0.20210419174848-eec3ac17d61e/g github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod 
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rs/xid v1.3.0 h1:6NjYksEUlhurdVehpc7S7dk6DAmcKv8V9gG0FsVN2U4= github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= +github.com/spyse-com/go-spyse v1.2.1 h1:Za/BnLnXWY/DqZZQm2V7NQ69aJ8FgFA8vBiipf3CHC8= +github.com/spyse-com/go-spyse v1.2.1/go.mod h1:YzL0kTQIlCVTtP0Bna4I7p/sKF2rgY1cV32dq/L4oIw= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= diff --git a/v2/pkg/subscraping/sources/spyse/spyse.go b/v2/pkg/subscraping/sources/spyse/spyse.go index 2ae271e39..0dd794436 100644 --- a/v2/pkg/subscraping/sources/spyse/spyse.go +++ b/v2/pkg/subscraping/sources/spyse/spyse.go @@ -3,31 +3,11 @@ package spyse import ( "context" - "fmt" - "strconv" - - jsoniter "github.com/json-iterator/go" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping" + "github.com/spyse-com/go-spyse/pkg" ) -type resultObject struct { - Name string `json:"name"` -} - -type dataObject struct { - Items []resultObject `json:"items"` - TotalCount int `json:"total_count"` -} - -type errorObject struct { - Code string `json:"code"` - Message string `json:"message"` -} - -type spyseResult struct { - Data dataObject `json:"data"` - Error []errorObject `json:"error"` -} +const SearchMethodResultsLimit = 10000 // Source is the passive scraping agent type Source struct{} @@ -43,33 +23,82 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se return } - maxCount := 100 + client, err := spyse.NewClient(session.Keys.Spyse, nil) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return + } - for offSet := 0; offSet <= maxCount; offSet += 100 { - resp, err := session.Get(ctx, 
fmt.Sprintf("https://api.spyse.com/v3/data/domain/subdomain?domain=%s&limit=100&offset=%s", domain, strconv.Itoa(offSet)), "", map[string]string{"Authorization": "Bearer " + session.Keys.Spyse}) - if err != nil { - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} - session.DiscardHTTPResponse(resp) - return - } + domainSvc := spyse.NewDomainService(client) + + var searchDomain = "." + domain + var subdomainsSearchParams spyse.QueryBuilder - var response spyseResult - err = jsoniter.NewDecoder(resp.Body).Decode(&response) + subdomainsSearchParams.AppendParam(spyse.QueryParam{ + Name: domainSvc.Params().Name.Name, + Operator: domainSvc.Params().Name.Operator.EndsWith, + Value: searchDomain, + }) + + totalResults, err := domainSvc.SearchCount(ctx, subdomainsSearchParams.Query) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return + } + + if totalResults == 0 { + return + } + + accountSvc := spyse.NewAccountService(client) + + quota, err := accountSvc.Quota(context.Background()) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return + } + + var searchResults []spyse.Domain + + // The default "Search" method returns only the first 10 000 subdomains + // To obtain more than 10 000 subdomains the "Scroll" method should be used + // Note: The "Scroll" method is only available for "PRO" customers, so we need to check + // quota.IsScrollSearchEnabled param + if totalResults > SearchMethodResultsLimit && quota.IsScrollSearchEnabled { + searchResults, err := domainSvc.ScrollSearch( + ctx, subdomainsSearchParams.Query, "") if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} - resp.Body.Close() return } - resp.Body.Close() - if response.Data.TotalCount == 0 { - return + for len(searchResults.Items) > 0 { + searchResults, err = domainSvc.ScrollSearch( + context.Background(), 
subdomainsSearchParams.Query, searchResults.SearchID) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return + } + + for _, r := range searchResults.Items { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: r.Name} + } } + } else { + var limit = 100 + var offset = 0 + + for ; int64(offset) < totalResults && int64(offset) < SearchMethodResultsLimit; offset += limit { + searchResults, err = domainSvc.Search(ctx, subdomainsSearchParams.Query, limit, offset) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + return + } - maxCount = response.Data.TotalCount + for _, r := range searchResults { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: r.Name} + } - for _, hostname := range response.Data.Items { - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: hostname.Name} } } }() From 85349ba5da9edeee638923fd2cf0aeb5210121f6 Mon Sep 17 00:00:00 2001 From: r-romanov Date: Wed, 21 Jul 2021 10:04:48 +0300 Subject: [PATCH 2/3] refactor: fix some linted fragments --- v2/pkg/subscraping/sources/spyse/spyse.go | 27 +++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/v2/pkg/subscraping/sources/spyse/spyse.go b/v2/pkg/subscraping/sources/spyse/spyse.go index 0dd794436..7daf48c4f 100644 --- a/v2/pkg/subscraping/sources/spyse/spyse.go +++ b/v2/pkg/subscraping/sources/spyse/spyse.go @@ -7,7 +7,7 @@ import ( "github.com/spyse-com/go-spyse/pkg" ) -const SearchMethodResultsLimit = 10000 +const searchMethodResultsLimit = 10000 // Source is the passive scraping agent type Source struct{} @@ -58,47 +58,46 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se return } - var searchResults []spyse.Domain - // The default "Search" method returns only the first 10 000 subdomains // To obtain more than 10 000 subdomains 
the "Scroll" method should be used // Note: The "Scroll" method is only available for "PRO" customers, so we need to check // quota.IsScrollSearchEnabled param - if totalResults > SearchMethodResultsLimit && quota.IsScrollSearchEnabled { - searchResults, err := domainSvc.ScrollSearch( + if totalResults > searchMethodResultsLimit && quota.IsScrollSearchEnabled { + var scrollResponse *spyse.DomainScrollResponse + scrollResponse, err = domainSvc.ScrollSearch( ctx, subdomainsSearchParams.Query, "") if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} return } - for len(searchResults.Items) > 0 { - searchResults, err = domainSvc.ScrollSearch( - context.Background(), subdomainsSearchParams.Query, searchResults.SearchID) + for len(scrollResponse.Items) > 0 { + scrollResponse, err = domainSvc.ScrollSearch( + context.Background(), subdomainsSearchParams.Query, scrollResponse.SearchID) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} return } - for _, r := range searchResults.Items { - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: r.Name} + for i := range scrollResponse.Items { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: scrollResponse.Items[i].Name} } } } else { var limit = 100 var offset = 0 + var searchResults []spyse.Domain - for ; int64(offset) < totalResults && int64(offset) < SearchMethodResultsLimit; offset += limit { + for ; int64(offset) < totalResults && int64(offset) < searchMethodResultsLimit; offset += limit { searchResults, err = domainSvc.Search(ctx, subdomainsSearchParams.Query, limit, offset) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} return } - for _, r := range searchResults { - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: r.Name} + for i := range searchResults { + results <- 
subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: searchResults[i].Name} } - } } }() From 1a60d2fcc53dfcadd036eebe1387154743ea4a69 Mon Sep 17 00:00:00 2001 From: sandeep Date: Wed, 21 Jul 2021 13:56:35 +0530 Subject: [PATCH 3/3] lint fix --- v2/pkg/subscraping/sources/spyse/spyse.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/v2/pkg/subscraping/sources/spyse/spyse.go b/v2/pkg/subscraping/sources/spyse/spyse.go index 7daf48c4f..fbb750114 100644 --- a/v2/pkg/subscraping/sources/spyse/spyse.go +++ b/v2/pkg/subscraping/sources/spyse/spyse.go @@ -3,8 +3,9 @@ package spyse import ( "context" + "github.com/projectdiscovery/subfinder/v2/pkg/subscraping" - "github.com/spyse-com/go-spyse/pkg" + spyse "github.com/spyse-com/go-spyse/pkg" ) const searchMethodResultsLimit = 10000