From 90f4c00d8c05fd58b4559e71f2ff07486a86e5d2 Mon Sep 17 00:00:00 2001 From: ben Date: Wed, 2 Jul 2025 18:57:19 +0100 Subject: [PATCH 1/8] Adds the "driftnet" source. Updates the httpRequestWrapper() function to allow 204 responses. --- pkg/passive/sources.go | 2 + pkg/passive/sources_test.go | 3 + pkg/subscraping/agent.go | 2 +- .../subscraping/sources/driftnet/driftnet.go | 184 ++++++++++++++++++ 4 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 v2/pkg/subscraping/sources/driftnet/driftnet.go diff --git a/pkg/passive/sources.go b/pkg/passive/sources.go index 20013042f..e74d874be 100644 --- a/pkg/passive/sources.go +++ b/pkg/passive/sources.go @@ -26,6 +26,7 @@ import ( "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/dnsdb" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/dnsdumpster" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/dnsrepo" + "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/driftnet" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/facebook" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/fofa" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/fullhunt" @@ -70,6 +71,7 @@ var AllSources = [...]subscraping.Source{ &dnsdb.Source{}, &dnsdumpster.Source{}, &dnsrepo.Source{}, + &driftnet.Source{}, &fofa.Source{}, &fullhunt.Source{}, &github.Source{}, diff --git a/pkg/passive/sources_test.go b/pkg/passive/sources_test.go index a63760296..4febbf9fa 100644 --- a/pkg/passive/sources_test.go +++ b/pkg/passive/sources_test.go @@ -26,6 +26,7 @@ var ( "dnsdumpster", "dnsdb", "dnsrepo", + "driftnet", "fofa", "fullhunt", "github", @@ -72,6 +73,7 @@ var ( "digitorus", "dnsdumpster", "dnsrepo", + "driftnet", "fofa", "fullhunt", "hackertarget", @@ -101,6 +103,7 @@ var ( "crtsh", "dnsdb", "digitorus", + "driftnet", "hackertarget", "securitytrails", "virustotal", diff --git a/pkg/subscraping/agent.go b/pkg/subscraping/agent.go 
index 4f2cab8c6..e4e1441aa 100644 --- a/pkg/subscraping/agent.go +++ b/pkg/subscraping/agent.go @@ -137,7 +137,7 @@ func httpRequestWrapper(client *http.Client, request *http.Request) (*http.Respo return nil, err } - if response.StatusCode != http.StatusOK { + if response.StatusCode != http.StatusOK && response.StatusCode != http.StatusNoContent { requestURL, _ := url.QueryUnescape(request.URL.String()) gologger.Debug().MsgFunc(func() string { diff --git a/v2/pkg/subscraping/sources/driftnet/driftnet.go b/v2/pkg/subscraping/sources/driftnet/driftnet.go new file mode 100644 index 000000000..a8f49a2be --- /dev/null +++ b/v2/pkg/subscraping/sources/driftnet/driftnet.go @@ -0,0 +1,184 @@ +// Package virustotal logic +package driftnet + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "sync" + "time" + + "github.com/projectdiscovery/subfinder/v2/pkg/subscraping" +) + +const ( + // baseURL is the base URL for the driftnet API + baseURL = "https://api.driftnet.io/v1/" + + // summaryLimit is the size of the summary limit that we send to the API + summaryLimit = 10000 +) + +// Source is the passive scraping agent +type Source struct { + apiKeys []string + timeTaken time.Duration + errors int + results int + skipped bool +} + +// endpointConfig describes a driftnet endpoint that can be used +type endpointConfig struct { + // The API endpoint to be touched + endpoint string + + // The API parameter used for query + param string + + // The context that we should restrict to in results from this endpoint + context string +} + +// endpoints is a set of endpoint configs +var endpoints = []endpointConfig{ + {"ct/log", "field=host:", "cert-dns-name"}, + {"scan/protocols", "field=host:", "cert-dns-name"}, + {"scan/domains", "field=host:", "cert-dns-name"}, + {"domain/rdns", "host=", "dns-ptr"}, +} + +// summaryResponse is an API response +type summaryResponse struct { + Summary struct { + Other int `json:"other"` + Values map[string]int `json:"values"` + } `json:"summary"` 
+} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + // Final results channel + results := make(chan subscraping.Result) + s.errors = 0 + s.results = 0 + + // Waitgroup for subsources + var wg sync.WaitGroup + wg.Add(len(endpoints)) + + // Map for dedupe between subsources + dedupe := sync.Map{} + + // Close down results when all subsources finished + go func(startTime time.Time) { + wg.Wait() + s.timeTaken = time.Since(startTime) + close(results) + }(time.Now()) + + // Start up requests for all subsources + for i := range endpoints { + go s.runSubsource(ctx, domain, session, results, &wg, &dedupe, endpoints[i]) + } + + // Return the result c + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "driftnet" +} + +// IsDefault indicates that this source should be used as part of the default execution. +func (s *Source) IsDefault() bool { + return true +} + +// HasRecursiveSupport indicates that we accept subdomains in addition to apex domains +func (s *Source) HasRecursiveSupport() bool { + return true +} + +// NeedsKey indicates that we need an API key +func (s *Source) NeedsKey() bool { + return true +} + +// AddApiKeys provides us with the API key(s) +func (s *Source) AddApiKeys(keys []string) { + s.apiKeys = keys +} + +// Statistics returns statistics about the scraping process +func (s *Source) Statistics() subscraping.Statistics { + return subscraping.Statistics{ + Errors: s.errors, + Results: s.results, + TimeTaken: s.timeTaken, + Skipped: s.skipped, + } +} + +// runSubsource +func (s *Source) runSubsource(ctx context.Context, domain string, session *subscraping.Session, results chan subscraping.Result, wg *sync.WaitGroup, dedupe *sync.Map, epConfig endpointConfig) { + // Default headers + headers := map[string]string{ + "accept": "application/json", + } + + // Pick an API key + 
randomApiKey := subscraping.PickRandom(s.apiKeys, s.Name()) + if randomApiKey != "" { + headers["authorization"] = "Bearer " + randomApiKey + } + + // Request + url := fmt.Sprintf("%s%s?%s%s&summarize=host&summary_context=%s&summary_limit=%d", baseURL, epConfig.endpoint, epConfig.param, domain, epConfig.context, summaryLimit) + resp, err := session.Get(ctx, url, "", headers) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ + wg.Done() + return + } + + defer session.DiscardHTTPResponse(resp) + + // 204 means no results, any other response code is an error + if resp.StatusCode != 200 { + if resp.StatusCode != 204 { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: fmt.Errorf("request failed with status %d", resp.StatusCode)} + s.errors++ + } + + wg.Done() + return + } + + // Parse and return results + var summary summaryResponse + decoder := json.NewDecoder(resp.Body) + err = decoder.Decode(&summary) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ + wg.Done() + return + } + + for subdomain := range summary.Summary.Values { + // Avoid returning the same result more than once from the same source (can happen as we are using multiple endpoints) + if _, present := dedupe.LoadOrStore(strings.ToLower(subdomain), true); !present { + results <- subscraping.Result{ + Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain, + } + s.results++ + } + } + + // Complete! 
+ wg.Done() +} From 0afa0e5a72c680c6828555dad128d764321ae02f Mon Sep 17 00:00:00 2001 From: ben Date: Wed, 2 Jul 2025 19:33:28 +0100 Subject: [PATCH 2/8] remove cruft --- v2/pkg/subscraping/sources/driftnet/driftnet.go | 1 - 1 file changed, 1 deletion(-) diff --git a/v2/pkg/subscraping/sources/driftnet/driftnet.go b/v2/pkg/subscraping/sources/driftnet/driftnet.go index a8f49a2be..8ff59f2df 100644 --- a/v2/pkg/subscraping/sources/driftnet/driftnet.go +++ b/v2/pkg/subscraping/sources/driftnet/driftnet.go @@ -1,4 +1,3 @@ -// Package virustotal logic package driftnet import ( From 8f5680e90690ceac4bb31b9a628259031a8899ea Mon Sep 17 00:00:00 2001 From: ben Date: Thu, 17 Jul 2025 12:31:42 +0100 Subject: [PATCH 3/8] match new path --- {v2/pkg => pkg}/subscraping/sources/driftnet/driftnet.go | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {v2/pkg => pkg}/subscraping/sources/driftnet/driftnet.go (100%) diff --git a/v2/pkg/subscraping/sources/driftnet/driftnet.go b/pkg/subscraping/sources/driftnet/driftnet.go similarity index 100% rename from v2/pkg/subscraping/sources/driftnet/driftnet.go rename to pkg/subscraping/sources/driftnet/driftnet.go From 423ae72f084e0003a9e42f2ca66faf3aa0101545 Mon Sep 17 00:00:00 2001 From: ben Date: Thu, 17 Jul 2025 12:50:56 +0100 Subject: [PATCH 4/8] correct typos and fix a keys-required test --- pkg/subscraping/sources/driftnet/driftnet.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/subscraping/sources/driftnet/driftnet.go b/pkg/subscraping/sources/driftnet/driftnet.go index 8ff59f2df..fc599ab4f 100644 --- a/pkg/subscraping/sources/driftnet/driftnet.go +++ b/pkg/subscraping/sources/driftnet/driftnet.go @@ -82,7 +82,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se go s.runSubsource(ctx, domain, session, results, &wg, &dedupe, endpoints[i]) } - // Return the result c + // Return the results channel return results } @@ -121,7 +121,7 @@ func (s *Source) Statistics() 
subscraping.Statistics { } } -// runSubsource +// runSubsource queries a specific driftnet endpoint for subdomains and sends results to the channel func (s *Source) runSubsource(ctx context.Context, domain string, session *subscraping.Session, results chan subscraping.Result, wg *sync.WaitGroup, dedupe *sync.Map, epConfig endpointConfig) { // Default headers headers := map[string]string{ @@ -169,6 +169,11 @@ func (s *Source) runSubsource(ctx context.Context, domain string, session *subsc } for subdomain := range summary.Summary.Values { + // We can get certificate results which aren't actually subdomains of the target domain. Skip them. + if !strings.HasSuffix(subdomain, "."+domain) { + continue + } + // Avoid returning the same result more than once from the same source (can happen as we are using multiple endpoints) if _, present := dedupe.LoadOrStore(strings.ToLower(subdomain), true); !present { results <- subscraping.Result{ From ed66a7d04e338e98eb7c36961330b3dbf7f6bda9 Mon Sep 17 00:00:00 2001 From: ben Date: Thu, 17 Jul 2025 12:51:22 +0100 Subject: [PATCH 5/8] revert change for NoContent --- pkg/subscraping/agent.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/subscraping/agent.go b/pkg/subscraping/agent.go index e4e1441aa..4f2cab8c6 100644 --- a/pkg/subscraping/agent.go +++ b/pkg/subscraping/agent.go @@ -137,7 +137,7 @@ func httpRequestWrapper(client *http.Client, request *http.Request) (*http.Respo return nil, err } - if response.StatusCode != http.StatusOK && response.StatusCode != http.StatusNoContent { + if response.StatusCode != http.StatusOK { requestURL, _ := url.QueryUnescape(request.URL.String()) gologger.Debug().MsgFunc(func() string { From e10fc7f9d75a407a84621ecb0772eb61499d8caa Mon Sep 17 00:00:00 2001 From: ben Date: Thu, 17 Jul 2025 12:57:29 +0100 Subject: [PATCH 6/8] fix for race --- pkg/subscraping/sources/driftnet/driftnet.go | 21 ++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git 
a/pkg/subscraping/sources/driftnet/driftnet.go b/pkg/subscraping/sources/driftnet/driftnet.go index fc599ab4f..5e88fa8e0 100644 --- a/pkg/subscraping/sources/driftnet/driftnet.go +++ b/pkg/subscraping/sources/driftnet/driftnet.go @@ -6,6 +6,7 @@ import ( "fmt" "strings" "sync" + "sync/atomic" "time" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping" @@ -23,8 +24,8 @@ const ( type Source struct { apiKeys []string timeTaken time.Duration - errors int - results int + errors atomic.Int32 + results atomic.Int32 skipped bool } @@ -60,8 +61,8 @@ type summaryResponse struct { func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { // Final results channel results := make(chan subscraping.Result) - s.errors = 0 - s.results = 0 + s.errors.Store(0) + s.results.Store(0) // Waitgroup for subsources var wg sync.WaitGroup @@ -114,8 +115,8 @@ func (s *Source) AddApiKeys(keys []string) { // Statistics returns statistics about the scraping process func (s *Source) Statistics() subscraping.Statistics { return subscraping.Statistics{ - Errors: s.errors, - Results: s.results, + Errors: int(s.errors.Load()), + Results: int(s.results.Load()), TimeTaken: s.timeTaken, Skipped: s.skipped, } @@ -139,7 +140,7 @@ func (s *Source) runSubsource(ctx context.Context, domain string, session *subsc resp, err := session.Get(ctx, url, "", headers) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} - s.errors++ + s.errors.Add(1) wg.Done() return } @@ -150,7 +151,7 @@ func (s *Source) runSubsource(ctx context.Context, domain string, session *subsc if resp.StatusCode != 200 { if resp.StatusCode != 204 { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: fmt.Errorf("request failed with status %d", resp.StatusCode)} - s.errors++ + s.errors.Add(1) } wg.Done() @@ -163,7 +164,7 @@ func (s *Source) runSubsource(ctx context.Context, domain string, session *subsc err = 
decoder.Decode(&summary) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} - s.errors++ + s.errors.Add(1) wg.Done() return } @@ -179,7 +180,7 @@ func (s *Source) runSubsource(ctx context.Context, domain string, session *subsc results <- subscraping.Result{ Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain, } - s.results++ + s.results.Add(1) } } From b2f1963756c60d07842ee79743c0d57779c6b183 Mon Sep 17 00:00:00 2001 From: ben Date: Thu, 17 Jul 2025 12:58:45 +0100 Subject: [PATCH 7/8] url escape as requested --- pkg/subscraping/sources/driftnet/driftnet.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/subscraping/sources/driftnet/driftnet.go b/pkg/subscraping/sources/driftnet/driftnet.go index 5e88fa8e0..37c1f8c5f 100644 --- a/pkg/subscraping/sources/driftnet/driftnet.go +++ b/pkg/subscraping/sources/driftnet/driftnet.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "net/url" "strings" "sync" "sync/atomic" @@ -136,8 +137,8 @@ func (s *Source) runSubsource(ctx context.Context, domain string, session *subsc } // Request - url := fmt.Sprintf("%s%s?%s%s&summarize=host&summary_context=%s&summary_limit=%d", baseURL, epConfig.endpoint, epConfig.param, domain, epConfig.context, summaryLimit) - resp, err := session.Get(ctx, url, "", headers) + requestURL := fmt.Sprintf("%s%s?%s%s&summarize=host&summary_context=%s&summary_limit=%d", baseURL, epConfig.endpoint, epConfig.param, url.QueryEscape(domain), epConfig.context, summaryLimit) + resp, err := session.Get(ctx, requestURL, "", headers) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} s.errors.Add(1) From dcc7f4b3cd097650b970bbc3e3a7377fd96e8d5d Mon Sep 17 00:00:00 2001 From: ben Date: Thu, 17 Jul 2025 13:03:58 +0100 Subject: [PATCH 8/8] move 204 check to be consistent with other sources --- pkg/subscraping/sources/driftnet/driftnet.go | 9 +++++++-- 1 file changed, 7 
insertions(+), 2 deletions(-)

diff --git a/pkg/subscraping/sources/driftnet/driftnet.go b/pkg/subscraping/sources/driftnet/driftnet.go
index 37c1f8c5f..758bbfa44 100644
--- a/pkg/subscraping/sources/driftnet/driftnet.go
+++ b/pkg/subscraping/sources/driftnet/driftnet.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"net/http"
 	"net/url"
 	"strings"
 	"sync"
@@ -140,8 +141,12 @@ func (s *Source) runSubsource(ctx context.Context, domain string, session *subsc
 	requestURL := fmt.Sprintf("%s%s?%s%s&summarize=host&summary_context=%s&summary_limit=%d", baseURL, epConfig.endpoint, epConfig.param, url.QueryEscape(domain), epConfig.context, summaryLimit)
 	resp, err := session.Get(ctx, requestURL, "", headers)
 	if err != nil {
-		results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
-		s.errors.Add(1)
+		// HTTP 204 means "no results" for the Driftnet API; every other failure is reported as an error
+		if resp == nil || resp.StatusCode != http.StatusNoContent {
+			results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
+			s.errors.Add(1)
+		}
+		session.DiscardHTTPResponse(resp) // resp may be non-nil on error (e.g. 204); the deferred discard below is never reached from here
 		wg.Done()
 		return
 	}