From 47f23032a82d6860834246a6dd3d4c03efc67abb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Do=C4=9Fan=20Can=20Bak=C4=B1r?= Date: Wed, 23 Aug 2023 13:42:32 +0000 Subject: [PATCH 1/4] add builtwith as a source --- v2/pkg/passive/sources.go | 2 + v2/pkg/passive/sources_test.go | 2 + .../sources/builtwith/builtwith.go | 117 ++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 v2/pkg/subscraping/sources/builtwith/builtwith.go diff --git a/v2/pkg/passive/sources.go b/v2/pkg/passive/sources.go index 700190464..bd7d3de1f 100644 --- a/v2/pkg/passive/sources.go +++ b/v2/pkg/passive/sources.go @@ -13,6 +13,7 @@ import ( "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/bevigil" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/binaryedge" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/bufferover" + "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/builtwith" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/c99" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/censys" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/certspotter" @@ -91,6 +92,7 @@ var AllSources = [...]subscraping.Source{ &facebook.Source{}, // &threatminer.Source{}, // failing api // &reconcloud.Source{}, // failing due to cloudflare bot protection + &builtwith.Source{}, } var NameSourceMap = make(map[string]subscraping.Source, len(AllSources)) diff --git a/v2/pkg/passive/sources_test.go b/v2/pkg/passive/sources_test.go index 34323c1af..852618862 100644 --- a/v2/pkg/passive/sources_test.go +++ b/v2/pkg/passive/sources_test.go @@ -52,6 +52,7 @@ var ( "facebook", // "threatminer", // "reconcloud", + "builtwith", } expectedDefaultSources = []string{ @@ -85,6 +86,7 @@ var ( "facebook", // "threatminer", // "reconcloud", + "builtwith", } expectedDefaultRecursiveSources = []string{ diff --git a/v2/pkg/subscraping/sources/builtwith/builtwith.go b/v2/pkg/subscraping/sources/builtwith/builtwith.go new file mode 100644 index 000000000..932133156 --- /dev/null +++ b/v2/pkg/subscraping/sources/builtwith/builtwith.go @@ -0,0 +1,117 @@ +// Package builtwith logic +package builtwith + +import ( + "context" + "fmt" + "time" + + jsoniter "github.com/json-iterator/go" + + "github.com/projectdiscovery/subfinder/v2/pkg/subscraping" +) + +type response struct { + Results []resultItem `json:"Results"` +} + +type resultItem struct { + Result result `json:"Result"` +} + +type result struct { + Paths []path `json:"Paths"` +} + +type path struct { + Domain string `json:"Domain"` + Url string `json:"Url"` + SubDomain string `json:"SubDomain"` +} + +// Source is the passive scraping agent +type Source struct { + apiKeys []string + timeTaken time.Duration + errors int + results int + skipped bool +} + +// Run function returns all subdomains found with the service +func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { + results := make(chan subscraping.Result) + s.errors = 0 + s.results = 0 + + go func() { + defer func(startTime time.Time) { + s.timeTaken = time.Since(startTime) + close(results) + }(time.Now()) + + randomApiKey := subscraping.PickRandom(s.apiKeys, s.Name()) + if randomApiKey == "" { + return + } + + resp, err := session.SimpleGet(ctx, fmt.Sprintf("https://api.builtwith.com/v21/api.json?KEY=%s&HIDETEXT=yes&HIDEDL=yes&NOLIVE=yes&NOMETA=yes&NOPII=yes&NOATTR=yes&LOOKUP=%s", randomApiKey, domain)) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ + session.DiscardHTTPResponse(resp) + return + } + + // x, _ := io.ReadAll(resp.Body) + // fmt.Println(string(x)) + + var data response + err = jsoniter.NewDecoder(resp.Body).Decode(&data) + if err != nil { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ + resp.Body.Close() + return + } + resp.Body.Close() + for _, result := range data.Results { + for _, path := range result.Result.Paths { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: fmt.Sprintf("%s.%s", path.SubDomain, path.Domain)} + s.results++ + } + } + }() + + return results +} + +// Name returns the name of the source +func (s *Source) Name() string { + return "builtwith" +} + +func (s *Source) IsDefault() bool { + return true +} + +func (s *Source) HasRecursiveSupport() bool { + return false +} + +func (s *Source) NeedsKey() bool { + return true +} + +func (s *Source) AddApiKeys(keys []string) { + s.apiKeys = keys +} + +func (s *Source) Statistics() subscraping.Statistics { + return subscraping.Statistics{ + Errors: s.errors, + Results: s.results, + TimeTaken: s.timeTaken, + Skipped: s.skipped, + } +} From f9efaa8671e5dcc78a1b755d097d56e6aa7f76d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Do=C4=9Fan=20Can=20Bak=C4=B1r?= Date: Wed, 23 Aug 2023 13:57:58 +0000 Subject: [PATCH 2/4] skip failing tests --- v2/pkg/passive/sources_wo_auth_test.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/v2/pkg/passive/sources_wo_auth_test.go b/v2/pkg/passive/sources_wo_auth_test.go index 88f64e04b..73a4cba03 100644 --- a/v2/pkg/passive/sources_wo_auth_test.go +++ b/v2/pkg/passive/sources_wo_auth_test.go @@ -20,10 +20,12 @@ import ( func TestSourcesWithoutKeys(t *testing.T) { ignoredSources := []string{ - "commoncrawl", // commoncrawl is under resourced and will likely time-out so step over it for this test https://groups.google.com/u/2/g/common-crawl/c/3QmQjFA_3y4/m/vTbhGqIBBQAJ - "riddler", // Fails with 403: There might be too much traffic or a configuration error - "crtsh", // Fails in GH Action (possibly IP-based ban) causing a timeout. - "hackertarget", // Fails in GH Action (possibly IP-based ban) but works locally + "commoncrawl", // commoncrawl is under resourced and will likely time-out so step over it for this test https://groups.google.com/u/2/g/common-crawl/c/3QmQjFA_3y4/m/vTbhGqIBBQAJ + "riddler", // Fails with 403: There might be too much traffic or a configuration error + "crtsh", // Fails in GH Action (possibly IP-based ban) causing a timeout. + "hackertarget", // Fails in GH Action (possibly IP-based ban) but works locally + "waybackarchive", // Fails randomly + "alienvault", // 503 Service Temporarily Unavailable } domain := "hackerone.com" From 866d65b28e9f4bd07fcc2ec9870e8b87606d3f86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Do=C4=9Fan=20Can=20Bak=C4=B1r?= Date: Thu, 24 Aug 2023 06:53:40 +0000 Subject: [PATCH 3/4] remove debug code --- v2/pkg/subscraping/sources/builtwith/builtwith.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/v2/pkg/subscraping/sources/builtwith/builtwith.go b/v2/pkg/subscraping/sources/builtwith/builtwith.go index 932133156..24f2e60cc 100644 --- a/v2/pkg/subscraping/sources/builtwith/builtwith.go +++ b/v2/pkg/subscraping/sources/builtwith/builtwith.go @@ -63,9 +63,6 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se return } - // x, _ := io.ReadAll(resp.Body) - // fmt.Println(string(x)) - var data response err = jsoniter.NewDecoder(resp.Body).Decode(&data) if err != nil { From 0f3fab0f7d8ccf00e0dc98dbd5dcd8537c218418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Do=C4=9Fan=20Can=20Bak=C4=B1r?= Date: Thu, 24 Aug 2023 06:58:04 +0000 Subject: [PATCH 4/4] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e3360cace..c6c9a3dac 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest `subfinder` can be used right after the installation, however the following services require configuring API keys to work: -[BeVigil](https://bevigil.com/osint-api), [BinaryEdge](https://binaryedge.io), [BufferOver](https://tls.bufferover.run), [C99](https://api.c99.nl/), [Censys](https://censys.io), [CertSpotter](https://sslmate.com/certspotter/api/), [Chaos](https://chaos.projectdiscovery.io), [Chinaz](http://my.chinaz.com/ChinazAPI/DataCenter/MyDataApi), [DnsDB](https://api.dnsdb.info), [Fofa](https://fofa.info/static_pages/api_help), [FullHunt](https://fullhunt.io), [GitHub](https://github.com), [Intelx](https://intelx.io), [PassiveTotal](http://passivetotal.org), [quake](https://quake.360.cn), [Robtex](https://www.robtex.com/api/), [SecurityTrails](http://securitytrails.com), [Shodan](https://shodan.io), [ThreatBook](https://x.threatbook.cn/en), [VirusTotal](https://www.virustotal.com), [WhoisXML API](https://whoisxmlapi.com/), [ZoomEye](https://www.zoomeye.org), [ZoomEye API](https://api.zoomeye.org), [dnsrepo](https://dnsrepo.noc.org), [Hunter](https://hunter.qianxin.com/), [Facebook](https://developers.facebook.com) +[BeVigil](https://bevigil.com/osint-api), [BinaryEdge](https://binaryedge.io), [BufferOver](https://tls.bufferover.run), [C99](https://api.c99.nl/), [Censys](https://censys.io), [CertSpotter](https://sslmate.com/certspotter/api/), [Chaos](https://chaos.projectdiscovery.io), [Chinaz](http://my.chinaz.com/ChinazAPI/DataCenter/MyDataApi), [DnsDB](https://api.dnsdb.info), [Fofa](https://fofa.info/static_pages/api_help), [FullHunt](https://fullhunt.io), [GitHub](https://github.com), [Intelx](https://intelx.io), [PassiveTotal](http://passivetotal.org), [quake](https://quake.360.cn), [Robtex](https://www.robtex.com/api/), [SecurityTrails](http://securitytrails.com), [Shodan](https://shodan.io), [ThreatBook](https://x.threatbook.cn/en), [VirusTotal](https://www.virustotal.com), [WhoisXML API](https://whoisxmlapi.com/), [ZoomEye](https://www.zoomeye.org), [ZoomEye API](https://api.zoomeye.org), [dnsrepo](https://dnsrepo.noc.org), [Hunter](https://hunter.qianxin.com/), [Facebook](https://developers.facebook.com), [BuiltWith](https://api.builtwith.com/domain-api) You can also use the `subfinder -ls` command to display all the available sources.