From 2507688c70403b405aaccb9d086897862a4c728e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Zamanillo?= Date: Sun, 23 Oct 2022 11:36:58 +0200 Subject: [PATCH 1/2] Fixes #675, crtsh can return email values for specific domains, so we always try to extract the subdomain using the default regular expression of the session --- v2/pkg/subscraping/sources/crtsh/crtsh.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/v2/pkg/subscraping/sources/crtsh/crtsh.go b/v2/pkg/subscraping/sources/crtsh/crtsh.go index 93c1f895b..9ed5ea102 100644 --- a/v2/pkg/subscraping/sources/crtsh/crtsh.go +++ b/v2/pkg/subscraping/sources/crtsh/crtsh.go @@ -5,6 +5,7 @@ import ( "context" "database/sql" "fmt" + "regexp" "strings" jsoniter "github.com/json-iterator/go" @@ -30,7 +31,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se go func() { defer close(results) - count := s.getSubdomainsFromSQL(domain, results) + count := s.getSubdomainsFromSQL(domain, session.Extractor, results) if count > 0 { return } @@ -40,7 +41,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se return results } -func (s *Source) getSubdomainsFromSQL(domain string, results chan subscraping.Result) int { +func (s *Source) getSubdomainsFromSQL(domain string, subDomainExtractor *regexp.Regexp, results chan subscraping.Result) int { db, err := sql.Open("postgres", "host=crt.sh user=guest dbname=certwatch sslmode=disable binary_parameters=yes") if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} @@ -73,7 +74,7 @@ func (s *Source) getSubdomainsFromSQL(domain string, results chan subscraping.Re return count } count++ - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: data} + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subDomainExtractor.FindString(data)} } return count } @@ -98,7 +99,7 @@ func (s *Source) 
getSubdomainsFromHTTP(ctx context.Context, domain string, sessi for _, subdomain := range subdomains { for _, sub := range strings.Split(subdomain.NameValue, "\n") { - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: sub} + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: session.Extractor.FindString(sub)} } } From 9b8dcb2fd8972bbb5352e0b83d22d6d3a6fb6737 Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Sat, 5 Nov 2022 14:30:11 +0100 Subject: [PATCH 2/2] using session for uniformity with http signature --- v2/pkg/subscraping/sources/crtsh/crtsh.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/v2/pkg/subscraping/sources/crtsh/crtsh.go b/v2/pkg/subscraping/sources/crtsh/crtsh.go index 9ed5ea102..41f8904e1 100644 --- a/v2/pkg/subscraping/sources/crtsh/crtsh.go +++ b/v2/pkg/subscraping/sources/crtsh/crtsh.go @@ -5,7 +5,6 @@ import ( "context" "database/sql" "fmt" - "regexp" "strings" jsoniter "github.com/json-iterator/go" @@ -31,7 +30,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se go func() { defer close(results) - count := s.getSubdomainsFromSQL(domain, session.Extractor, results) + count := s.getSubdomainsFromSQL(domain, session, results) if count > 0 { return } @@ -41,7 +40,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se return results } -func (s *Source) getSubdomainsFromSQL(domain string, subDomainExtractor *regexp.Regexp, results chan subscraping.Result) int { +func (s *Source) getSubdomainsFromSQL(domain string, session *subscraping.Session, results chan subscraping.Result) int { db, err := sql.Open("postgres", "host=crt.sh user=guest dbname=certwatch sslmode=disable binary_parameters=yes") if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} @@ -74,7 +73,7 @@ func (s *Source) getSubdomainsFromSQL(domain string, subDomainExtractor *regexp. 
return count } count++ - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subDomainExtractor.FindString(data)} + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: session.Extractor.FindString(data)} } return count }