From 1a03959680d7cbaed6eb6d03b315282356a71475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Zamanillo?= Date: Wed, 9 Nov 2022 10:07:15 +0100 Subject: [PATCH] Reading this thread https://groups.google.com/g/crtsh/c/sUmV0mBz8bQ/m/K-6Vymd_AAAJ and using the &showSQL=y parameter I noticed the source's SQL is quite different of the current SQL crts.sh is using --- v2/pkg/subscraping/sources/crtsh/crtsh.go | 43 ++++++++++++++++++----- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/v2/pkg/subscraping/sources/crtsh/crtsh.go b/v2/pkg/subscraping/sources/crtsh/crtsh.go index 93c1f895b..d948de5fd 100644 --- a/v2/pkg/subscraping/sources/crtsh/crtsh.go +++ b/v2/pkg/subscraping/sources/crtsh/crtsh.go @@ -30,7 +30,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se go func() { defer close(results) - count := s.getSubdomainsFromSQL(domain, results) + count := s.getSubdomainsFromSQL(domain, session, results) if count > 0 { return } @@ -40,7 +40,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se return results } -func (s *Source) getSubdomainsFromSQL(domain string, results chan subscraping.Result) int { +func (s *Source) getSubdomainsFromSQL(domain string, session *subscraping.Session, results chan subscraping.Result) int { db, err := sql.Open("postgres", "host=crt.sh user=guest dbname=certwatch sslmode=disable binary_parameters=yes") if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} @@ -49,11 +49,33 @@ func (s *Source) getSubdomainsFromSQL(domain string, results chan subscraping.Re defer db.Close() - pattern := "%." + domain - query := `SELECT DISTINCT ci.NAME_VALUE as domain FROM certificate_identity ci - WHERE reverse(lower(ci.NAME_VALUE)) LIKE reverse(lower($1)) - ORDER BY ci.NAME_VALUE` - rows, err := db.Query(query, pattern) + query := `WITH ci AS ( + SELECT min(sub.CERTIFICATE_ID) ID, + min(sub.ISSUER_CA_ID) ISSUER_CA_ID, + array_agg(DISTINCT sub.NAME_VALUE) NAME_VALUES, + x509_commonName(sub.CERTIFICATE) COMMON_NAME, + x509_notBefore(sub.CERTIFICATE) NOT_BEFORE, + x509_notAfter(sub.CERTIFICATE) NOT_AFTER, + encode(x509_serialNumber(sub.CERTIFICATE), 'hex') SERIAL_NUMBER + FROM (SELECT * + FROM certificate_and_identities cai + WHERE plainto_tsquery('certwatch', $1) @@ identities(cai.CERTIFICATE) + AND cai.NAME_VALUE ILIKE ('%' || $1 || '%') + LIMIT 10000 + ) sub + GROUP BY sub.CERTIFICATE + ) + SELECT array_to_string(ci.NAME_VALUES, chr(10)) NAME_VALUE + FROM ci + LEFT JOIN LATERAL ( + SELECT min(ctle.ENTRY_TIMESTAMP) ENTRY_TIMESTAMP + FROM ct_log_entry ctle + WHERE ctle.CERTIFICATE_ID = ci.ID + ) le ON TRUE, + ca + WHERE ci.ISSUER_CA_ID = ca.ID + ORDER BY le.ENTRY_TIMESTAMP DESC NULLS LAST;` + rows, err := db.Query(query, domain) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} return 0 @@ -72,8 +94,11 @@ func (s *Source) getSubdomainsFromSQL(domain string, results chan subscraping.Re results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} return count } + count++ - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: data} + for _, subdomain := range strings.Split(data, "\n") { + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: session.Extractor.FindString(subdomain)} + } } return count } @@ -98,7 +123,7 @@ func (s *Source) getSubdomainsFromHTTP(ctx context.Context, domain string, sessi for _, subdomain := range subdomains { for _, sub := range strings.Split(subdomain.NameValue, "\n") { - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: sub} + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: session.Extractor.FindString(sub)} } }