diff --git a/README.md b/README.md index dad545074..a52604124 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,18 @@ Learn about more ways to install subfinder here: https://docs.projectdiscovery.i `subfinder` can be used right after the installation, however many sources required API keys to work. Learn more here: https://docs.projectdiscovery.io/tools/subfinder/install#post-install-configuration. +### Censys configuration + +- Censys has migrated from the legacy Search API (`api_id:api_secret`) to the Platform API, which requires a Personal Access Token (PAT). +- Provide the PAT in your `provider-config.yaml` and append the organization ID (Starter/Enterprise tiers) after a colon. The Platform API rejects PAT-only requests, so Subfinder logs a warning and skips any entry that is not in the `PAT:ORG_ID` form. + +```yaml +censys: + - censys_pat_value:12345678-91011-1213 +``` + +Subfinder will send the PAT as a bearer token together with the vendor media type header. Censys’ Platform API ties entitlements to the provided Organization ID, so requests without it either fall back to the free-tier limits or are rejected with `Missing organization id`. Free accounts do not receive API credits; if your account lacks an Org ID or API role, the call will still fail even with a PAT. + ## Running Subfinder Learn about how to run Subfinder here: https://docs.projectdiscovery.io/tools/subfinder/running. 
diff --git a/pkg/subscraping/sources/censys/censys.go b/pkg/subscraping/sources/censys/censys.go index 4427821ae..121b1fc4e 100644 --- a/pkg/subscraping/sources/censys/censys.go +++ b/pkg/subscraping/sources/censys/censys.go @@ -2,55 +2,77 @@ package censys import ( + "bytes" "context" - "strconv" + "fmt" + "net/http" + "strings" "time" jsoniter "github.com/json-iterator/go" + "github.com/projectdiscovery/gologger" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping" - urlutil "github.com/projectdiscovery/utils/url" ) const ( maxCensysPages = 10 maxPerPage = 100 + searchAPIUrl = "https://api.platform.censys.io/v3/global/search/query" + authorization = "Authorization" + bearerTokenFmt = "Bearer %s" + acceptHeader = "application/vnd.censys.api.v3.search.v1+json" + orgHeader = "X-Organization-ID" ) -type response struct { - Code int `json:"code"` - Status string `json:"status"` - Result result `json:"result"` +type searchRequest struct { + Query string `json:"query"` + PageSize int `json:"page_size,omitempty"` + PageToken string `json:"page_token,omitempty"` + Fields []string `json:"fields,omitempty"` } -type result struct { - Query string `json:"query"` - Total float64 `json:"total"` - DurationMS int `json:"duration_ms"` - Hits []hit `json:"hits"` - Links links `json:"links"` +type searchResponse struct { + Result *searchResult `json:"result"` } -type hit struct { - Parsed parsed `json:"parsed"` - Names []string `json:"names"` - FingerprintSha256 string `json:"fingerprint_sha256"` +type searchResult struct { + Hits []searchHit `json:"hits"` + NextPageToken string `json:"next_page_token"` } -type parsed struct { - ValidityPeriod validityPeriod `json:"validity_period"` - SubjectDN string `json:"subject_dn"` - IssuerDN string `json:"issuer_dn"` +type searchHit struct { + Certificate *certificateAsset `json:"certificate_v1"` + Host *hostAsset `json:"host_v1"` + WebProperty *webPropertyAsset `json:"webproperty_v1"` } -type validityPeriod struct { - NotAfter string 
`json:"not_after"` - NotBefore string `json:"not_before"` +type certificateAsset struct { + Resource *certificateResource `json:"resource"` } -type links struct { - Next string `json:"next"` - Prev string `json:"prev"` +type certificateResource struct { + Names []string `json:"names"` +} + +type hostAsset struct { + Resource *hostResource `json:"resource"` +} + +type hostResource struct { + DNS *hostDNS `json:"dns"` +} + +type hostDNS struct { + Names []string `json:"names"` +} + +type webPropertyAsset struct { + Resource *webPropertyResource `json:"resource"` +} + +type webPropertyResource struct { + Hostname string `json:"hostname"` } // Source is the passive scraping agent @@ -63,8 +85,8 @@ type Source struct { } type apiKey struct { - token string - secret string + token string + orgID string } // Run function returns all subdomains found with the service @@ -80,36 +102,48 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se }(time.Now()) randomApiKey := subscraping.PickRandom(s.apiKeys, s.Name()) - if randomApiKey.token == "" || randomApiKey.secret == "" { + if randomApiKey.token == "" { s.skipped = true return } - certSearchEndpoint := "https://search.censys.io/api/v2/certificates/search" + domainLower := strings.ToLower(domain) + seen := make(map[string]struct{}) cursor := "" currentPage := 1 for { - certSearchEndpointUrl, err := urlutil.Parse(certSearchEndpoint) + reqBody := searchRequest{ + Query: domain, + PageSize: maxPerPage, + } + if cursor != "" { + reqBody.PageToken = cursor + } + + payload, err := jsoniter.Marshal(reqBody) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} s.errors++ return } - certSearchEndpointUrl.Params.Add("q", domain) - certSearchEndpointUrl.Params.Add("per_page", strconv.Itoa(maxPerPage)) - if cursor != "" { - certSearchEndpointUrl.Params.Add("cursor", cursor) + headers := map[string]string{ + "Content-Type": "application/json", + "Accept": 
acceptHeader, + authorization: fmt.Sprintf(bearerTokenFmt, randomApiKey.token), + } + if randomApiKey.orgID != "" { + headers[orgHeader] = randomApiKey.orgID } resp, err := session.HTTPRequest( ctx, - "GET", - certSearchEndpointUrl.String(), + http.MethodPost, + searchAPIUrl, "", - nil, - nil, - subscraping.BasicAuth{Username: randomApiKey.token, Password: randomApiKey.secret}, + headers, + bytes.NewReader(payload), + subscraping.BasicAuth{}, ) if err != nil { @@ -119,7 +153,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se return } - var censysResponse response + var censysResponse searchResponse err = jsoniter.NewDecoder(resp.Body).Decode(&censysResponse) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} @@ -130,15 +164,15 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se session.DiscardHTTPResponse(resp) + if censysResponse.Result == nil || len(censysResponse.Result.Hits) == 0 { + break + } + for _, hit := range censysResponse.Result.Hits { - for _, name := range hit.Names { - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: name} - s.results++ - } + s.emitFromHit(hit, domainLower, seen, results) } - // Exit the censys enumeration if last page is reached - cursor = censysResponse.Result.Links.Next + cursor = censysResponse.Result.NextPageToken if cursor == "" || currentPage >= maxCensysPages { break } @@ -167,9 +201,72 @@ func (s *Source) NeedsKey() bool { } func (s *Source) AddApiKeys(keys []string) { - s.apiKeys = subscraping.CreateApiKeys(keys, func(k, v string) apiKey { - return apiKey{k, v} - }) + s.apiKeys = nil + for _, key := range keys { + raw := strings.TrimSpace(key) + if raw == "" { + continue + } + + parts := strings.SplitN(raw, ":", 2) + if len(parts) != 2 { + gologger.Warning().Msg("censys source requires PAT entries to include an organization id (use PAT:ORG_ID); skipping") + continue + } + + 
token := strings.TrimSpace(parts[0]) + orgID := strings.TrimSpace(parts[1]) + if token == "" || orgID == "" { + gologger.Warning().Msg("censys source encountered an entry with missing PAT or organization id; skipping") + continue + } + + s.apiKeys = append(s.apiKeys, apiKey{token: token, orgID: orgID}) + } +} + +func (s *Source) emitFromHit(hit searchHit, domainLower string, seen map[string]struct{}, results chan subscraping.Result) { + if hit.Certificate != nil && hit.Certificate.Resource != nil { + for _, name := range hit.Certificate.Resource.Names { + s.emitIfValid(name, domainLower, seen, results) + } + } + + if hit.Host != nil && hit.Host.Resource != nil && hit.Host.Resource.DNS != nil { + for _, name := range hit.Host.Resource.DNS.Names { + s.emitIfValid(name, domainLower, seen, results) + } + } + + if hit.WebProperty != nil && hit.WebProperty.Resource != nil { + s.emitIfValid(hit.WebProperty.Resource.Hostname, domainLower, seen, results) + } +} + +func (s *Source) emitIfValid(candidate, domainLower string, seen map[string]struct{}, results chan subscraping.Result) { + name, ok := sanitizeCandidate(candidate, domainLower) + if !ok { + return + } + if _, alreadySeen := seen[name]; alreadySeen { + return + } + seen[name] = struct{}{} + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: name} + s.results++ +} + +func sanitizeCandidate(value, domainLower string) (string, bool) { + name := strings.TrimSpace(strings.TrimSuffix(strings.TrimSpace(value), ".")) + if name == "" { + return "", false + } + name = strings.TrimPrefix(name, "*.") + nameLower := strings.ToLower(name) + if nameLower == domainLower || strings.HasSuffix(nameLower, "."+domainLower) { + return nameLower, true + } + return "", false +} func (s *Source) Statistics() subscraping.Statistics { diff --git a/pkg/subscraping/sources/censys/censys_test.go b/pkg/subscraping/sources/censys/censys_test.go new file mode 100644 index 000000000..3d9920420 --- /dev/null +++ 
b/pkg/subscraping/sources/censys/censys_test.go @@ -0,0 +1,56 @@ +package censys + +import ( + "strings" + "testing" +) + +func TestAddApiKeysRequiresOrgId(t *testing.T) { + source := Source{} + source.AddApiKeys([]string{" token-one : org-1 ", "token-two:org-2", "missing", "no-org:", " :no-token"}) + + if len(source.apiKeys) != 2 { + t.Fatalf("expected 2 valid entries, got %d", len(source.apiKeys)) + } + + if source.apiKeys[0].token != "token-one" || source.apiKeys[0].orgID != "org-1" { + t.Fatalf("expected first entry to be token-one/org-1, got token=%q org=%q", source.apiKeys[0].token, source.apiKeys[0].orgID) + } + + if source.apiKeys[1].token != "token-two" || source.apiKeys[1].orgID != "org-2" { + t.Fatalf("expected second entry to be token-two/org-2, got token=%q org=%q", source.apiKeys[1].token, source.apiKeys[1].orgID) + } +} + +func TestSanitizeCandidate(t *testing.T) { + domain := "example.com" + testCases := []struct { + name string + value string + expected string + valid bool + }{ + {"exact match", "example.com", "example.com", true}, + {"subdomain", "api.example.com", "api.example.com", true}, + {"uppercase", "WWW.EXAMPLE.COM", "www.example.com", true}, + {"wildcard", "*.mail.example.com", "mail.example.com", true}, + {"wildcard apex", "*.example.com", "example.com", true}, + {"trailing dot", "test.example.com.", "test.example.com", true}, + {"non-matching", "otherdomain.com", "", false}, + {"suffix without label boundary", "notexample.com", "", false}, + {"empty", "", "", false}, + } + + domainLower := strings.ToLower(domain) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got, ok := sanitizeCandidate(tc.value, domainLower) + if ok != tc.valid { + t.Fatalf("expected valid=%v, got %v", tc.valid, ok) + } + if got != tc.expected { + t.Fatalf("expected %q, got %q", tc.expected, got) + } + }) + } +}