diff --git a/v2/pkg/runner/options.go b/v2/pkg/runner/options.go
index 63066f9da..582486503 100644
--- a/v2/pkg/runner/options.go
+++ b/v2/pkg/runner/options.go
@@ -243,7 +243,6 @@ func (options *Options) preProcessOptions() {
 
 var defaultRateLimits = []string{
 	"github=30/m",
-	// "gitlab=2000/m",
 	"fullhunt=60/m",
 	fmt.Sprintf("robtex=%d/ms", uint(math.MaxUint)),
 	"securitytrails=1/s",
@@ -254,4 +253,8 @@ var defaultRateLimits = []string{
 	"waybackarchive=15/m",
 	"whoisxmlapi=50/s",
 	"securitytrails=2/s",
+	"sitedossier=8/m",
+	"netlas=1/s",
+	// "gitlab=2/s",
+	"github=83/m",
 }
diff --git a/v2/pkg/subscraping/sources/github/github.go b/v2/pkg/subscraping/sources/github/github.go
index 85fb528a5..fc4e67df6 100644
--- a/v2/pkg/subscraping/sources/github/github.go
+++ b/v2/pkg/subscraping/sources/github/github.go
@@ -80,16 +80,6 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
 	}
 
 	token := tokens.Get()
-
-	if token.RetryAfter > 0 {
-		if len(tokens.pool) == 1 {
-			gologger.Verbose().Label(s.Name()).Msgf("GitHub Search request rate limit exceeded, waiting for %d seconds before retry... \n", token.RetryAfter)
-			time.Sleep(time.Duration(token.RetryAfter) * time.Second)
-		} else {
-			token = tokens.Get()
-		}
-	}
-
 	headers := map[string]string{
 		"Accept": "application/vnd.github.v3.text-match+json", "Authorization": "token " + token.Hash,
 	}
diff --git a/v2/pkg/subscraping/sources/gitlab/gitlab.go b/v2/pkg/subscraping/sources/gitlab/gitlab.go
index 2dc76634a..82540adf9 100644
--- a/v2/pkg/subscraping/sources/gitlab/gitlab.go
+++ b/v2/pkg/subscraping/sources/gitlab/gitlab.go
@@ -9,7 +9,6 @@ import (
 	"regexp"
 	"strings"
 	"sync"
-	"time"
 
 	jsoniter "github.com/json-iterator/go"
 	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
@@ -118,9 +117,6 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
 			return
 		}
 
-		// TODO: hardcoded for testing, should be a source internal rate limit #718
-		time.Sleep(2 * time.Second)
-
 		s.enumerate(ctx, nextURL, domainRegexp, headers, session, results)
 	}
 }
diff --git a/v2/pkg/subscraping/sources/netlas/netlas.go b/v2/pkg/subscraping/sources/netlas/netlas.go
index f2132e7c5..e48e9ecd3 100644
--- a/v2/pkg/subscraping/sources/netlas/netlas.go
+++ b/v2/pkg/subscraping/sources/netlas/netlas.go
@@ -103,8 +103,6 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
 
 	//Define the API endpoint URL and query parameters
 	for i := 0; i < domainsCount.Count; i += 20 {
-
-		time.Sleep(1000 * time.Millisecond)
 		offset := strconv.Itoa(i)
 
 		endpoint := "https://app.netlas.io/api/domains/"
diff --git a/v2/pkg/subscraping/sources/sitedossier/sitedossier.go b/v2/pkg/subscraping/sources/sitedossier/sitedossier.go
index c44ad1c80..91c86298e 100644
--- a/v2/pkg/subscraping/sources/sitedossier/sitedossier.go
+++ b/v2/pkg/subscraping/sources/sitedossier/sitedossier.go
@@ -5,7 +5,6 @@ import (
 	"context"
 	"fmt"
 	"io"
-	"math/rand"
 	"net/http"
 	"regexp"
 	"time"
@@ -25,44 +24,6 @@ type agent struct {
 	session *subscraping.Session
 }
 
-func (a *agent) enumerate(ctx context.Context, baseURL string) {
-	select {
-	case <-ctx.Done():
-		return
-	default:
-	}
-
-	resp, err := a.session.SimpleGet(ctx, baseURL)
-	isnotfound := resp != nil && resp.StatusCode == http.StatusNotFound
-	if err != nil && !isnotfound {
-		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
-		a.errors++
-		a.session.DiscardHTTPResponse(resp)
-		return
-	}
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
-		a.errors++
-		resp.Body.Close()
-		return
-	}
-	resp.Body.Close()
-
-	src := string(body)
-	for _, match := range a.session.Extractor.Extract(src) {
-		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: match}
-	}
-
-	match1 := reNext.FindStringSubmatch(src)
-	time.Sleep(time.Duration((3 + rand.Intn(SleepRandIntn))) * time.Second)
-
-	if len(match1) > 0 {
-		a.enumerate(ctx, "http://www.sitedossier.com"+match1[1])
-	}
-}
-
 // Source is the passive scraping agent
 type Source struct {
 	timeTaken time.Duration
@@ -95,6 +56,42 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
 	return a.results
 }
 
+func (a *agent) enumerate(ctx context.Context, baseURL string) {
+	select {
+	case <-ctx.Done():
+		return
+	default:
+	}
+
+	resp, err := a.session.SimpleGet(ctx, baseURL)
+	isnotfound := resp != nil && resp.StatusCode == http.StatusNotFound
+	if err != nil && !isnotfound {
+		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
+		a.errors++
+		a.session.DiscardHTTPResponse(resp)
+		return
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
+		a.errors++
+		resp.Body.Close()
+		return
+	}
+	resp.Body.Close()
+
+	src := string(body)
+	for _, subdomain := range a.session.Extractor.Extract(src) {
+		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: subdomain}
+	}
+
+	match := reNext.FindStringSubmatch(src)
+	if len(match) > 0 {
+		a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com%s", match[1]))
+	}
+}
+
 // Name returns the name of the source
 func (s *Source) Name() string {
 	return "sitedossier"
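
Note on the change: the hardcoded sleeps removed above are superseded by the new declarative entries in defaultRateLimits, which the runner hands to the session's central rate limiter, so individual sources no longer block their own goroutines with time.Sleep. As a rough illustration of the "name=count/unit" entry format used in options.go, here is a minimal parser sketch; parseRateLimit and the main driver are hypothetical, for demonstration only, and are not the parsing code this repository actually uses:

package main

import (
	"fmt"
	"strconv"
	"strings"
	"time"
)

// parseRateLimit is a hypothetical helper that splits one defaultRateLimits
// entry such as "github=83/m" into its source name, request count, and the
// window that count applies to. The unit suffixes handled here (ms, s, m)
// are the ones that appear in the list above.
func parseRateLimit(entry string) (source string, count uint64, window time.Duration, err error) {
	source, spec, ok := strings.Cut(entry, "=")
	if !ok {
		return "", 0, 0, fmt.Errorf("missing '=' in %q", entry)
	}
	num, unit, ok := strings.Cut(spec, "/")
	if !ok {
		return "", 0, 0, fmt.Errorf("missing '/' in %q", entry)
	}
	count, err = strconv.ParseUint(num, 10, 64)
	if err != nil {
		return "", 0, 0, err
	}
	switch unit {
	case "ms":
		window = time.Millisecond
	case "s":
		window = time.Second
	case "m":
		window = time.Minute
	default:
		return "", 0, 0, fmt.Errorf("unknown unit %q in %q", unit, entry)
	}
	return source, count, window, nil
}

func main() {
	// Entries taken from the defaultRateLimits additions in this diff.
	for _, entry := range []string{"github=83/m", "netlas=1/s", "sitedossier=8/m"} {
		source, count, window, err := parseRateLimit(entry)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%s: %d requests per %v\n", source, count, window)
	}
}

Centralizing the limits this way appears to be the intent of the diff: a throttled source now waits in the shared limiter instead of sleeping inline, and the per-source values can be tuned or overridden in one place rather than being buried in each source's enumerate loop.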