5 changes: 4 additions & 1 deletion v2/pkg/runner/options.go
@@ -243,7 +243,6 @@ func (options *Options) preProcessOptions() {

var defaultRateLimits = []string{
"github=30/m",
// "gitlab=2000/m",
"fullhunt=60/m",
fmt.Sprintf("robtex=%d/ms", uint(math.MaxUint)),
"securitytrails=1/s",
@@ -254,4 +253,8 @@ var defaultRateLimits = []string{
"waybackarchive=15/m",
"whoisxmlapi=50/s",
"securitytrails=2/s",
"sitedossier=8/m",
"netlas=1/s",
// "gitlab=2/s",
"github=83/m",
}
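
The entries in defaultRateLimits follow a source=count/interval convention, e.g. github=83/m or netlas=1/s. As a minimal sketch of how such a string could be split into a per-source request budget (the parseRateLimit helper below is hypothetical and is not the parser the runner actually uses):

package main

import (
	"fmt"
	"strconv"
	"strings"
	"time"
)

// parseRateLimit is a hypothetical helper: it splits "github=83/m" into a
// source name, a request count, and the duration that budget applies to.
func parseRateLimit(s string) (source string, count uint, per time.Duration, err error) {
	source, spec, ok := strings.Cut(s, "=")
	if !ok {
		return "", 0, 0, fmt.Errorf("missing '=' in %q", s)
	}
	countStr, unit, ok := strings.Cut(spec, "/")
	if !ok {
		return "", 0, 0, fmt.Errorf("missing '/' in %q", s)
	}
	n, err := strconv.ParseUint(countStr, 10, 64)
	if err != nil {
		return "", 0, 0, err
	}
	units := map[string]time.Duration{"ms": time.Millisecond, "s": time.Second, "m": time.Minute, "h": time.Hour}
	d, ok := units[unit]
	if !ok {
		return "", 0, 0, fmt.Errorf("unknown unit %q in %q", unit, s)
	}
	return source, uint(n), d, nil
}

func main() {
	src, n, per, _ := parseRateLimit("github=83/m")
	fmt.Printf("%s: %d requests per %s\n", src, n, per) // github: 83 requests per 1m0s
}
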
10 changes: 0 additions & 10 deletions v2/pkg/subscraping/sources/github/github.go
@@ -80,16 +80,6 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
}

token := tokens.Get()

if token.RetryAfter > 0 {
if len(tokens.pool) == 1 {
gologger.Verbose().Label(s.Name()).Msgf("GitHub Search request rate limit exceeded, waiting for %d seconds before retry... \n", token.RetryAfter)
time.Sleep(time.Duration(token.RetryAfter) * time.Second)
} else {
token = tokens.Get()
}
}

headers := map[string]string{
"Accept": "application/vnd.github.v3.text-match+json", "Authorization": "token " + token.Hash,
}
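
With the inline wait on token.RetryAfter removed, pacing for this source is expected to come from the github=83/m entry in defaultRateLimits above. The sketch below only illustrates the general shape of a centralized per-source limiter, using golang.org/x/time/rate; it is not subfinder's own rate-limit plumbing:

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// 83 requests per minute, no bursting beyond a single request.
	limiter := rate.NewLimiter(rate.Every(time.Minute/83), 1)

	ctx := context.Background()
	for i := 0; i < 3; i++ {
		// Wait blocks until the next request is allowed (or ctx is cancelled),
		// which is what replaces ad-hoc time.Sleep calls inside each source.
		if err := limiter.Wait(ctx); err != nil {
			return
		}
		fmt.Println("request", i, "allowed at", time.Now().Format(time.RFC3339))
	}
}
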
4 changes: 0 additions & 4 deletions v2/pkg/subscraping/sources/gitlab/gitlab.go
@@ -9,7 +9,6 @@ import (
"regexp"
"strings"
"sync"
"time"

jsoniter "github.com/json-iterator/go"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
@@ -118,9 +117,6 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
return
}

// TODO: hardcoded for testing, should be a source internal rate limit #718
time.Sleep(2 * time.Second)

s.enumerate(ctx, nextURL, domainRegexp, headers, session, results)
}
}
2 changes: 0 additions & 2 deletions v2/pkg/subscraping/sources/netlas/netlas.go
@@ -103,8 +103,6 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
//Define the API endpoint URL and query parameters

for i := 0; i < domainsCount.Count; i += 20 {

time.Sleep(1000 * time.Millisecond)
offset := strconv.Itoa(i)

endpoint := "https://app.netlas.io/api/domains/"
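
The surrounding loop pages through the Netlas results 20 at a time, using i as the offset, and the removed one-second sleep is replaced by the netlas=1/s default above. A standalone sketch of the same offset-pagination pattern follows; the query and offset parameter names are made up for illustration and are not the real Netlas API parameters:

package main

import (
	"fmt"
	"net/url"
	"strconv"
)

func main() {
	const pageSize = 20
	totalCount := 95 // e.g. the count returned by a prior domains_count request

	base := "https://app.netlas.io/api/domains/" // endpoint shown in the hunk above
	for i := 0; i < totalCount; i += pageSize {
		q := url.Values{}
		q.Set("q", "domain:*.example.com") // hypothetical query parameter
		q.Set("start", strconv.Itoa(i))    // hypothetical offset parameter
		// One request per page of 20 results; pacing these requests is now the
		// job of the shared per-source rate limiter rather than an inline sleep.
		fmt.Println(base + "?" + q.Encode())
	}
}
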
75 changes: 36 additions & 39 deletions v2/pkg/subscraping/sources/sitedossier/sitedossier.go
@@ -5,7 +5,6 @@ import (
"context"
"fmt"
"io"
"math/rand"
"net/http"
"regexp"
"time"
@@ -25,44 +24,6 @@ type agent struct {
session *subscraping.Session
}

func (a *agent) enumerate(ctx context.Context, baseURL string) {
select {
case <-ctx.Done():
return
default:
}

resp, err := a.session.SimpleGet(ctx, baseURL)
isnotfound := resp != nil && resp.StatusCode == http.StatusNotFound
if err != nil && !isnotfound {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
a.session.DiscardHTTPResponse(resp)
return
}

body, err := io.ReadAll(resp.Body)
if err != nil {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
resp.Body.Close()
return
}
resp.Body.Close()

src := string(body)
for _, match := range a.session.Extractor.Extract(src) {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: match}
}

match1 := reNext.FindStringSubmatch(src)
time.Sleep(time.Duration((3 + rand.Intn(SleepRandIntn))) * time.Second)

if len(match1) > 0 {
a.enumerate(ctx, "http://www.sitedossier.com"+match1[1])
}
}

// Source is the passive scraping agent
type Source struct {
timeTaken time.Duration
@@ -95,6 +56,42 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
return a.results
}

func (a *agent) enumerate(ctx context.Context, baseURL string) {
select {
case <-ctx.Done():
return
default:
}

resp, err := a.session.SimpleGet(ctx, baseURL)
isnotfound := resp != nil && resp.StatusCode == http.StatusNotFound
if err != nil && !isnotfound {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
a.session.DiscardHTTPResponse(resp)
return
}

body, err := io.ReadAll(resp.Body)
if err != nil {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
resp.Body.Close()
return
}
resp.Body.Close()

src := string(body)
for _, subdomain := range a.session.Extractor.Extract(src) {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: subdomain}
}

match := reNext.FindStringSubmatch(src)
if len(match) > 0 {
a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com%s", match[1]))
}
}

// Name returns the name of the source
func (s *Source) Name() string {
return "sitedossier"
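
In the relocated enumerate, the next results page is whatever path reNext captures from the response body, and the former 3+rand.Intn(SleepRandIntn) second pause between pages is gone, deferring to the sitedossier=8/m default. Since reNext itself is defined outside this hunk, the pattern below is a made-up stand-in that only demonstrates the FindStringSubmatch usage:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Hypothetical stand-in for reNext: capture the href of a "next page" link.
	reNext := regexp.MustCompile(`<a href="(/parentdomain/[^"]+)"><b>Next`)

	src := `...<a href="/parentdomain/example.com/101"><b>Next 100</b></a>...`

	// FindStringSubmatch returns the whole match at index 0 and the first
	// capture group at index 1, which is the relative path to request next.
	if match := reNext.FindStringSubmatch(src); len(match) > 0 {
		fmt.Println("next page:", "http://www.sitedossier.com"+match[1])
	}
}
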