5 changes: 4 additions & 1 deletion v2/pkg/runner/options.go
@@ -243,7 +243,6 @@ func (options *Options) preProcessOptions() {

var defaultRateLimits = []string{
"github=30/m",
// "gitlab=2000/m",
"fullhunt=60/m",
fmt.Sprintf("robtex=%d/ms", uint(math.MaxUint)),
"securitytrails=1/s",
@@ -254,4 +253,8 @@ var defaultRateLimits = []string{
"waybackarchive=15/m",
"whoisxmlapi=50/s",
"securitytrails=2/s",
"sitedossier=8/m",
"netlas=1/s",
// "gitlab=2/s",
"github=83/m",
}
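
The entries in defaultRateLimits follow a source=count/interval convention, e.g. github=83/m or netlas=1/s. As a minimal sketch of how such a string could be split into a per-source request budget (the parseRateLimit helper below is hypothetical and is not the parser the runner actually uses):

package main

import (
	"fmt"
	"strconv"
	"strings"
	"time"
)

// parseRateLimit is a hypothetical helper: it splits "github=83/m" into a
// source name, a request count, and the duration that budget applies to.
func parseRateLimit(s string) (source string, count uint, per time.Duration, err error) {
	source, spec, ok := strings.Cut(s, "=")
	if !ok {
		return "", 0, 0, fmt.Errorf("missing '=' in %q", s)
	}
	countStr, unit, ok := strings.Cut(spec, "/")
	if !ok {
		return "", 0, 0, fmt.Errorf("missing '/' in %q", s)
	}
	n, err := strconv.ParseUint(countStr, 10, 64)
	if err != nil {
		return "", 0, 0, err
	}
	units := map[string]time.Duration{"ms": time.Millisecond, "s": time.Second, "m": time.Minute, "h": time.Hour}
	d, ok := units[unit]
	if !ok {
		return "", 0, 0, fmt.Errorf("unknown unit %q in %q", unit, s)
	}
	return source, uint(n), d, nil
}

func main() {
	src, n, per, _ := parseRateLimit("github=83/m")
	fmt.Printf("%s: %d requests per %s\n", src, n, per) // github: 83 requests per 1m0s
}
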
10 changes: 0 additions & 10 deletions v2/pkg/subscraping/sources/github/github.go
@@ -80,16 +80,6 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
}

token := tokens.Get()

if token.RetryAfter > 0 {
if len(tokens.pool) == 1 {
gologger.Verbose().Label(s.Name()).Msgf("GitHub Search request rate limit exceeded, waiting for %d seconds before retry... \n", token.RetryAfter)
time.Sleep(time.Duration(token.RetryAfter) * time.Second)
} else {
token = tokens.Get()
}
}

headers := map[string]string{
"Accept": "application/vnd.github.v3.text-match+json", "Authorization": "token " + token.Hash,
}
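
With the inline wait on token.RetryAfter removed, pacing for this source is expected to come from the github=83/m entry in defaultRateLimits above. The sketch below only illustrates the general shape of a centralized per-source limiter, using golang.org/x/time/rate; it is not subfinder's own rate-limit plumbing:

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// 83 requests per minute, no bursting beyond a single request.
	limiter := rate.NewLimiter(rate.Every(time.Minute/83), 1)

	ctx := context.Background()
	for i := 0; i < 3; i++ {
		// Wait blocks until the next request is allowed (or ctx is cancelled),
		// which is what replaces ad-hoc time.Sleep calls inside each source.
		if err := limiter.Wait(ctx); err != nil {
			return
		}
		fmt.Println("request", i, "allowed at", time.Now().Format(time.RFC3339))
	}
}
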
4 changes: 0 additions & 4 deletions v2/pkg/subscraping/sources/gitlab/gitlab.go
@@ -9,7 +9,6 @@ import (
"regexp"
"strings"
"sync"
"time"

jsoniter "github.com/json-iterator/go"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
@@ -118,9 +117,6 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
return
}

// TODO: hardcoded for testing, should be a source internal rate limit #718
time.Sleep(2 * time.Second)

s.enumerate(ctx, nextURL, domainRegexp, headers, session, results)
}
}
2 changes: 0 additions & 2 deletions v2/pkg/subscraping/sources/netlas/netlas.go
@@ -103,8 +103,6 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
//Define the API endpoint URL and query parameters

for i := 0; i < domainsCount.Count; i += 20 {

time.Sleep(1000 * time.Millisecond)
offset := strconv.Itoa(i)

endpoint := "https://app.netlas.io/api/domains/"
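
The surrounding loop pages through the Netlas results 20 at a time, using i as the offset, and the removed one-second sleep is replaced by the netlas=1/s default above. A standalone sketch of the same offset-pagination pattern follows; the query and offset parameter names are made up for illustration and are not the real Netlas API parameters:

package main

import (
	"fmt"
	"net/url"
	"strconv"
)

func main() {
	const pageSize = 20
	totalCount := 95 // e.g. the count returned by a prior domains_count request

	base := "https://app.netlas.io/api/domains/" // endpoint shown in the hunk above
	for i := 0; i < totalCount; i += pageSize {
		q := url.Values{}
		q.Set("q", "domain:*.example.com") // hypothetical query parameter
		q.Set("start", strconv.Itoa(i))    // hypothetical offset parameter
		// One request per page of 20 results; pacing these requests is now the
		// job of the shared per-source rate limiter rather than an inline sleep.
		fmt.Println(base + "?" + q.Encode())
	}
}
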
75 changes: 36 additions & 39 deletions v2/pkg/subscraping/sources/sitedossier/sitedossier.go
@@ -5,7 +5,6 @@ import (
"context"
"fmt"
"io"
"math/rand"
"net/http"
"regexp"
"time"
@@ -25,44 +24,6 @@ type agent struct {
session *subscraping.Session
}

func (a *agent) enumerate(ctx context.Context, baseURL string) {
select {
case <-ctx.Done():
return
default:
}

resp, err := a.session.SimpleGet(ctx, baseURL)
isnotfound := resp != nil && resp.StatusCode == http.StatusNotFound
if err != nil && !isnotfound {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
a.session.DiscardHTTPResponse(resp)
return
}

body, err := io.ReadAll(resp.Body)
if err != nil {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
resp.Body.Close()
return
}
resp.Body.Close()

src := string(body)
for _, match := range a.session.Extractor.Extract(src) {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: match}
}

match1 := reNext.FindStringSubmatch(src)
time.Sleep(time.Duration((3 + rand.Intn(SleepRandIntn))) * time.Second)

if len(match1) > 0 {
a.enumerate(ctx, "http://www.sitedossier.com"+match1[1])
}
}

// Source is the passive scraping agent
type Source struct {
timeTaken time.Duration
@@ -95,6 +56,42 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
return a.results
}

func (a *agent) enumerate(ctx context.Context, baseURL string) {
select {
case <-ctx.Done():
return
default:
}

resp, err := a.session.SimpleGet(ctx, baseURL)
isnotfound := resp != nil && resp.StatusCode == http.StatusNotFound
if err != nil && !isnotfound {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
a.session.DiscardHTTPResponse(resp)
return
}

body, err := io.ReadAll(resp.Body)
if err != nil {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
resp.Body.Close()
return
}
resp.Body.Close()

src := string(body)
for _, subdomain := range a.session.Extractor.Extract(src) {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: subdomain}
}

match := reNext.FindStringSubmatch(src)
if len(match) > 0 {
a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com%s", match[1]))
}
}

// Name returns the name of the source
func (s *Source) Name() string {
return "sitedossier"
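
In the relocated enumerate, the next results page is whatever path reNext captures from the response body, and the former 3+rand.Intn(SleepRandIntn) second pause between pages is gone, deferring to the sitedossier=8/m default. Since reNext itself is defined outside this hunk, the pattern below is a made-up stand-in that only demonstrates the FindStringSubmatch usage:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Hypothetical stand-in for reNext: capture the href of a "next page" link.
	reNext := regexp.MustCompile(`<a href="(/parentdomain/[^"]+)"><b>Next`)

	src := `...<a href="/parentdomain/example.com/101"><b>Next 100</b></a>...`

	// FindStringSubmatch returns the whole match at index 0 and the first
	// capture group at index 1, which is the relative path to request next.
	if match := reNext.FindStringSubmatch(src); len(match) > 0 {
		fmt.Println("next page:", "http://www.sitedossier.com"+match[1])
	}
}
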