From e9f64a2acbe14b9f6850912735da868d395c8a3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Do=C4=9Fan=20Can=20Bak=C4=B1r?=
Date: Wed, 30 Oct 2024 14:36:26 +0300
Subject: [PATCH] fix sitedossier negative stats

---
 .../sources/sitedossier/sitedossier.go | 40 +++++++------------
 1 file changed, 14 insertions(+), 26 deletions(-)

diff --git a/v2/pkg/subscraping/sources/sitedossier/sitedossier.go b/v2/pkg/subscraping/sources/sitedossier/sitedossier.go
index 91c86298e..dacc3c66b 100644
--- a/v2/pkg/subscraping/sources/sitedossier/sitedossier.go
+++ b/v2/pkg/subscraping/sources/sitedossier/sitedossier.go
@@ -18,12 +18,6 @@ const SleepRandIntn = 5
 
 var reNext = regexp.MustCompile(`<a href="([A-Za-z0-9/.]+)"><b>`)
 
-type agent struct {
-	results chan subscraping.Result
-	errors  int
-	session *subscraping.Session
-}
-
 // Source is the passive scraping agent
 type Source struct {
 	timeTaken time.Duration
@@ -37,58 +31,52 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
 	s.errors = 0
 	s.results = 0
 
-	a := agent{
-		session: session,
-		results: results,
-	}
-
 	go func() {
 		defer func(startTime time.Time) {
 			s.timeTaken = time.Since(startTime)
-			close(a.results)
+			close(results)
 		}(time.Now())
 
-		a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com/parentdomain/%s", domain))
-		s.errors = a.errors
-		s.results = len(a.results)
+		s.enumerate(ctx, session, fmt.Sprintf("http://www.sitedossier.com/parentdomain/%s", domain), results)
 	}()
 
-	return a.results
+	return results
 }
 
-func (a *agent) enumerate(ctx context.Context, baseURL string) {
+func (s *Source) enumerate(ctx context.Context, session *subscraping.Session, baseURL string, results chan subscraping.Result) {
 	select {
 	case <-ctx.Done():
 		return
 	default:
 	}
 
-	resp, err := a.session.SimpleGet(ctx, baseURL)
+	resp, err := session.SimpleGet(ctx, baseURL)
 	isnotfound := resp != nil && resp.StatusCode == http.StatusNotFound
 	if err != nil && !isnotfound {
-		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
-		a.errors++
-		a.session.DiscardHTTPResponse(resp)
+		results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
+		s.errors++
+		session.DiscardHTTPResponse(resp)
 		return
 	}
 
 	body, err := io.ReadAll(resp.Body)
 	if err != nil {
-		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
-		a.errors++
+		results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
+		s.errors++
 		resp.Body.Close()
 		return
 	}
 	resp.Body.Close()
 
 	src := string(body)
-	for _, subdomain := range a.session.Extractor.Extract(src) {
-		a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: subdomain}
+	for _, subdomain := range session.Extractor.Extract(src) {
+		results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: subdomain}
+		s.results++
 	}
 
 	match := reNext.FindStringSubmatch(src)
 	if len(match) > 0 {
-		a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com%s", match[1]))
+		s.enumerate(ctx, session, fmt.Sprintf("http://www.sitedossier.com%s", match[1]), results)
 	}
 }
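
Note on the fix (reviewer sketch, not part of the patch): the old code set `s.results = len(a.results)` after enumeration, but `len` on a channel reports how many values are buffered at that instant, never the total number sent; for the unbuffered channel that `Run` returns it is always 0, which presumably produced the broken stats this patch addresses. Incrementing `s.results++` at each send, as the patch does, counts what was actually emitted. A minimal standalone Go sketch of the difference (hypothetical names, not subfinder code):

```go
package main

import "fmt"

// Hypothetical standalone sketch showing why `s.results = len(a.results)`
// mis-counted: len() on a channel reports the number of values currently
// buffered, never the total ever sent, and an unbuffered channel always
// reports 0.
func main() {
	results := make(chan string) // unbuffered, like the channel Run returns

	count := 0 // the patched approach: increment on every send (cf. s.results++)
	go func() {
		defer close(results)
		for _, sub := range []string{"a.example.com", "b.example.com", "c.example.com"} {
			results <- sub // blocks until the consumer receives
			count++
		}
	}()

	for sub := range results { // consume until the channel is closed
		fmt.Println(sub)
	}

	// Reading count here is race-free: close(results) happens after the
	// final increment, and this point is only reached once the close is
	// observed by the range loop above.
	fmt.Println("len(results):", len(results)) // 0 — buffered right now, not total sent
	fmt.Println("count:", count)               // 3 — values actually emitted
}
```

Run under `go run` (or `go test -race` in test form): `len(results)` prints 0 while `count` prints 3, matching the per-send accounting the patch moves to.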