From 4b1217faa77e15b76517bfa41efc3fdf6d4ce3ad Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Wed, 1 Feb 2023 14:38:14 +0100 Subject: [PATCH] adding support for source/origin wappalyzer --- go.mod | 1 + go.sum | 2 ++ internal/runner/options.go | 3 +-- pkg/engine/hybrid/crawl.go | 4 +++- pkg/engine/hybrid/hybrid.go | 27 +++++++++++++++++++-------- pkg/engine/standard/crawl.go | 4 ++++ pkg/engine/standard/standard.go | 28 ++++++++++++++++++++-------- pkg/navigation/request.go | 21 +++++++++++---------- pkg/navigation/response.go | 1 + pkg/output/result.go | 2 ++ pkg/types/crawler_options.go | 24 ++++++++++++++++-------- 11 files changed, 80 insertions(+), 37 deletions(-) diff --git a/go.mod b/go.mod index 1c5e19e6..6268c02b 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/projectdiscovery/ratelimit v0.0.5 github.com/projectdiscovery/retryablehttp-go v1.0.8 github.com/projectdiscovery/utils v0.0.5 + github.com/projectdiscovery/wappalyzergo v0.0.80 github.com/remeh/sizedwaitgroup v1.0.0 github.com/rs/xid v1.4.0 github.com/shirou/gopsutil/v3 v3.22.12 diff --git a/go.sum b/go.sum index 0d79c961..64367ae3 100644 --- a/go.sum +++ b/go.sum @@ -139,6 +139,8 @@ github.com/projectdiscovery/stringsutil v0.0.2 h1:uzmw3IVLJSMW1kEg8eCStG/cGbYYZA github.com/projectdiscovery/stringsutil v0.0.2/go.mod h1:EJ3w6bC5fBYjVou6ryzodQq37D5c6qbAYQpGmAy+DC0= github.com/projectdiscovery/utils v0.0.5 h1:3S4ZaxzStYWGBWGgskll+76+3xAapWmo6Wa9vvXZx1o= github.com/projectdiscovery/utils v0.0.5/go.mod h1:PCwA5YuCYWPgHaGiZmr53/SA9iGQmAnw7DSHuhr8VPQ= +github.com/projectdiscovery/wappalyzergo v0.0.80 h1:QWE1Nrxwc3bo5dYMgO1NMADZRzPWY0l25bcAPFjKBB4= +github.com/projectdiscovery/wappalyzergo v0.0.80/go.mod h1:HvYuW0Be4JCjVds/+XAEaMSqRG9yrI97UmZq0TPk6A0= github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E= github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= diff --git a/internal/runner/options.go b/internal/runner/options.go index e72e1a3d..8c8f05a9 100644 --- a/internal/runner/options.go +++ b/internal/runner/options.go @@ -13,7 +13,6 @@ import ( "github.com/projectdiscovery/katana/pkg/utils" errorutil "github.com/projectdiscovery/utils/errors" fileutil "github.com/projectdiscovery/utils/file" - logutil "github.com/projectdiscovery/utils/log" "gopkg.in/yaml.v3" ) @@ -101,7 +100,7 @@ func configureOutput(options *types.Options) { gologger.DefaultLogger.SetMaxLevel(levels.LevelVerbose) } - logutil.DisableDefaultLogger() + // logutil.DisableDefaultLogger() } func initExampleFormFillConfig() error { diff --git a/pkg/engine/hybrid/crawl.go b/pkg/engine/hybrid/crawl.go index bdcb6e10..3c31512b 100644 --- a/pkg/engine/hybrid/crawl.go +++ b/pkg/engine/hybrid/crawl.go @@ -18,6 +18,7 @@ import ( "github.com/projectdiscovery/katana/pkg/utils/queue" "github.com/projectdiscovery/retryablehttp-go" errorutil "github.com/projectdiscovery/utils/errors" + mapsutil "github.com/projectdiscovery/utils/maps" ) func (c *Crawler) navigateRequest(ctx context.Context, httpclient *retryablehttp.Client, queue *queue.VarietyQueue, parseResponseCallback func(nr navigation.Request), browser *rod.Browser, request navigation.Request, rootHostname string) (*navigation.Response, error) { @@ -66,6 +67,7 @@ func (c *Crawler) navigateRequest(ctx context.Context, httpclient *retryablehttp } bodyReader, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) + technologies := c.options.Wappalyzer.Fingerprint(headers, body) resp := navigation.Response{ Resp: httpresp, Body: []byte(body), @@ -73,8 +75,8 @@ func (c *Crawler) navigateRequest(ctx context.Context, httpclient *retryablehttp Options: c.options, Depth: depth, RootHostname: rootHostname, + Technologies: mapsutil.GetKeys(technologies), } - _ = resp // process the raw response parser.ParseResponse(resp, parseResponseCallback) diff --git a/pkg/engine/hybrid/hybrid.go b/pkg/engine/hybrid/hybrid.go index fb371793..d608f162 100644 --- a/pkg/engine/hybrid/hybrid.go +++ b/pkg/engine/hybrid/hybrid.go @@ -25,6 +25,7 @@ import ( "github.com/projectdiscovery/katana/pkg/utils" "github.com/projectdiscovery/katana/pkg/utils/queue" errorutil "github.com/projectdiscovery/utils/errors" + mapsutil "github.com/projectdiscovery/utils/maps" stringsutil "github.com/projectdiscovery/utils/strings" "github.com/remeh/sizedwaitgroup" ps "github.com/shirou/gopsutil/v3/process" @@ -171,7 +172,16 @@ func (c *Crawler) Crawl(rootURL string) error { httpclient, _, err := common.BuildClient(c.options.Dialer, c.options.Options, func(resp *http.Response, depth int) { body, _ := io.ReadAll(resp.Body) reader, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) - parser.ParseResponse(navigation.Response{Depth: depth + 1, Options: c.options, RootHostname: hostname, Resp: resp, Body: body, Reader: reader}, parseResponseCallback) + navigationResponse := navigation.Response{ + Depth: depth + 1, + Options: c.options, + RootHostname: hostname, + Resp: resp, + Body: body, + Reader: reader, + Technologies: mapsutil.GetKeys(c.options.Wappalyzer.Fingerprint(resp.Header, body)), + } + parser.ParseResponse(navigationResponse, parseResponseCallback) }) if err != nil { return errorutil.NewWithTag("hybrid", "could not create http client").Wrap(err) @@ -270,13 +280,14 @@ func (c *Crawler) makeParseResponseCallback(queue *queue.VarietyQueue) func(nr n // Write the found result to output result := &output.Result{ - Timestamp: time.Now(), - Body: nr.Body, - URL: nr.URL, - Source: nr.Source, - Tag: nr.Tag, - Attribute: nr.Attribute, - CustomFields: nr.CustomFields, + Timestamp: time.Now(), + Body: nr.Body, + URL: nr.URL, + Source: nr.Source, + Tag: nr.Tag, + Attribute: nr.Attribute, + CustomFields: nr.CustomFields, + SourceTechnologies: nr.SourceTechnologies, } if nr.Method != http.MethodGet { result.Method = nr.Method diff --git a/pkg/engine/standard/crawl.go b/pkg/engine/standard/crawl.go index 4182cef9..18c1c709 100644 --- a/pkg/engine/standard/crawl.go +++ b/pkg/engine/standard/crawl.go @@ -12,6 +12,7 @@ import ( "github.com/projectdiscovery/katana/pkg/utils" "github.com/projectdiscovery/retryablehttp-go" errorutil "github.com/projectdiscovery/utils/errors" + mapsutil "github.com/projectdiscovery/utils/maps" ) // makeRequest makes a request to a URL returning a response interface. @@ -67,6 +68,9 @@ func (c *Crawler) makeRequest(ctx context.Context, request navigation.Request, r return navigation.Response{}, nil } + technologies := c.options.Wappalyzer.Fingerprint(resp.Header, data) + response.Technologies = mapsutil.GetKeys(technologies) + resp.Body = io.NopCloser(strings.NewReader(string(data))) _ = c.options.OutputWriter.Write(nil, resp) diff --git a/pkg/engine/standard/standard.go b/pkg/engine/standard/standard.go index a5c914d8..9f204b80 100644 --- a/pkg/engine/standard/standard.go +++ b/pkg/engine/standard/standard.go @@ -20,6 +20,7 @@ import ( "github.com/projectdiscovery/katana/pkg/utils" "github.com/projectdiscovery/katana/pkg/utils/queue" errorutil "github.com/projectdiscovery/utils/errors" + mapsutil "github.com/projectdiscovery/utils/maps" "github.com/remeh/sizedwaitgroup" ) @@ -79,7 +80,17 @@ func (c *Crawler) Crawl(rootURL string) error { httpclient, _, err := common.BuildClient(c.options.Dialer, c.options.Options, func(resp *http.Response, depth int) { body, _ := io.ReadAll(resp.Body) reader, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) - parser.ParseResponse(navigation.Response{Depth: depth + 1, Options: c.options, RootHostname: hostname, Resp: resp, Body: body, Reader: reader}, parseResponseCallback) + technologies := c.options.Wappalyzer.Fingerprint(resp.Header, body) + navigationResponse := navigation.Response{ + Depth: depth + 1, + Options: c.options, + RootHostname: hostname, + Resp: resp, + Body: body, + Reader: reader, + Technologies: mapsutil.GetKeys(technologies), + } + parser.ParseResponse(navigationResponse, parseResponseCallback) }) if err != nil { return errorutil.NewWithTag("standard", "could not create http client").Wrap(err) @@ -160,13 +171,14 @@ func (c *Crawler) makeParseResponseCallback(queue *queue.VarietyQueue) func(nr n // Write the found result to output result := &output.Result{ - Timestamp: time.Now(), - Body: nr.Body, - URL: nr.URL, - Source: nr.Source, - Tag: nr.Tag, - Attribute: nr.Attribute, - CustomFields: nr.CustomFields, + Timestamp: time.Now(), + Body: nr.Body, + URL: nr.URL, + Source: nr.Source, + Tag: nr.Tag, + Attribute: nr.Attribute, + CustomFields: nr.CustomFields, + SourceTechnologies: nr.SourceTechnologies, } if nr.Method != http.MethodGet { result.Method = nr.Method diff --git a/pkg/navigation/request.go b/pkg/navigation/request.go index 6a22cab1..1d53b0de 100644 --- a/pkg/navigation/request.go +++ b/pkg/navigation/request.go @@ -9,15 +9,16 @@ type Depth struct{} // Request is a navigation request for the crawler type Request struct { - Method string - URL string - Body string - Depth int - Headers map[string]string - Tag string - Attribute string - RootHostname string - Source string // source is the source of the request + Method string + URL string + Body string + Depth int + Headers map[string]string + Tag string + Attribute string + RootHostname string + Source string // source is the source of the request + SourceTechnologies []string // technologies of the source that originated the current request CustomFields map[string][]string // customField matched output } @@ -41,5 +42,5 @@ func (n *Request) RequestURL() string { // newNavigationRequestURL generates a navigation request from a relative URL func NewNavigationRequestURLFromResponse(path, source, tag, attribute string, resp Response) Request { requestURL := resp.AbsoluteURL(path) - return Request{Method: "GET", URL: requestURL, RootHostname: resp.RootHostname, Depth: resp.Depth, Source: source, Attribute: attribute, Tag: tag} + return Request{Method: "GET", URL: requestURL, RootHostname: resp.RootHostname, Depth: resp.Depth, Source: source, Attribute: attribute, Tag: tag, SourceTechnologies: resp.Technologies} } diff --git a/pkg/navigation/response.go b/pkg/navigation/response.go index 398995e1..c0c5c679 100644 --- a/pkg/navigation/response.go +++ b/pkg/navigation/response.go @@ -16,6 +16,7 @@ type Response struct { Reader *goquery.Document Body []byte RootHostname string + Technologies []string Options *types.CrawlerOptions } diff --git a/pkg/output/result.go b/pkg/output/result.go index 3cf454f2..6f7ac82e 100644 --- a/pkg/output/result.go +++ b/pkg/output/result.go @@ -20,4 +20,6 @@ type Result struct { Attribute string `json:"attribute,omitempty"` // customField matched output CustomFields map[string][]string `json:"-"` + // Technologies of the response + SourceTechnologies []string `json:"source-technologies,omitempty"` } diff --git a/pkg/types/crawler_options.go b/pkg/types/crawler_options.go index 4b647062..bbf11750 100644 --- a/pkg/types/crawler_options.go +++ b/pkg/types/crawler_options.go @@ -11,6 +11,7 @@ import ( "github.com/projectdiscovery/katana/pkg/utils/scope" "github.com/projectdiscovery/ratelimit" errorutil "github.com/projectdiscovery/utils/errors" + wappalyzer "github.com/projectdiscovery/wappalyzergo" ) // CrawlerOptions contains helper utilities for the crawler @@ -29,6 +30,8 @@ type CrawlerOptions struct { ScopeManager *scope.Manager // Dialer is instance of the dialer for global crawler Dialer *fastdialer.Dialer + // Wappalyzer instance for technologies detection + Wappalyzer *wappalyzer.Wappalyze } // NewCrawlerOptions creates a new crawler options structure @@ -66,22 +69,27 @@ func NewCrawlerOptions(options *Options) (*CrawlerOptions, error) { return nil, errorutil.NewWithErr(err).Msgf("could not create output writer") } - var ratelimiter ratelimit.Limiter - if options.RateLimit > 0 { - ratelimiter = *ratelimit.New(context.Background(), uint(options.RateLimit), time.Second) - } else if options.RateLimitMinute > 0 { - ratelimiter = *ratelimit.New(context.Background(), uint(options.RateLimitMinute), time.Minute) - } - crawlerOptions := &CrawlerOptions{ ExtensionsValidator: extensionsValidator, ScopeManager: scopeManager, UniqueFilter: itemFilter, - RateLimit: ratelimiter, Options: options, Dialer: fastdialerInstance, OutputWriter: outputWriter, } + + if options.RateLimit > 0 { + crawlerOptions.RateLimit = *ratelimit.New(context.Background(), uint(options.RateLimit), time.Second) + } else if options.RateLimitMinute > 0 { + crawlerOptions.RateLimit = *ratelimit.New(context.Background(), uint(options.RateLimitMinute), time.Minute) + } + + wappalyze, err := wappalyzer.New() + if err != nil { + return nil, err + } + crawlerOptions.Wappalyzer = wappalyze + return crawlerOptions, nil }