From 2bd35e4d6ae31dbd5fd6827ca0484d782e41377b Mon Sep 17 00:00:00 2001 From: Aristos Miliaressis Date: Mon, 5 Jun 2023 11:48:08 +0300 Subject: [PATCH 1/4] form extraction --- README.md | 2 ++ cmd/katana/main.go | 1 + pkg/engine/hybrid/crawl.go | 3 +++ pkg/engine/standard/crawl.go | 3 +++ pkg/navigation/response.go | 8 ++++++++ pkg/types/options.go | 2 ++ pkg/utils/formfields.go | 37 +++++++++++++++++++++++++++++++++++ pkg/utils/formfields_test.go | 38 ++++++++++++++++++++++++++++++++++++ 8 files changed, 94 insertions(+) create mode 100644 pkg/utils/formfields.go create mode 100644 pkg/utils/formfields_test.go diff --git a/README.md b/README.md index abf3d937..1b661c8f 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ CONFIGURATION: -mrs, -max-response-size int maximum response size to read (default 9223372036854775807) -timeout int time to wait for request in seconds (default 10) -aff, -automatic-form-fill enable automatic form filling (experimental) + -fx, -form-extraction enable extraction of form, input, textarea & select elements -retry int number of times to retry the request (default 1) -proxy string http/socks5 proxy to use -H, -headers string[] custom header/cookie to include in all http request in header:value format (file) @@ -535,6 +536,7 @@ CONFIGURATION: -mrs, -max-response-size int maximum response size to read (default 9223372036854775807) -timeout int time to wait for request in seconds (default 10) -aff, -automatic-form-fill enable automatic form filling (experimental) + -fx, -form-extraction enable extraction of form, input, textarea & select elements -retry int number of times to retry the request (default 1) -proxy string http/socks5 proxy to use -H, -headers string[] custom header/cookie to include in request diff --git a/cmd/katana/main.go b/cmd/katana/main.go index 8830c5c2..17bb3d9a 100644 --- a/cmd/katana/main.go +++ b/cmd/katana/main.go @@ -75,6 +75,7 @@ pipelines offering both headless and non-headless crawling.`) flagSet.IntVarP(&options.BodyReadSize, "max-response-size", "mrs", math.MaxInt, "maximum response size to read"), flagSet.IntVar(&options.Timeout, "timeout", 10, "time to wait for request in seconds"), flagSet.BoolVarP(&options.AutomaticFormFill, "automatic-form-fill", "aff", false, "enable automatic form filling (experimental)"), + flagSet.BoolVarP(&options.FormExtraction, "form-extraction", "fx", false, "enable extraction of form, input, textarea & select elements"), flagSet.IntVar(&options.Retries, "retry", 1, "number of times to retry the request"), flagSet.StringVar(&options.Proxy, "proxy", "", "http/socks5 proxy to use"), flagSet.StringSliceVarP(&options.CustomHeaders, "headers", "H", nil, "custom header/cookie to include in all http request in header:value format (file)", goflags.FileStringSliceOptions), diff --git a/pkg/engine/hybrid/crawl.go b/pkg/engine/hybrid/crawl.go index 7cd3331a..a30dabb8 100644 --- a/pkg/engine/hybrid/crawl.go +++ b/pkg/engine/hybrid/crawl.go @@ -180,6 +180,9 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re } response.Body = body + if c.Options.Options.FormExtraction { + response.Forms = append(response.Forms, utils.ParseFormFields(response.Reader)...) + } response.Reader, err = goquery.NewDocumentFromReader(strings.NewReader(response.Body)) if err != nil { diff --git a/pkg/engine/standard/crawl.go b/pkg/engine/standard/crawl.go index e0ad546b..ff16cb82 100644 --- a/pkg/engine/standard/crawl.go +++ b/pkg/engine/standard/crawl.go @@ -83,6 +83,9 @@ func (c *Crawler) makeRequest(s *common.CrawlSession, request *navigation.Reques response.Reader, err = goquery.NewDocumentFromReader(bytes.NewReader(data)) response.StatusCode = resp.StatusCode response.Headers = utils.FlattenHeaders(resp.Header) + if c.Options.Options.FormExtraction { + response.Forms = append(response.Forms, utils.ParseFormFields(response.Reader)...) + } resp.ContentLength = int64(len(data)) diff --git a/pkg/navigation/response.go b/pkg/navigation/response.go index 526a24c5..1a2a45c9 100644 --- a/pkg/navigation/response.go +++ b/pkg/navigation/response.go @@ -10,6 +10,13 @@ import ( type Headers map[string]string +type Form struct { + Method string `json:"method,omitempty"` + Action string `json:"action,omitempty"` + Enctype string `json:"enctype,omitempty"` + Parameters []string `json:"parameters,omitempty"` +} + func (h *Headers) MarshalJSON() ([]byte, error) { hCopy := make(Headers) for k, v := range *h { @@ -30,6 +37,7 @@ type Response struct { RootHostname string `json:"-"` Technologies []string `json:"technologies,omitempty"` Raw string `json:"raw,omitempty"` + Forms []Form `json:"forms,omitempty"` } func (n Response) AbsoluteURL(path string) string { diff --git a/pkg/types/options.go b/pkg/types/options.go index e35b0c93..5a9a7f33 100644 --- a/pkg/types/options.go +++ b/pkg/types/options.go @@ -82,6 +82,8 @@ type Options struct { Headless bool // AutomaticFormFill enables optional automatic form filling and submission AutomaticFormFill bool + // FormExtraction enables extraction of form, input, textarea & select elements + FormExtraction bool // UseInstalledChrome skips chrome install and use local instance UseInstalledChrome bool // ShowBrowser specifies whether the show the browser in headless mode diff --git a/pkg/utils/formfields.go b/pkg/utils/formfields.go new file mode 100644 index 00000000..bd3510d0 --- /dev/null +++ b/pkg/utils/formfields.go @@ -0,0 +1,37 @@ +package utils + +import ( + "github.com/projectdiscovery/katana/pkg/navigation" + + "github.com/PuerkitoBio/goquery" +) + +// parses form, input, textarea & select elements +func ParseFormFields(document *goquery.Document) []navigation.Form { + var forms []navigation.Form + + document.Find("form").Each(func(i int, formElem *goquery.Selection) { + form := navigation.Form{} + + action, _ := formElem.Attr("action") + method, _ := formElem.Attr("method") + enctype, _ := formElem.Attr("enctype") + + form.Action = action + form.Method = method + form.Enctype = enctype + + formElem.Find("input, textarea, select").Each(func(i int, inputElem *goquery.Selection) { + name, ok := inputElem.Attr("name") + if !ok { + return + } + + form.Parameters = append(form.Parameters, name) + }) + + forms = append(forms, form) + }) + + return forms +} diff --git a/pkg/utils/formfields_test.go b/pkg/utils/formfields_test.go new file mode 100644 index 00000000..8b51478a --- /dev/null +++ b/pkg/utils/formfields_test.go @@ -0,0 +1,38 @@ +package utils + +import ( + "strings" + "testing" + + "github.com/PuerkitoBio/goquery" + "github.com/stretchr/testify/require" +) + +var htmlFormExample = ` + + HTML Form Test + + +
+
+ + + +
+ +` + +func TestParseFormFields(t *testing.T) { + document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlFormExample)) + require.NoError(t, err, "could not read document") + + forms := ParseFormFields(document) + + require.Equal(t, "/test", forms[0].Action) + require.Equal(t, "POST", forms[0].Method) + require.Equal(t, "", forms[0].Enctype) + require.Contains(t, forms[0].Parameters, "firstname") + require.Contains(t, forms[0].Parameters, "textarea1") + require.Contains(t, forms[0].Parameters, "select1") + require.Equal(t, 1, len(forms), "found more or less params than where present") +} From 5dc9f7d98868a7ec380a2688c61721bf6b4d26a4 Mon Sep 17 00:00:00 2001 From: Aristos Miliaressis Date: Tue, 13 Jun 2023 12:55:14 +0300 Subject: [PATCH 2/4] uppercased extracted form method --- pkg/utils/formfields.go | 4 +++- pkg/utils/formfields_test.go | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/utils/formfields.go b/pkg/utils/formfields.go index bd3510d0..7a3ad24b 100644 --- a/pkg/utils/formfields.go +++ b/pkg/utils/formfields.go @@ -1,6 +1,8 @@ package utils import ( + "strings" + "github.com/projectdiscovery/katana/pkg/navigation" "github.com/PuerkitoBio/goquery" @@ -18,7 +20,7 @@ func ParseFormFields(document *goquery.Document) []navigation.Form { enctype, _ := formElem.Attr("enctype") form.Action = action - form.Method = method + form.Method = strings.ToUpper(method) form.Enctype = enctype formElem.Find("input, textarea, select").Each(func(i int, inputElem *goquery.Selection) { diff --git a/pkg/utils/formfields_test.go b/pkg/utils/formfields_test.go index 8b51478a..3c7c8be2 100644 --- a/pkg/utils/formfields_test.go +++ b/pkg/utils/formfields_test.go @@ -19,6 +19,8 @@ var htmlFormExample = ` +
+
` @@ -30,9 +32,12 @@ func TestParseFormFields(t *testing.T) { require.Equal(t, "/test", forms[0].Action) require.Equal(t, "POST", forms[0].Method) + require.Equal(t, "POST", forms[1].Method) + require.Equal(t, "/test2", forms[2].Action) require.Equal(t, "", forms[0].Enctype) require.Contains(t, forms[0].Parameters, "firstname") require.Contains(t, forms[0].Parameters, "textarea1") require.Contains(t, forms[0].Parameters, "select1") - require.Equal(t, 1, len(forms), "found more or less params than where present") + require.Equal(t, 3, len(forms[0].Parameters), "found more or less parameters than where present") + require.Equal(t, 3, len(forms), "found more or less forms than where present") } From 4792f2e42f71fce1215e64d2ad411ed16c3c03de Mon Sep 17 00:00:00 2001 From: Tarun Koyalwar Date: Tue, 13 Jun 2023 19:10:44 +0530 Subject: [PATCH 3/4] fix nil response panic --- pkg/output/output.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/output/output.go b/pkg/output/output.go index 2d6af463..fa342bda 100644 --- a/pkg/output/output.go +++ b/pkg/output/output.go @@ -143,9 +143,11 @@ func (w *StandardWriter) Write(result *Result) error { if w.omitRaw { result.Request.Raw = "" - result.Response.Raw = "" + if result.Response != nil { + result.Response.Raw = "" + } } - if w.omitBody { + if w.omitBody && result.HasResponse() { result.Response.Body = "" } From ef6fb760b3cfce98f77f6ca7c42da9e1c94dbd1a Mon Sep 17 00:00:00 2001 From: Tarun Koyalwar Date: Tue, 13 Jun 2023 19:17:06 +0530 Subject: [PATCH 4/4] skip empty form data --- pkg/utils/formfields.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/utils/formfields.go b/pkg/utils/formfields.go index 7a3ad24b..eb06d37e 100644 --- a/pkg/utils/formfields.go +++ b/pkg/utils/formfields.go @@ -6,6 +6,7 @@ import ( "github.com/projectdiscovery/katana/pkg/navigation" "github.com/PuerkitoBio/goquery" + "github.com/projectdiscovery/utils/generic" ) // parses form, input, textarea & select elements @@ -32,7 +33,9 @@ func ParseFormFields(document *goquery.Document) []navigation.Form { form.Parameters = append(form.Parameters, name) }) - forms = append(forms, form) + if !generic.EqualsAll("", form.Action, form.Method, form.Enctype) || len(form.Parameters) > 0 { + forms = append(forms, form) + } }) return forms