diff --git a/README.md b/README.md index abf3d937..1b661c8f 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ CONFIGURATION: -mrs, -max-response-size int maximum response size to read (default 9223372036854775807) -timeout int time to wait for request in seconds (default 10) -aff, -automatic-form-fill enable automatic form filling (experimental) + -fx, -form-extraction enable extraction of form, input, textarea & select elements -retry int number of times to retry the request (default 1) -proxy string http/socks5 proxy to use -H, -headers string[] custom header/cookie to include in all http request in header:value format (file) @@ -535,6 +536,7 @@ CONFIGURATION: -mrs, -max-response-size int maximum response size to read (default 9223372036854775807) -timeout int time to wait for request in seconds (default 10) -aff, -automatic-form-fill enable automatic form filling (experimental) + -fx, -form-extraction enable extraction of form, input, textarea & select elements -retry int number of times to retry the request (default 1) -proxy string http/socks5 proxy to use -H, -headers string[] custom header/cookie to include in request diff --git a/cmd/katana/main.go b/cmd/katana/main.go index 8830c5c2..17bb3d9a 100644 --- a/cmd/katana/main.go +++ b/cmd/katana/main.go @@ -75,6 +75,7 @@ pipelines offering both headless and non-headless crawling.`) flagSet.IntVarP(&options.BodyReadSize, "max-response-size", "mrs", math.MaxInt, "maximum response size to read"), flagSet.IntVar(&options.Timeout, "timeout", 10, "time to wait for request in seconds"), flagSet.BoolVarP(&options.AutomaticFormFill, "automatic-form-fill", "aff", false, "enable automatic form filling (experimental)"), + flagSet.BoolVarP(&options.FormExtraction, "form-extraction", "fx", false, "enable extraction of form, input, textarea & select elements"), flagSet.IntVar(&options.Retries, "retry", 1, "number of times to retry the request"), flagSet.StringVar(&options.Proxy, "proxy", "", "http/socks5 proxy to 
use"),
 		flagSet.StringSliceVarP(&options.CustomHeaders, "headers", "H", nil, "custom header/cookie to include in all http request in header:value format (file)", goflags.FileStringSliceOptions),
diff --git a/pkg/engine/hybrid/crawl.go b/pkg/engine/hybrid/crawl.go
index 7cd3331a..a30dabb8 100644
--- a/pkg/engine/hybrid/crawl.go
+++ b/pkg/engine/hybrid/crawl.go
@@ -180,6 +180,10 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
 	}
 	response.Body = body
 	response.Reader, err = goquery.NewDocumentFromReader(strings.NewReader(response.Body))
+	// Extract forms from the document just built for response.Body; skip when parsing failed.
+	if err == nil && c.Options.Options.FormExtraction {
+		response.Forms = append(response.Forms, utils.ParseFormFields(response.Reader)...)
+	}
 	if err != nil {
diff --git a/pkg/engine/standard/crawl.go b/pkg/engine/standard/crawl.go
index e0ad546b..ff16cb82 100644
--- a/pkg/engine/standard/crawl.go
+++ b/pkg/engine/standard/crawl.go
@@ -83,6 +83,9 @@ func (c *Crawler) makeRequest(s *common.CrawlSession, request *navigation.Reques
 	response.Reader, err = goquery.NewDocumentFromReader(bytes.NewReader(data))
 	response.StatusCode = resp.StatusCode
 	response.Headers = utils.FlattenHeaders(resp.Header)
+	if c.Options.Options.FormExtraction {
+		response.Forms = append(response.Forms, utils.ParseFormFields(response.Reader)...)
+	}
 	resp.ContentLength = int64(len(data))
diff --git a/pkg/navigation/response.go b/pkg/navigation/response.go
index 526a24c5..1a2a45c9 100644
--- a/pkg/navigation/response.go
+++ b/pkg/navigation/response.go
@@ -10,6 +10,13 @@ import (
 type Headers map[string]string
+type Form struct { // Form is one HTML form element as recorded by utils.ParseFormFields
+	Method string `json:"method,omitempty"` // method attribute, upper-cased by the parser
+	Action string `json:"action,omitempty"` // action attribute as found in the document
+	Enctype string `json:"enctype,omitempty"` // enctype attribute as found in the document
+	Parameters []string `json:"parameters,omitempty"` // name attributes of nested input, textarea and select elements
+}
+
 func (h *Headers) MarshalJSON() ([]byte, error) {
 	hCopy := make(Headers)
 	for k, v := range *h {
@@ -30,6 +37,7 @@ type Response struct {
 	RootHostname string `json:"-"`
 	Technologies []string `json:"technologies,omitempty"`
 	Raw string `json:"raw,omitempty"`
+	Forms []Form `json:"forms,omitempty"` // appended by the crawlers only when Options.FormExtraction is set
 }
 func (n Response) AbsoluteURL(path string) string {
diff --git a/pkg/output/output.go b/pkg/output/output.go
index 2d6af463..fa342bda 100644
--- a/pkg/output/output.go
+++ b/pkg/output/output.go
@@ -143,9 +143,11 @@ func (w *StandardWriter) Write(result *Result) error {
 	if w.omitRaw {
 		result.Request.Raw = ""
-		result.Response.Raw = ""
+		if result.Response != nil {
+			result.Response.Raw = ""
+		}
 	}
-	if w.omitBody {
+	if w.omitBody && result.HasResponse() {
 		result.Response.Body = ""
 	}
diff --git a/pkg/types/options.go b/pkg/types/options.go
index e35b0c93..5a9a7f33 100644
--- a/pkg/types/options.go
+++ b/pkg/types/options.go
@@ -82,6 +82,8 @@ type Options struct {
 	Headless bool
 	// AutomaticFormFill enables optional automatic form filling and submission
 	AutomaticFormFill bool
+	// FormExtraction enables extraction of form, input, textarea & select elements
+	FormExtraction bool
 	// UseInstalledChrome skips chrome install and use local instance
 	UseInstalledChrome bool
 	// ShowBrowser specifies whether the show the browser in headless mode
diff --git a/pkg/utils/formfields.go b/pkg/utils/formfields.go
new file mode 100644
index 00000000..eb06d37e
--- /dev/null
+++ b/pkg/utils/formfields.go
@@ -0,0 +1,42 @@
+package
utils
+
+import (
+	"strings"
+
+	"github.com/projectdiscovery/katana/pkg/navigation"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/projectdiscovery/utils/generic"
+)
+
+// ParseFormFields collects the form elements of document, recording each
+// form's action, upper-cased method, enctype and the name attribute of every
+// nested input, textarea and select element that has one. Forms with no
+// attribute value and no named field are skipped. A nil document yields nil.
+func ParseFormFields(document *goquery.Document) []navigation.Form {
+	if document == nil {
+		return nil // nothing to inspect; avoids dereferencing an unset document
+	}
+
+	var forms []navigation.Form
+	document.Find("form").Each(func(_ int, formElem *goquery.Selection) {
+		// A missing attribute is handled exactly like an empty one.
+		action, _ := formElem.Attr("action")
+		method, _ := formElem.Attr("method")
+		enctype, _ := formElem.Attr("enctype")
+		form := navigation.Form{Action: action, Method: strings.ToUpper(method), Enctype: enctype}
+
+		formElem.Find("input, textarea, select").Each(func(_ int, inputElem *goquery.Selection) {
+			if name, ok := inputElem.Attr("name"); ok {
+				form.Parameters = append(form.Parameters, name)
+			}
+		})
+
+		// Keep the form only when it carries an attribute value or a parameter.
+		if !generic.EqualsAll("", form.Action, form.Method, form.Enctype) || len(form.Parameters) > 0 {
+			forms = append(forms, form)
+		}
+	})
+
+	return forms
+}
diff --git a/pkg/utils/formfields_test.go b/pkg/utils/formfields_test.go
new file mode 100644
index 00000000..3c7c8be2
--- /dev/null
+++ b/pkg/utils/formfields_test.go
@@ -0,0 +1,43 @@
+package utils
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/stretchr/testify/require"
+)
+
+var htmlFormExample = `
+
+ HTML Form Test
+
+
+
+
+ + + +
+
+
+
+`
+
+// TestParseFormFields checks the forms and parameter names extracted from htmlFormExample.
+func TestParseFormFields(t *testing.T) {
+	document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlFormExample))
+	require.NoError(t, err, "could not read document")
+	forms := ParseFormFields(document)
+	// Pin the form count first so a regression fails here instead of panicking on forms[i] below.
+	require.Len(t, forms, 3, "found more or fewer forms than were present")
+	require.Equal(t, "/test", forms[0].Action)
+	require.Equal(t, "POST", forms[0].Method)
+	require.Equal(t, "POST", forms[1].Method)
+	require.Equal(t, "/test2", forms[2].Action)
+	require.Equal(t, "", forms[0].Enctype)
+	require.Contains(t, forms[0].Parameters, "firstname")
+	require.Contains(t, forms[0].Parameters, "textarea1")
+	require.Contains(t, forms[0].Parameters, "select1")
+	require.Len(t, forms[0].Parameters, 3, "found more or fewer parameters than were present")
+}