Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add custom field #225

Merged
merged 12 commits into from
Jan 5, 2023
1 change: 1 addition & 0 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.StringSliceVarP(&options.CustomHeaders, "headers", "H", nil, "custom header/cookie to include in request", goflags.StringSliceOptions),
flagSet.StringVar(&cfgFile, "config", "", "path to the katana configuration file"),
flagSet.StringVarP(&options.FormConfig, "form-config", "fc", "", "path to custom form configuration file"),
flagSet.StringVarP(&options.FieldConfig, "field-config", "flc", "", "path to custom field configuration file"),
)

flagSet.CreateGroup("headless", "Headless",
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,5 @@ require (
golang.org/x/text v0.5.0 // indirect
golang.org/x/tools v0.2.0 // indirect
gopkg.in/djherbis/times.v1 v1.3.0 // indirect
gopkg.in/yaml.v2 v2.4.0
)
18 changes: 9 additions & 9 deletions pkg/engine/hybrid/hybrid.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,8 @@ func (c *Crawler) makeParseResponseCallback(queue *queue.VarietyQueue) func(nr n
if err != nil {
return
}
// Ignore the following cases
// - previously seen URLs
if !c.options.UniqueFilter.UniqueURL(nr.RequestURL()) {
// Ignore blank URL items and only work on unique items
if !c.options.UniqueFilter.UniqueURL(nr.RequestURL()) && len(nr.CustomFields) == 0 {
return
}
// - URLs stuck in a loop
Expand All @@ -256,12 +255,13 @@ func (c *Crawler) makeParseResponseCallback(queue *queue.VarietyQueue) func(nr n

// Write the found result to output
result := &output.Result{
Timestamp: time.Now(),
Body: nr.Body,
URL: nr.URL,
Source: nr.Source,
Tag: nr.Tag,
Attribute: nr.Attribute,
Timestamp: time.Now(),
Body: nr.Body,
URL: nr.URL,
Source: nr.Source,
Tag: nr.Tag,
Attribute: nr.Attribute,
CustomFields: nr.CustomFields,
}
if nr.Method != http.MethodGet {
result.Method = nr.Method
Expand Down
49 changes: 49 additions & 0 deletions pkg/engine/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/PuerkitoBio/goquery"
"github.com/projectdiscovery/katana/pkg/navigation"
"github.com/projectdiscovery/katana/pkg/output"
"github.com/projectdiscovery/katana/pkg/utils"
"golang.org/x/net/html"
)
Expand Down Expand Up @@ -68,6 +69,9 @@ var responseParsers = []responseParser{
// Optional JS relative endpoints parsers
{contentParser, scriptJSFileRegexParser},
{contentParser, bodyScrapeEndpointsParser},

// custom field regex parser
{bodyParser, customFieldRegexParser},
}

// parseResponse runs the response parsers on the navigation response
Expand Down Expand Up @@ -622,3 +626,48 @@ func bodyScrapeEndpointsParser(resp navigation.Response, callback func(navigatio
callback(navigation.NewNavigationRequestURLFromResponse(item, resp.Resp.Request.URL.String(), "html", "regex", resp))
}
}

// customFieldRegexParser runs every configured custom-field regex against the
// response and reports the extracted values through callback.
//
// For each entry of output.CustomFieldsMap the compiled regexes are applied to
// the body, to the headers (each header rendered as "Key: v1\nv2"), or to
// both, depending on the entry's Part. For every match the capture group
// selected by the entry's Group is collected under the entry's name.
//
// Fields that produced no value are omitted from the result, and callback is
// invoked at most once, only when at least one field matched.
func customFieldRegexParser(resp navigation.Response, callback func(navigation.Request)) {
	if len(output.CustomFieldsMap) == 0 {
		return
	}

	// Convert the body once instead of once per regex.
	body := string(resp.Body)

	customFields := make(map[string][]string)
	for _, field := range output.CustomFieldsMap {
		// A negative group can never be indexed; skip it instead of panicking.
		if field.Group < 0 {
			continue
		}
		scanBody := field.Part == output.Body.ToString() || field.Part == output.Response.ToString()
		scanHeader := field.Part == output.Header.ToString() || field.Part == output.Response.ToString()

		var results []string
		for _, re := range field.CompileRegex {
			var matches [][]string

			if scanBody {
				matches = re.FindAllStringSubmatch(body, -1)
			}

			if scanHeader {
				for key, values := range resp.Resp.Header {
					header := key + ": " + strings.Join(values, "\n")
					matches = append(matches, re.FindAllStringSubmatch(header, -1)...)
				}
			}

			for _, match := range matches {
				// The regex may define fewer groups than the configured index.
				if len(match) <= field.Group {
					continue
				}
				results = append(results, match[field.Group])
			}
		}

		// Only record fields that actually matched, so that a response without
		// any match does not produce an empty result.
		if len(results) > 0 {
			customFields[field.GetName()] = results
		}
	}

	if len(customFields) == 0 {
		return
	}

	requestURL := resp.Resp.Request.URL.String()
	callback(navigation.Request{
		Method:       "GET",
		URL:          requestURL,
		Source:       requestURL,
		Attribute:    "regex",
		Tag:          "regex",
		Depth:        resp.Depth,
		CustomFields: customFields,
	})
}
87 changes: 87 additions & 0 deletions pkg/engine/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package parser
import (
"net/http"
"net/url"
"regexp"
"strings"
"testing"

"github.com/PuerkitoBio/goquery"
"github.com/projectdiscovery/katana/pkg/navigation"
"github.com/projectdiscovery/katana/pkg/output"
"github.com/projectdiscovery/katana/pkg/types"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -539,3 +541,88 @@ func TestScriptParsers(t *testing.T) {

})
}

func TestRegexBodyParsers(t *testing.T) {
parsed, _ := url.Parse("https://security-crawl-maze.app/contact")
t.Run("regexbody", func(t *testing.T) {
output.CustomFieldsMap = make(map[string]output.CustomFieldConfig)
resp := navigation.Response{
Resp: &http.Response{Request: &http.Request{URL: parsed}},
Depth: 0,
Body: []byte("some content [email protected]"),
}

// set required regex
output.CustomFieldsMap["email"] = output.CustomFieldConfig{
Name: "email",
Type: "regex",
Part: "body",
CompileRegex: []*regexp.Regexp{regexp.MustCompile(`([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)`)},
}

var gotFields map[string][]string
customFieldRegexParser(resp, func(resp navigation.Request) {
gotFields = resp.CustomFields
})
var requireFields = map[string][]string{"email": {"[email protected]"}}
require.Equal(t, requireFields, gotFields, "could not get correct url")
})
t.Run("regexheader", func(t *testing.T) {
output.CustomFieldsMap = make(map[string]output.CustomFieldConfig)
resp := navigation.Response{
Resp: &http.Response{Request: &http.Request{URL: parsed},
Header: http.Header{
"server": []string{"ECS (dcb/7F84)"},
},
},
}

// set required regex
output.CustomFieldsMap["server"] = output.CustomFieldConfig{
Name: "server",
Type: "regex",
Part: "header",
CompileRegex: []*regexp.Regexp{regexp.MustCompile(`server: ECS`)},
}

var gotFields map[string][]string
customFieldRegexParser(resp, func(resp navigation.Request) {
gotFields = resp.CustomFields
})
var requireFields = map[string][]string{"server": {"server: ECS"}}
require.Equal(t, requireFields, gotFields, "could not get correct url")
})

t.Run("regexresponse", func(t *testing.T) {
output.CustomFieldsMap = make(map[string]output.CustomFieldConfig)
resp := navigation.Response{
Resp: &http.Response{Request: &http.Request{URL: parsed},
Header: http.Header{
"server": []string{"ECS (dcb/7F84)"},
},
},
Body: []byte("some content [email protected]"),
}

// set required regex
output.CustomFieldsMap["server"] = output.CustomFieldConfig{
Name: "server",
Type: "regex",
Part: "response",
CompileRegex: []*regexp.Regexp{regexp.MustCompile(`ECS`)},
}
output.CustomFieldsMap["email"] = output.CustomFieldConfig{
Name: "email",
Type: "regex",
Part: "response",
CompileRegex: []*regexp.Regexp{regexp.MustCompile(`([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)`)},
}

var gotFields map[string][]string
customFieldRegexParser(resp, func(resp navigation.Request) {
gotFields = resp.CustomFields
})
var requireFields = map[string][]string{"server": {"ECS"}, "email": {"[email protected]"}}
require.Equal(t, requireFields, gotFields, "could not get correct url")
})
}
18 changes: 9 additions & 9 deletions pkg/engine/standard/standard.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,8 @@ func (c *Crawler) makeParseResponseCallback(queue *queue.VarietyQueue) func(nr n
if err != nil {
return
}
// Ignore the following cases
// - previously seen URLs
if !c.options.UniqueFilter.UniqueURL(nr.RequestURL()) {
// Ignore blank URL items and only work on unique items
if !c.options.UniqueFilter.UniqueURL(nr.RequestURL()) && len(nr.CustomFields) == 0 {
return
}
// - URLs stuck in a loop
Expand All @@ -154,12 +153,13 @@ func (c *Crawler) makeParseResponseCallback(queue *queue.VarietyQueue) func(nr n

// Write the found result to output
result := &output.Result{
Timestamp: time.Now(),
Body: nr.Body,
URL: nr.URL,
Source: nr.Source,
Tag: nr.Tag,
Attribute: nr.Attribute,
Timestamp: time.Now(),
Body: nr.Body,
URL: nr.URL,
Source: nr.Source,
Tag: nr.Tag,
Attribute: nr.Attribute,
CustomFields: nr.CustomFields,
}
if nr.Method != http.MethodGet {
result.Method = nr.Method
Expand Down
2 changes: 2 additions & 0 deletions pkg/navigation/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ type Request struct {
Attribute string
RootHostname string
Source string // source is the source of the request

CustomFields map[string][]string // customField matched output
}

// RequestURL returns the request URL for the navigation
Expand Down
Loading