diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index c02b9480..28631e5a 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -1,8 +1,10 @@ name: 🔨 Build Test + on: - push: - pull_request: workflow_dispatch: + pull_request: + branches: + - dev jobs: build: @@ -11,6 +13,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macOS-latest] + steps: - name: Set up Go uses: actions/setup-go@v3 @@ -20,14 +23,18 @@ jobs: - name: Check out code uses: actions/checkout@v3 + - name: Test + run: go test ./... + working-directory: . + - name: Build run: go build . working-directory: cmd/katana/ - - name: Test - run: go test ./... - working-directory: . - + - name: Install + run: go install + working-directory: cmd/katana/ + - name: Race Condition Tests run: go build -race . working-directory: cmd/katana/ diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2bbccfd4..9f533f8d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -2,7 +2,6 @@ name: 🚨 CodeQL Analysis on: workflow_dispatch: - push: pull_request: branches: - dev diff --git a/.github/workflows/lint-test.yml b/.github/workflows/lint-test.yml index d6921ef1..ebe52856 100644 --- a/.github/workflows/lint-test.yml +++ b/.github/workflows/lint-test.yml @@ -1,8 +1,10 @@ name: 🙏🏻 Lint Test + on: - push: - pull_request: workflow_dispatch: + pull_request: + branches: + - dev jobs: lint: diff --git a/.github/workflows/release-binary.yml b/.github/workflows/release-binary.yml index 028de6ec..5e99aafb 100644 --- a/.github/workflows/release-binary.yml +++ b/.github/workflows/release-binary.yml @@ -1,4 +1,5 @@ name: 🎉 Release Binary + on: create: tags: diff --git a/Dockerfile b/Dockerfile index 971641e1..fafecae4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ -FROM golang:1.19.2-alpine AS builder +FROM golang:1.19.3-alpine AS builder RUN apk add --no-cache 
git RUN go install -v github.com/projectdiscovery/katana/cmd/katana@latest FROM alpine:3.16.2 RUN apk -U upgrade --no-cache \ - && apk add --no-cache bind-tools ca-certificates + && apk add --no-cache bind-tools ca-certificates chromium COPY --from=builder /go/bin/katana /usr/local/bin/ ENTRYPOINT ["katana"] diff --git a/README.md b/README.md index 552ffb91..6a7228e9 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,32 @@ katana requires **Go 1.18** to install successfully. To install, just run the be go install github.com/projectdiscovery/katana/cmd/katana@latest ``` +**More options to install / run katana-** + +
+ Docker + +> To install / update docker to latest tag - + +```sh +docker pull projectdiscovery/katana:latest +``` + +> To run katana in standard mode using docker - + + +```sh +docker run projectdiscovery/katana:latest -u https://tesla.com +``` + +> To run katana in headless mode using docker - + +```sh +docker run projectdiscovery/katana:latest -u https://tesla.com -system-chrome -headless +``` + +
+ ## Usage ```console @@ -77,9 +103,11 @@ CONFIGURATION: -fc, -form-config string path to custom form configuration file HEADLESS: - -hl, -headless enable headless hybrid crawling (experimental) - -sc, -system-chrome use local installed chrome browser instead of katana installed - -sb, -show-browser show the browser on the screen with headless mode + -hl, -headless enable headless hybrid crawling (experimental) + -sc, -system-chrome use local installed chrome browser instead of katana installed + -sb, -show-browser show the browser on the screen with headless mode + -ho, -headless-options string[] start headless chrome with additional options + -nos, -no-sandbox start headless chrome in --no-sandbox mode SCOPE: -cs, -crawl-scope string[] in scope url regex to be followed by crawler @@ -226,8 +254,30 @@ HEADLESS: -hl, -headless enable experimental headless hybrid crawling -sc, -system-chrome use local installed chrome browser instead of katana installed -sb, -show-browser show the browser on the screen with headless mode + -ho, -headless-options string[] start headless chrome with additional options + -nos, -no-sandbox start headless chrome in --no-sandbox mode +``` + +*`-no-sandbox`* +---- + +Runs headless chrome browser with **no-sandbox** option, useful when running as root user. + +```console +katana -u https://tesla.com -headless -no-sandbox ``` +*`-headless-options`* +---- + +When crawling in headless mode, additional chrome options can be specified using `-headless-options`, for example - + + +```console +katana -u https://tesla.com -headless -system-chrome -headless-options --disable-gpu,proxy-server=http://127.0.0.1:8080 +``` + + ## Scope Control Crawling can be endless if not scoped, as such katana comes with multiple support to define the crawl scope. 
@@ -240,7 +290,7 @@ Most handy option to define scope with predefined field name, `rdn` being defaul - `fqdn` - crawling scoped to given sub(domain) - `dn` - crawling scoped to domain name keyword -``` +```console katana -u https://tesla.com -fs dn ``` @@ -250,7 +300,7 @@ katana -u https://tesla.com -fs dn For advanced scope control, `-cs` option can be used that comes with **regex** support. -``` +```console katana -u https://tesla.com -cs login ``` @@ -265,7 +315,7 @@ app/ wordpress/ ``` -``` +```console katana -u https://tesla.com -cs in_scope.txt ``` @@ -275,7 +325,7 @@ katana -u https://tesla.com -cs in_scope.txt For defining what not to crawl, `-cos` option can be used and also support **regex** input. -``` +```console katana -u https://tesla.com -cos logout ``` @@ -288,7 +338,7 @@ $ cat out_of_scope.txt /log_out ``` -``` +```console katana -u https://tesla.com -cos out_of_scope.txt ``` @@ -297,7 +347,7 @@ katana -u https://tesla.com -cos out_of_scope.txt Katana is default to scope `*.domain`, to disable this `-ns` option can be used and also to crawl the internet. -``` +```console katana -u https://tesla.com -ns ``` @@ -475,6 +525,25 @@ https_www.tesla.com_qurl.txt + +*`-extension-match`* +--- + +Crawl output can be easily matched for specific extension using `-em` option to ensure to display only output containing given extension. + +``` +katana -u https://tesla.com -silent -em js,jsp,json +``` + +*`-extension-filter`* +--- + +Crawl output can be easily filtered for specific extension using `-ef` option which ensures to remove all the urls containing given extension. 
+ +``` +katana -u https://tesla.com -silent -ef css,txt,md +``` + Here are additonal filter options - ```console diff --git a/cmd/katana/main.go b/cmd/katana/main.go index 0e30a56c..43a73913 100644 --- a/cmd/katana/main.go +++ b/cmd/katana/main.go @@ -27,7 +27,11 @@ func main() { runner, err := runner.New(options) if err != nil || runner == nil { - gologger.Fatal().Msgf("could not create runner: %s\n", err) + if options.Version { + return + } else { + gologger.Fatal().Msgf("could not create runner: %s\n", err) + } } defer runner.Close() @@ -77,6 +81,8 @@ pipelines offering both headless and non-headless crawling.`) flagSet.BoolVarP(&options.Headless, "headless", "hl", false, "enable headless hybrid crawling (experimental)"), flagSet.BoolVarP(&options.UseInstalledChrome, "system-chrome", "sc", false, "use local installed chrome browser instead of katana installed"), flagSet.BoolVarP(&options.ShowBrowser, "show-browser", "sb", false, "show the browser on the screen with headless mode"), + flagSet.StringSliceVarP(&options.HeadlessOptionalArguments, "headless-options", "ho", nil, "start headless chrome with additional options", goflags.FileCommaSeparatedStringSliceOptions), + flagSet.BoolVarP(&options.HeadlessNoSandbox, "no-sandbox", "nos", false, "start headless chrome in --no-sandbox mode"), ) flagSet.CreateGroup("scope", "Scope", diff --git a/go.mod b/go.mod index affb12f1..4ceaccf6 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/shirou/gopsutil/v3 v3.22.10 github.com/stretchr/testify v1.8.1 go.uber.org/multierr v1.8.0 - golang.org/x/net v0.1.0 + golang.org/x/net v0.2.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -68,7 +68,7 @@ require ( go.uber.org/atomic v1.9.0 // indirect golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 // indirect golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect - golang.org/x/sys v0.1.0 // indirect + golang.org/x/sys v0.2.0 // indirect golang.org/x/text v0.4.0 // indirect golang.org/x/tools v0.1.12 // 
indirect ) diff --git a/go.sum b/go.sum index b7ac779e..d5d54533 100644 --- a/go.sum +++ b/go.sum @@ -248,8 +248,9 @@ golang.org/x/net v0.0.0-20220617184016-355a448f1bc9/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20220728211354-c7608f3a8462/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= -golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/net v0.2.0 h1:sZfSu1wtKLGlWI4ZZayP0ck9Y73K1ynO6gqzTdBVdPU= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -283,12 +284,14 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220731174439-a90be440212d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= 
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= diff --git a/internal/runner/banner.go b/internal/runner/banner.go index 89a86712..b2700493 100644 --- a/internal/runner/banner.go +++ b/internal/runner/banner.go @@ -13,7 +13,7 @@ var banner = fmt.Sprintf(` /_/\_\\_,_/\__/\_,_/_//_/\_,_/ %s `, version) -var version = "v0.0.1" +var version = "v0.0.2" // showBanner is used to show the banner to the user func showBanner() { diff --git a/internal/runner/options.go b/internal/runner/options.go index 3eb1d733..a856c37c 100644 --- a/internal/runner/options.go +++ b/internal/runner/options.go @@ -29,6 +29,9 @@ func validateOptions(options *types.Options) error { if len(options.URLs) == 0 && !fileutil.HasStdin() { return errors.New("no inputs specified for crawler") } + if (options.HeadlessOptionalArguments != nil || options.HeadlessNoSandbox) && !options.Headless { + return errors.New("headless mode (-hl) is required if -ho or -nos are set") + } gologger.DefaultLogger.SetFormatter(formatter.NewCLI(options.NoColors)) return nil } diff --git a/pkg/engine/hybrid/hybrid.go b/pkg/engine/hybrid/hybrid.go index fa97e28f..503041ba 100644 --- a/pkg/engine/hybrid/hybrid.go +++ b/pkg/engine/hybrid/hybrid.go @@ -14,6 +14,7 @@ import ( "github.com/PuerkitoBio/goquery" "github.com/go-rod/rod" "github.com/go-rod/rod/lib/launcher" + "github.com/go-rod/rod/lib/launcher/flags" "github.com/pkg/errors" 
"github.com/projectdiscovery/gologger" "github.com/projectdiscovery/katana/pkg/engine/common" @@ -76,6 +77,14 @@ func New(options *types.CrawlerOptions) (*Crawler, error) { chromeLauncher = chromeLauncher.Headless(true) } + if options.Options.HeadlessNoSandbox { + chromeLauncher.Set("no-sandbox", "true") + } + + for k, v := range options.Options.ParseHeadlessOptionalArguments() { + chromeLauncher.Set(flags.Flag(k), v) + } + launcherURL, err := chromeLauncher.Launch() if err != nil { return nil, err diff --git a/pkg/types/options.go b/pkg/types/options.go index 02e11b6a..228c23c5 100644 --- a/pkg/types/options.go +++ b/pkg/types/options.go @@ -79,6 +79,10 @@ type Options struct { UseInstalledChrome bool // ShowBrowser specifies whether the show the browser in headless mode ShowBrowser bool + // HeadlessOptionalArguments specifies optional arguments to pass to Chrome + HeadlessOptionalArguments goflags.StringSlice + // HeadlessNoSandbox specifies if chrome should be start in --no-sandbox mode + HeadlessNoSandbox bool } func (options *Options) ParseCustomHeaders() map[string]string { @@ -90,3 +94,17 @@ func (options *Options) ParseCustomHeaders() map[string]string { } return customHeaders } + +func (options *Options) ParseHeadlessOptionalArguments() map[string]string { + optionalArguments := make(map[string]string) + for _, v := range options.HeadlessOptionalArguments { + if argParts := strings.SplitN(v, "=", 2); len(argParts) >= 2 { + key := strings.TrimSpace(argParts[0]) + value := strings.TrimSpace(argParts[1]) + if key != "" && value != "" { + optionalArguments[key] = value + } + } + } + return optionalArguments +} diff --git a/pkg/types/options_test.go b/pkg/types/options_test.go new file mode 100644 index 00000000..82ac4fb4 --- /dev/null +++ b/pkg/types/options_test.go @@ -0,0 +1,101 @@ +package types + +import ( + "strings" + "testing" + + "github.com/projectdiscovery/goflags" + "github.com/stretchr/testify/require" +) + +func TestParseCustomHeaders(t 
*testing.T) { + tests := []struct { + name string + input string + want map[string]string + }{ + { + name: "single value", + input: "a:b", + want: map[string]string{"a": "b"}, + }, + { + name: "empty string", + input: "", + want: map[string]string{}, + }, + { + name: "empty value", + input: "a:", + want: map[string]string{"a": ""}, + }, + { + name: "double input", + input: "a:b,c:d", + want: map[string]string{"a": "b", "c": "d"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + strsl := goflags.StringSlice{} + for _, v := range strings.Split(tt.input, ",") { + //nolint + strsl.Set(v) + } + opt := Options{CustomHeaders: strsl} + got := opt.ParseCustomHeaders() + require.Equal(t, tt.want, got) + }) + } +} + +func TestParseHeadlessOptionalArguments(t *testing.T) { + tests := []struct { + name string + input string + want map[string]string + }{ + { + name: "single value", + input: "a=b", + want: map[string]string{"a": "b"}, + }, + { + name: "empty string", + input: "", + want: map[string]string{}, + }, + { + name: "empty key", + input: "=b", + want: map[string]string{}, + }, + { + name: "empty value", + input: "a=", + want: map[string]string{}, + }, + { + name: "double input", + input: "a=b,c=d", + want: map[string]string{"a": "b", "c": "d"}, + }, + { + name: "duplicated input", + input: "a=b,a=b", + want: map[string]string{"a": "b"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + strsl := goflags.StringSlice{} + for _, v := range strings.Split(tt.input, ",") { + //nolint + strsl.Set(v) + } + opt := Options{HeadlessOptionalArguments: strsl} + got := opt.ParseHeadlessOptionalArguments() + require.Equal(t, tt.want, got) + }) + } +}