Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add resume option #538

Merged
merged 8 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,19 @@ import (
"math"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"time"

"github.com/projectdiscovery/goflags"
"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/katana/internal/runner"
"github.com/projectdiscovery/katana/pkg/output"
"github.com/projectdiscovery/katana/pkg/types"
errorutil "github.com/projectdiscovery/utils/errors"
fileutil "github.com/projectdiscovery/utils/file"
"github.com/rs/xid"
)

var (
Expand Down Expand Up @@ -42,19 +46,33 @@ func main() {
defer katanaRunner.Close()

// close handler
resumeFilename := defaultResumeFilename()
go func() {
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
for range c {
gologger.DefaultLogger.Info().Msg("- Ctrl+C pressed in Terminal")
katanaRunner.Close()

gologger.Info().Msgf("Creating resume file: %s\n", resumeFilename)
err := katanaRunner.SaveState(resumeFilename)
if err != nil {
gologger.Error().Msgf("Couldn't create resume file: %s\n", err)
}

os.Exit(0)
}
}()

if err := katanaRunner.ExecuteCrawling(); err != nil {
gologger.Fatal().Msgf("could not execute crawling: %s", err)
}

// on successful execution remove the resume file in case it exists
if fileutil.FileExists(resumeFilename) {
os.Remove(resumeFilename)
}

}

func readFlags() (*goflags.FlagSet, error) {
Expand All @@ -64,6 +82,7 @@ pipelines offering both headless and non-headless crawling.`)

flagSet.CreateGroup("input", "Input",
flagSet.StringSliceVarP(&options.URLs, "list", "u", nil, "target url / list to crawl", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.StringVar(&options.Resume, "resume", "", "resume scan using resume.cfg"),
)

flagSet.CreateGroup("config", "Configuration",
Expand Down Expand Up @@ -162,6 +181,7 @@ pipelines offering both headless and non-headless crawling.`)
return nil, errorutil.NewWithErr(err).Msgf("could not read config file")
}
}
cleanupOldResumeFiles()
return flagSet, nil
}

Expand All @@ -171,3 +191,26 @@ func init() {
errorutil.ShowStackTrace = true
}
}

// defaultResumeFilename builds the path of a fresh resume file inside the
// katana configuration directory: <home>/.config/katana/resume-<id>.cfg,
// where <id> is a newly generated xid, so each call yields a different name.
// If the user's home directory cannot be determined the failure is reported
// through gologger.Fatal (presumably terminating the process — confirm
// against gologger).
func defaultResumeFilename() string {
	home, homeErr := os.UserHomeDir()
	if homeErr != nil {
		gologger.Fatal().Msgf("could not get home directory: %s", homeErr)
	}

	resumeName := fmt.Sprintf("resume-%s.cfg", xid.New().String())
	katanaConfigDir := filepath.Join(home, ".config", "katana")
	return filepath.Join(katanaConfigDir, resumeName)
}

// cleanupOldResumeFiles asks fileutil.DeleteFilesOlderThan to purge stale
// resume files from the katana configuration directory
// (<home>/.config/katana). The filter targets entries with the "resume-"
// prefix that are older than 10 days. The cleanup is best-effort: whatever
// error the deletion returns is discarded. A failure to resolve the home
// directory is reported through gologger.Fatal.
func cleanupOldResumeFiles() {
	// resume files are kept for 10 days before being removed
	const maxResumeFileAge = 24 * time.Hour * 10

	home, homeErr := os.UserHomeDir()
	if homeErr != nil {
		gologger.Fatal().Msgf("could not get home directory: %s", homeErr)
	}

	staleResumeFiles := fileutil.FileFilters{
		OlderThan: maxResumeFileAge,
		Prefix:    "resume-",
	}
	katanaConfigDir := filepath.Join(home, ".config", "katana")

	// best-effort: a failed cleanup must not prevent the crawl from starting
	_ = fileutil.DeleteFilesOlderThan(katanaConfigDir, staleResumeFiles)
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ require (
github.com/projectdiscovery/hmap v0.0.13
github.com/projectdiscovery/ratelimit v0.0.9
github.com/projectdiscovery/retryablehttp-go v1.0.20
github.com/projectdiscovery/utils v0.0.45
github.com/projectdiscovery/utils v0.0.46
github.com/projectdiscovery/wappalyzergo v0.0.107
github.com/remeh/sizedwaitgroup v1.0.0
github.com/rs/xid v1.5.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ github.com/projectdiscovery/retryabledns v1.0.32 h1:Ekr+1j1jwQ2qINW7T02uMcXFc3Qe
github.com/projectdiscovery/retryabledns v1.0.32/go.mod h1:t8aKbGPnmN/IUFY7vk+M16LBmzBhMsfN/6YGKs6oL8c=
github.com/projectdiscovery/retryablehttp-go v1.0.20 h1:Ns3m7EPMEFKTSSNPtD1WGkCHvuYyQ6x98HYdKdALqwE=
github.com/projectdiscovery/retryablehttp-go v1.0.20/go.mod h1:3YrxgFe21HUL+25IU9VfFlTZ23yMEA2Zek6p8F55cuI=
github.com/projectdiscovery/utils v0.0.45 h1:i0xwdpaGrTIw8hVtutjpVXvPCMcKKwb9VbqWxlwL08M=
github.com/projectdiscovery/utils v0.0.45/go.mod h1:kJu+OqAWsoilLto06ajSp4U3gWuz51GEVLL/PogJXPk=
github.com/projectdiscovery/utils v0.0.46 h1:MTWu7AN3z0mClqo4a1+jKO5DcvAdvFfcmjpv1bJAdJA=
github.com/projectdiscovery/utils v0.0.46/go.mod h1:kJu+OqAWsoilLto06ajSp4U3gWuz51GEVLL/PogJXPk=
github.com/projectdiscovery/wappalyzergo v0.0.107 h1:B8gzJpAh08f1o+OiDunHAfKtqXiDnFCc7Rj1qKp+DB8=
github.com/projectdiscovery/wappalyzergo v0.0.107/go.mod h1:4Z3DKhi75zIPMuA+qSDDWxZvnhL4qTLmDx4dxNMu7MA=
github.com/refraction-networking/utls v1.3.2 h1:o+AkWB57mkcoW36ET7uJ002CpBWHu0KPxi6vzxvPnv8=
Expand Down
4 changes: 4 additions & 0 deletions internal/runner/executer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ func (r *Runner) ExecuteCrawling() error {
if len(inputs) == 0 {
return errorutil.New("no input provided for crawling")
}
for _, input := range inputs {
_ = r.state.InFlightUrls.Set(addSchemeIfNotExists(input), struct{}{})
}

defer r.crawler.Close()

Expand All @@ -28,6 +31,7 @@ func (r *Runner) ExecuteCrawling() error {
if err := r.crawler.Crawl(input); err != nil {
gologger.Warning().Msgf("Could not crawl %s: %s", input, err)
}
r.state.InFlightUrls.Delete(input)
}(input)
}
wg.Wait()
Expand Down
36 changes: 34 additions & 2 deletions internal/runner/runner.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package runner

import (
"encoding/json"
"os"

"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/katana/pkg/engine"
"github.com/projectdiscovery/katana/pkg/engine/hybrid"
Expand All @@ -9,8 +12,9 @@ import (
"github.com/projectdiscovery/katana/pkg/types"
errorutil "github.com/projectdiscovery/utils/errors"
fileutil "github.com/projectdiscovery/utils/file"
"go.uber.org/multierr"
mapsutil "github.com/projectdiscovery/utils/maps"
updateutils "github.com/projectdiscovery/utils/update"
"go.uber.org/multierr"
)

// Runner creates the required resources for crawling
Expand All @@ -20,10 +24,32 @@ type Runner struct {
stdin bool
crawler engine.Engine
options *types.Options
state *RunnerState
}

// RunnerState is the resumable state of a crawl run. It is what SaveState
// encodes as JSON into the resume file and what New decodes from that file
// when resuming.
type RunnerState struct {
// InFlightUrls holds the inputs that have been scheduled for crawling but
// have not finished yet: ExecuteCrawling adds every input before starting
// and deletes an entry once its crawl returns. Only the keys carry
// information; the values are empty structs.
InFlightUrls *mapsutil.SyncLockMap[string, struct{}]
}

// New returns a new crawl runner structure
func New(options *types.Options) (*Runner, error) {
// create the resume configuration structure
if options.ShouldResume() {
gologger.Info().Msg("Resuming from save checkpoint")

file, err := os.ReadFile(options.Resume)
if err != nil {
return nil, err
}

runnerState := &RunnerState{}
err = json.Unmarshal(file, runnerState)
if err != nil {
return nil, err
}
options.URLs = mapsutil.GetKeys(runnerState.InFlightUrls.GetAll())
}

configureOutput(options)
showBanner()

Expand Down Expand Up @@ -72,7 +98,7 @@ func New(options *types.Options) (*Runner, error) {
if err != nil {
return nil, errorutil.NewWithErr(err).Msgf("could not create standard crawler")
}
runner := &Runner{options: options, stdin: fileutil.HasStdin(), crawlerOptions: crawlerOptions, crawler: crawler}
runner := &Runner{options: options, stdin: fileutil.HasStdin(), crawlerOptions: crawlerOptions, crawler: crawler, state: &RunnerState{InFlightUrls: mapsutil.NewSyncLockMap[string, struct{}]()}}

return runner, nil
}
Expand All @@ -84,3 +110,9 @@ func (r *Runner) Close() error {
r.crawlerOptions.Close(),
)
}

// SaveState encodes the runner state (the URLs still in flight) as JSON and
// writes it to resumeFilename so that a later run can pick up from it.
//
// It returns an error when the state cannot be encoded or when the file
// cannot be written; in the former case nothing is written at all, so a
// broken resume file is never left behind.
func (r *Runner) SaveState(resumeFilename string) error {
	data, err := json.Marshal(r.state)
	if err != nil {
		return err
	}
	// The resume file is plain data: it needs no execute bit and should not be
	// writable by other users, hence 0644 rather than os.ModePerm (0777).
	return os.WriteFile(resumeFilename, data, 0644)
}
7 changes: 7 additions & 0 deletions pkg/types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/projectdiscovery/goflags"
"github.com/projectdiscovery/katana/pkg/output"
fileutil "github.com/projectdiscovery/utils/file"
)

// OnResultCallback (output.Result)
Expand All @@ -15,6 +16,8 @@ type OnResultCallback func(output.Result)
type Options struct {
// URLs contains a list of URLs for crawling
URLs goflags.StringSlice
// Resume the scan from the state stored in the resume config file
Resume string
// Scope contains a list of regexes for in-scope URLS
Scope goflags.StringSlice
// OutOfScope contains a list of regexes for out-scope URLS
Expand Down Expand Up @@ -166,3 +169,7 @@ func (options *Options) ParseHeadlessOptionalArguments() map[string]string {
}
return optionalArguments
}

// ShouldResume reports whether the crawl should be resumed from a saved
// state: the Resume option must be set and fileutil.FileExists must confirm
// that the file it names exists.
func (options *Options) ShouldResume() bool {
	if options.Resume == "" {
		return false
	}
	return fileutil.FileExists(options.Resume)
}