Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add resume option #538

Merged
merged 8 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,19 @@ import (
"math"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"time"

"github.com/projectdiscovery/goflags"
"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/katana/internal/runner"
"github.com/projectdiscovery/katana/pkg/output"
"github.com/projectdiscovery/katana/pkg/types"
errorutil "github.com/projectdiscovery/utils/errors"
fileutil "github.com/projectdiscovery/utils/file"
"github.com/rs/xid"
)

var (
Expand Down Expand Up @@ -42,19 +46,33 @@ func main() {
defer katanaRunner.Close()

// close handler
resumeFilename := defaultResumeFilename()
go func() {
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
for range c {
gologger.DefaultLogger.Info().Msg("- Ctrl+C pressed in Terminal")
katanaRunner.Close()

gologger.Info().Msgf("Creating resume file: %s\n", resumeFilename)
err := katanaRunner.SaveState(resumeFilename)
if err != nil {
gologger.Error().Msgf("Couldn't create resume file: %s\n", err)
}

os.Exit(0)
}
}()

if err := katanaRunner.ExecuteCrawling(); err != nil {
gologger.Fatal().Msgf("could not execute crawling: %s", err)
}

// on successful execution remove the resume file in case it exists
if fileutil.FileExists(resumeFilename) {
os.Remove(resumeFilename)
}

}

func readFlags() (*goflags.FlagSet, error) {
Expand All @@ -64,6 +82,7 @@ pipelines offering both headless and non-headless crawling.`)

flagSet.CreateGroup("input", "Input",
flagSet.StringSliceVarP(&options.URLs, "list", "u", nil, "target url / list to crawl", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.StringVar(&options.Resume, "resume", "", "resume scan using resume.cfg"),
)

flagSet.CreateGroup("config", "Configuration",
Expand Down Expand Up @@ -160,6 +179,7 @@ pipelines offering both headless and non-headless crawling.`)
return nil, errorutil.NewWithErr(err).Msgf("could not read config file")
}
}
cleanupOldResumeFiles()
return flagSet, nil
}

Expand All @@ -169,3 +189,26 @@ func init() {
errorutil.ShowStackTrace = true
}
}

// defaultResumeFilename builds the path of the resume file for this run:
// <home>/.config/katana/resume-<id>.cfg, where <id> is a freshly generated xid.
// A failure to resolve the user's home directory is reported through
// gologger.Fatal (presumably terminating the process — confirm against gologger).
func defaultResumeFilename() string {
	home, err := os.UserHomeDir()
	if err != nil {
		gologger.Fatal().Msgf("could not get home directory: %s", err)
	}
	// A new id per invocation keeps resume files of different runs distinct.
	name := fmt.Sprintf("resume-%s.cfg", xid.New().String())
	return filepath.Join(home, ".config", "katana", name)
}

// cleanupOldResumeFiles asks fileutil to delete files under
// <home>/.config/katana whose name starts with "resume-" and which are older
// than 10 days. The result of the deletion call is intentionally discarded, so
// the cleanup is best effort; only a failure to resolve the home directory is
// reported (through gologger.Fatal).
func cleanupOldResumeFiles() {
	// Retention window for resume files: cleanup happens on the 10th day.
	const maxAge = 10 * 24 * time.Hour

	home, err := os.UserHomeDir()
	if err != nil {
		gologger.Fatal().Msgf("could not get home directory: %s", err)
	}

	staleResumeFiles := fileutil.FileFilters{
		Prefix:    "resume-",
		OlderThan: maxAge,
	}
	_ = fileutil.DeleteFilesOlderThan(filepath.Join(home, ".config", "katana"), staleResumeFiles)
}
39 changes: 37 additions & 2 deletions internal/runner/runner.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package runner

import (
"encoding/json"
"os"

"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/katana/pkg/engine"
"github.com/projectdiscovery/katana/pkg/engine/hybrid"
Expand All @@ -9,8 +12,8 @@ import (
"github.com/projectdiscovery/katana/pkg/types"
errorutil "github.com/projectdiscovery/utils/errors"
fileutil "github.com/projectdiscovery/utils/file"
"go.uber.org/multierr"
updateutils "github.com/projectdiscovery/utils/update"
"go.uber.org/multierr"
)

// Runner creates the required resources for crawling
Expand All @@ -20,10 +23,32 @@ type Runner struct {
stdin bool
crawler engine.Engine
options *types.Options
state *RunnerState
}

// RunnerState is the snapshot of a run that SaveState serializes as JSON into
// the resume file and that New decodes again when a scan is resumed.
type RunnerState struct {
// InFlightUrls lists the URLs reported as in flight by the crawler when the
// state was built; on resume they replace the configured input URLs.
InFlightUrls []string
}

// New returns a new crawl runner structure
func New(options *types.Options) (*Runner, error) {
// create the resume configuration structure
if options.ShouldResume() {
gologger.Info().Msg("Resuming from save checkpoint")

file, err := os.ReadFile(options.Resume)
if err != nil {
return nil, err
}

runnerState := &RunnerState{}
err = json.Unmarshal(file, runnerState)
if err != nil {
return nil, err
}
options.URLs = runnerState.InFlightUrls
}

configureOutput(options)
showBanner()

Expand Down Expand Up @@ -72,7 +97,7 @@ func New(options *types.Options) (*Runner, error) {
if err != nil {
return nil, errorutil.NewWithErr(err).Msgf("could not create standard crawler")
}
runner := &Runner{options: options, stdin: fileutil.HasStdin(), crawlerOptions: crawlerOptions, crawler: crawler}
runner := &Runner{options: options, stdin: fileutil.HasStdin(), crawlerOptions: crawlerOptions, crawler: crawler, state: &RunnerState{}}

return runner, nil
}
Expand All @@ -84,3 +109,13 @@ func (r *Runner) Close() error {
r.crawlerOptions.Close(),
)
}

// SaveState serializes the current runner state as JSON and writes it to
// resumeFilename, creating or truncating the file.
//
// It returns the encoding error if the state cannot be marshalled, or the
// error reported by os.WriteFile if the file cannot be written.
func (r *Runner) SaveState(resumeFilename string) error {
	runnerState := r.buildRunnerState()
	data, err := json.Marshal(runnerState)
	if err != nil {
		// Never write a partial or empty resume file when encoding failed.
		return err
	}
	// The resume file is plain data: owner read/write, others read-only.
	// os.ModePerm (0777) would mark it executable and world-writable.
	return os.WriteFile(resumeFilename, data, 0644)
}

// buildRunnerState returns a new RunnerState whose InFlightUrls are the URLs
// already recorded in the runner's state followed by the URLs the crawler
// currently reports as in flight.
func (r *Runner) buildRunnerState() *RunnerState {
	snapshot := &RunnerState{}
	snapshot.InFlightUrls = append(r.state.InFlightUrls, r.crawler.GetInFlightUrls()...)
	return snapshot
}
12 changes: 9 additions & 3 deletions pkg/engine/common/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,19 @@ import (
)

type Shared struct {
Headers map[string]string
KnownFiles *files.KnownFiles
Options *types.CrawlerOptions
Headers map[string]string
KnownFiles *files.KnownFiles
Options *types.CrawlerOptions
InFlightUrls mapsutil.SyncLockMap[string, bool]
dogancanbakir marked this conversation as resolved.
Show resolved Hide resolved
}

func NewShared(options *types.CrawlerOptions) (*Shared, error) {
shared := &Shared{
Headers: options.Options.ParseCustomHeaders(),
Options: options,
InFlightUrls: mapsutil.SyncLockMap[string, bool]{
Map: make(map[string]bool),
},
}
if options.Options.KnownFiles != "" {
httpclient, _, err := BuildHttpClient(options.Dialer, options.Options, nil)
Expand Down Expand Up @@ -82,6 +86,7 @@ func (s *Shared) Enqueue(queue *queue.Queue, navigationRequests ...*navigation.R
continue
}
queue.Push(nr, nr.Depth)
_ = s.InFlightUrls.Set(reqUrl, true)
}
}

Expand Down Expand Up @@ -242,6 +247,7 @@ func (s *Shared) Do(crawlSession *CrawlSession, doRequest DoRequestFunc) error {

navigationRequests := parser.ParseResponse(resp)
s.Enqueue(crawlSession.Queue, navigationRequests...)
s.InFlightUrls.Delete(req.RequestURL())
}()
}
wg.Wait()
Expand Down
1 change: 1 addition & 0 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ package engine

// Engine is the interface a crawling engine exposes to the runner.
type Engine interface {
// Crawl crawls the target identified by the given root URL.
Crawl(string) error
// GetInFlightUrls returns the URLs the engine currently tracks as in flight;
// the runner stores them in the resume file.
GetInFlightUrls() []string
// Close shuts the engine down (presumably releasing its resources — confirm
// against the implementations).
Close() error
}
5 changes: 5 additions & 0 deletions pkg/engine/hybrid/hybrid.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/projectdiscovery/katana/pkg/engine/common"
"github.com/projectdiscovery/katana/pkg/types"
errorutil "github.com/projectdiscovery/utils/errors"
mapsutil "github.com/projectdiscovery/utils/maps"
stringsutil "github.com/projectdiscovery/utils/strings"
urlutil "github.com/projectdiscovery/utils/url"
ps "github.com/shirou/gopsutil/v3/process"
Expand Down Expand Up @@ -123,6 +124,10 @@ func (c *Crawler) Crawl(rootURL string) error {
return nil
}

// GetInFlightUrls returns the keys of the crawler's InFlightUrls map, i.e. the
// URLs currently tracked as in flight. The order of the returned slice is
// whatever mapsutil.GetKeys yields (presumably unordered — confirm).
func (c *Crawler) GetInFlightUrls() []string {
	inFlight := c.InFlightUrls.GetAll()
	return mapsutil.GetKeys(inFlight)
}

// buildChromeLauncher builds a new chrome launcher instance
func buildChromeLauncher(options *types.CrawlerOptions, dataStore string) (*launcher.Launcher, error) {
chromeLauncher := launcher.New().
Expand Down
5 changes: 5 additions & 0 deletions pkg/engine/standard/standard.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"github.com/projectdiscovery/katana/pkg/engine/common"
"github.com/projectdiscovery/katana/pkg/types"
errorutil "github.com/projectdiscovery/utils/errors"
mapsutil "github.com/projectdiscovery/utils/maps"
)

// Crawler is a standard crawler instance
Expand Down Expand Up @@ -39,3 +40,7 @@ func (c *Crawler) Crawl(rootURL string) error {
}
return nil
}

// GetInFlightUrls returns the keys of the crawler's InFlightUrls map, i.e. the
// URLs currently tracked as in flight. The order of the returned slice is
// whatever mapsutil.GetKeys yields (presumably unordered — confirm).
func (c *Crawler) GetInFlightUrls() []string {
	inFlight := c.InFlightUrls.GetAll()
	return mapsutil.GetKeys(inFlight)
}
7 changes: 7 additions & 0 deletions pkg/types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/projectdiscovery/goflags"
"github.com/projectdiscovery/katana/pkg/output"
fileutil "github.com/projectdiscovery/utils/file"
)

// OnResultCallback (output.Result)
Expand All @@ -15,6 +16,8 @@ type OnResultCallback func(output.Result)
type Options struct {
// URLs contains a list of URLs for crawling
URLs goflags.StringSlice
// Resume the scan from the state stored in the resume config file
Resume string
// Scope contains a list of regexes for in-scope URLS
Scope goflags.StringSlice
// OutOfScope contains a list of regexes for out-scope URLS
Expand Down Expand Up @@ -162,3 +165,7 @@ func (options *Options) ParseHeadlessOptionalArguments() map[string]string {
}
return optionalArguments
}

// ShouldResume reports whether a resume file was specified and that file
// exists according to fileutil.FileExists.
func (options *Options) ShouldResume() bool {
	if options.Resume == "" {
		// No resume file requested; skip the filesystem check entirely.
		return false
	}
	return fileutil.FileExists(options.Resume)
}