From 2f21ad25dbbc0d0d4483f7309c6febe6805aeea0 Mon Sep 17 00:00:00 2001
From: Ramana Reddy
Date: Wed, 10 May 2023 14:22:10 +0530
Subject: [PATCH 1/4] support custom header from file

---
 cmd/katana/main.go         | 2 +-
 pkg/engine/hybrid/crawl.go | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/cmd/katana/main.go b/cmd/katana/main.go
index 2f34261f..38997d3e 100644
--- a/cmd/katana/main.go
+++ b/cmd/katana/main.go
@@ -77,7 +77,7 @@ pipelines offering both headless and non-headless crawling.`)
 		flagSet.BoolVarP(&options.AutomaticFormFill, "automatic-form-fill", "aff", false, "enable automatic form filling (experimental)"),
 		flagSet.IntVar(&options.Retries, "retry", 1, "number of times to retry the request"),
 		flagSet.StringVar(&options.Proxy, "proxy", "", "http/socks5 proxy to use"),
-		flagSet.StringSliceVarP(&options.CustomHeaders, "headers", "H", nil, "custom header/cookie to include in request", goflags.StringSliceOptions),
+		flagSet.StringSliceVarP(&options.CustomHeaders, "headers", "H", nil, "custom header/cookie to include in all http request in header:value format (file)", goflags.FileStringSliceOptions),
 		flagSet.StringVar(&cfgFile, "config", "", "path to the katana configuration file"),
 		flagSet.StringVarP(&options.FormConfig, "form-config", "fc", "", "path to custom form configuration file"),
 		flagSet.StringVarP(&options.FieldConfig, "field-config", "flc", "", "path to custom field configuration file"),
diff --git a/pkg/engine/hybrid/crawl.go b/pkg/engine/hybrid/crawl.go
index bd0ce972..dafe4946 100644
--- a/pkg/engine/hybrid/crawl.go
+++ b/pkg/engine/hybrid/crawl.go
@@ -61,6 +61,9 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
 		statucCodeText = http.StatusText(statusCode)
 	}
 	httpreq, _ := http.NewRequest(e.Request.Method, URL.String(), strings.NewReader(e.Request.PostData))
+	for k, v := range c.Headers {
+		httpreq.Header.Set(k, v)
+	}
 	httpresp := &http.Response{
 		Proto:      "HTTP/1.1",
 		ProtoMajor: 1,

From dd44c2a6c794b16296651b338ae97c46ece1d4f4 Mon Sep 17 00:00:00 2001
From: Ramana Reddy
Date: Wed, 10 May 2023 14:28:25 +0530
Subject: [PATCH 2/4] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 17e327c1..e85ab160 100644
--- a/README.md
+++ b/README.md
@@ -127,7 +127,7 @@ CONFIGURATION:
   -aff, -automatic-form-fill  enable automatic form filling (experimental)
   -retry int                  number of times to retry the request (default 1)
   -proxy string               http/socks5 proxy to use
-  -H, -headers string[]       custom header/cookie to include in request
+  -H, -headers string[]       custom header/cookie to include in all http request in header:value format (file)
   -config string              path to the katana configuration file
   -fc, -form-config string    path to custom form configuration file
   -flc, -field-config string  path to custom field configuration file

From 2a94dc245ac55e9b9849f65a3a2b2cf7f6ffa580 Mon Sep 17 00:00:00 2001
From: Ramana Reddy
Date: Fri, 12 May 2023 10:51:51 +0530
Subject: [PATCH 3/4] headers in headless mode added with #437

---
 pkg/engine/hybrid/crawl.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pkg/engine/hybrid/crawl.go b/pkg/engine/hybrid/crawl.go
index dafe4946..bd0ce972 100644
--- a/pkg/engine/hybrid/crawl.go
+++ b/pkg/engine/hybrid/crawl.go
@@ -61,9 +61,6 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
 		statucCodeText = http.StatusText(statusCode)
 	}
 	httpreq, _ := http.NewRequest(e.Request.Method, URL.String(), strings.NewReader(e.Request.PostData))
-	for k, v := range c.Headers {
-		httpreq.Header.Set(k, v)
-	}
 	httpresp := &http.Response{
 		Proto:      "HTTP/1.1",
 		ProtoMajor: 1,

From f990521353316c42e4e8a97b70ad5d2e9d2afbc2 Mon Sep 17 00:00:00 2001
From: Ramana Reddy
Date: Tue, 16 May 2023 13:28:35 +0530
Subject: [PATCH 4/4] fix nil pointer error

---
 pkg/engine/hybrid/crawl.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pkg/engine/hybrid/crawl.go b/pkg/engine/hybrid/crawl.go
index 966ac8a3..7cd3331a 100644
--- a/pkg/engine/hybrid/crawl.go
+++ b/pkg/engine/hybrid/crawl.go
@@ -163,6 +163,10 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
 	if err != nil {
 		return nil, errorutil.NewWithTag("hybrid", "url could not be parsed").Wrap(err)
 	}
+
+	if response.Resp == nil {
+		return nil, errorutil.NewWithTag("hybrid", "response is nil").Wrap(err)
+	}
 	response.Resp.Request.URL = parsed.URL

 	// Create a copy of intrapolated shadow DOM elements and parse them separately
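
Usage note (not part of the patches above): PATCH 1/4 switches the -H flag to goflags.FileStringSliceOptions, so -H should accept either inline header:value pairs or a file of headers. The sketch below assumes goflags reads one header per line from the file; "headers.txt" and the header values are hypothetical examples.

    $ cat headers.txt
    Cookie: session=<value>
    X-Api-Key: <value>

    # pass a file of headers, or keep passing inline values as before
    $ katana -u https://example.com -H headers.txt
    $ katana -u https://example.com -H 'Cookie: session=<value>'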