Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

caddyhttp: Smarter path matching and rewriting #4948

Merged
merged 15 commits into from
Aug 16, 2022
Merged
Prev Previous commit
Next Next commit
caddyhttp: Support %* wildcard in path matcher
This allows matching spans of raw/URI-escaped portions of the path.
mholt committed Aug 10, 2022

Verified

This commit was signed with the committer’s verified signature.
mholt Matt Holt
commit df254da43e19fd484544ea28ab3454031996f3ce
148 changes: 123 additions & 25 deletions modules/caddyhttp/matchers.go
Original file line number Diff line number Diff line change
@@ -67,9 +67,9 @@ type (
// matching is exact, not prefix-based, giving you more control and clarity
// over matching. Wildcards (`*`) may be used:
//
// - At the end, for a prefix match (`/prefix/*`)
// - At the beginning, for a suffix match (`*.suffix`)
// - On both sides, for a substring match (`*/contains/*`)
// - At the end only, for a prefix match (`/prefix/*`)
// - At the beginning only, for a suffix match (`*.suffix`)
// - On both sides only, for a substring match (`*/contains/*`)
// - In the middle, for a globular match (`/accounts/*/info`)
//
// Slashes are significant; i.e. `/foo*` matches `/foo`, `/foo/`, `/foo/bar`,
@@ -82,6 +82,11 @@ type (
// possible security issues, as all request paths will be normalized to
// their unescaped forms before matcher evaluation.
//
// Even though path matching is done in normalized space, the special
// wildcard `%*` may be used in place of a span that should NOT be decoded;
// that is, `/bands/%*` will match `/bands/AC%2fDC` whereas `/bands/*`
// will not.
//
// This matcher is fast, so it does not support regular expressions or
// capture groups. For slower but more powerful matching, use the
// path_regexp matcher.
@@ -355,34 +360,45 @@ func (MatchPath) CaddyModule() caddy.ModuleInfo {
// Provision lower-cases the paths in m to ensure case-insensitive matching.
func (m MatchPath) Provision(_ caddy.Context) error {
for i := range m {
// TODO: if m[i] == "*", put it first and delete all others (will always match)
m[i] = strings.ToLower(m[i])
}
return nil
}

// Match returns true if r matches m.
func (m MatchPath) Match(r *http.Request) bool {
// Even though RFC 9110 says that path matching is case-sensitive
// (https://www.rfc-editor.org/rfc/rfc9110.html#section-4.2.3),
// we do case in-sensitive matching to mitigate security issues
// related to differences between operating systems, applications,
// etc; if case-sensitive matching is needed, the regex matcher
// can be used instead.
lowerPath := strings.ToLower(r.URL.Path)

// Clean the path, merges doubled slashes, etc.
// This ensures maliciously crafted requests can't bypass
// the path matcher. See #4407
// the path matcher. See #4407. Good security posture
// suggests that we should do all we can to reduce any
// funny-looking paths into "normalized" forms such that
// weird variants can't sneak by.
lowerPath = path.Clean(lowerPath)

// see #2917; Windows ignores trailing dots and spaces
// See #2917; Windows ignores trailing dots and spaces
// when accessing files (sigh), potentially causing a
// security risk (cry) if PHP files end up being served
// as static files, exposing the source code, instead of
// being matched by *.php to be treated as PHP scripts
// being matched by *.php to be treated as PHP scripts.
lowerPath = strings.TrimRight(lowerPath, ". ")

// Cleaning may remove the trailing slash, but we want to keep it
// Cleaning may remove the trailing slash, but we want to keep it.
if lowerPath != "/" && strings.HasSuffix(r.URL.Path, "/") {
lowerPath = lowerPath + "/"
}

repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)

outer:
for _, matchPath := range m {
matchPath = repl.ReplaceAll(matchPath, "")

@@ -392,38 +408,120 @@ func (m MatchPath) Match(r *http.Request) bool {
return true
}

// if special wildcard "%*" is used, the span it represents is
// compared in the escaped space instead of normalized space;
// we have to do this ourselves, since only part of the path
// should be URI-decoded in this case; we iterate the pattern
// and escaped path in lock-step, decoding the escaped path
// as we go, but not decoding it in spans represented by %*.
if strings.Contains(matchPath, "%*") {
escapedPath := r.URL.EscapedPath()
var sb strings.Builder

var iPattern, iPath int
for {
if iPattern >= len(matchPath) || iPath >= len(escapedPath) {
break
}

pathCh := string(escapedPath[iPath])

// normalize/decode escape sequence
if pathCh == "%" {
var err error
pathCh, err = url.PathUnescape(escapedPath[iPath : iPath+2])
if err != nil {
// should be impossible unless EscapedPath() is giving us an invalid sequence!
continue outer
}
iPath += 2
}

normalize := true

switch matchPath[iPattern] {
case '%':
normalize = false
iPattern++
fallthrough
case '*':
remaining := escapedPath[iPath:]
until := len(escapedPath) - iPath // go until end of string...
if iPattern < len(matchPath)-1 { // ...unless the * is not at the end
nextCh := matchPath[iPattern+1]
until = strings.IndexByte(remaining, nextCh)
}
next := remaining[:until]
if normalize {
var err error
next, err = url.PathUnescape(next)
if err != nil {
continue outer // should be impossible anyway
}
}
sb.WriteString(next)
iPath += until
default:
sb.WriteString(pathCh)
iPath++
}

iPattern++
}

// we can now treat rawpath globs (%*) as regular globs (*)
matchPath = strings.ReplaceAll(matchPath, "%*", "*")

matches, _ := filepath.Match(matchPath, sb.String())
if matches {
return true
}

// doing prefix/suffix/substring matches doesn't make sense
continue
}

// for substring, prefix, and suffix matching, only perform those
// special, fast matches if they are the only wildcards in the pattern;
// otherwise we assume a globular match if any * appears in the middle

// special case: first and last characters are wildcard,
// treat it as a fast substring match
if len(matchPath) > 1 &&
if strings.Count(matchPath, "*") == 2 &&
strings.HasPrefix(matchPath, "*") &&
strings.HasSuffix(matchPath, "*") {
strings.HasSuffix(matchPath, "*") &&
strings.Count(matchPath, "*") == 2 {
if strings.Contains(lowerPath, matchPath[1:len(matchPath)-1]) {
return true
}
continue
}

// special case: first character is a wildcard,
// treat it as a fast suffix match
if strings.HasPrefix(matchPath, "*") {
if strings.HasSuffix(lowerPath, matchPath[1:]) {
return true
// only perform prefix/suffix match if it is the only wildcard...
// I think that is more correct most of the time
if strings.Count(matchPath, "*") == 1 {
// special case: first character is a wildcard,
// treat it as a fast suffix match
if strings.HasPrefix(matchPath, "*") {
if strings.HasSuffix(lowerPath, matchPath[1:]) {
return true
}
continue
}
continue
}

// special case: last character is a wildcard,
// treat it as a fast prefix match
if strings.HasSuffix(matchPath, "*") {
if strings.HasPrefix(lowerPath, matchPath[:len(matchPath)-1]) {
return true
// special case: last character is a wildcard,
// treat it as a fast prefix match
if strings.HasSuffix(matchPath, "*") {
if strings.HasPrefix(lowerPath, matchPath[:len(matchPath)-1]) {
return true
}
continue
}
continue
}

// for everything else, try globular matching, which also
// is exact matching if there are no glob/wildcard chars;
// can ignore error here because we can't handle it anyway
// at last, use globular matching, which also is exact matching
// if there are no glob/wildcard chars; we ignore the error here
// because we can't handle it anyway
matches, _ := filepath.Match(matchPath, lowerPath)
if matches {
return true
28 changes: 28 additions & 0 deletions modules/caddyhttp/matchers_test.go
Original file line number Diff line number Diff line change
@@ -304,6 +304,9 @@ func TestPathMatcher(t *testing.T) {
input: "/%25%40.txt",
expect: true,
},
// these next two tests may be unintuitive, but path matching
// takes place in normalized (unescaped) space, so the pattern
// should be written in that space
{
match: MatchPath{"/%25@.txt"},
input: "/%25@.txt",
@@ -314,6 +317,31 @@ func TestPathMatcher(t *testing.T) {
input: "/%25%40.txt",
expect: false,
},
{
match: MatchPath{"/bands/*/*"},
input: "/bands/AC%2FDC/T.N.T",
expect: false,
},
{
match: MatchPath{"/bands/%*/%*"},
input: "/bands/AC%2FDC/T.N.T",
expect: true,
},
{
match: MatchPath{"/bands/%*/%*"},
input: "/bands/AC/DC/T.N.T",
expect: false,
},
{
match: MatchPath{"/bands/%*"},
input: "/bands/AC/DC",
expect: false, // not a suffix match
},
{
match: MatchPath{"/bands/%*"},
input: "/bands/AC%2FDC",
expect: true,
},
} {
err := tc.match.Provision(caddy.Context{})
if err == nil && tc.provisionErr {