From 40750ddb817b77cb5905872fb0211024c9643471 Mon Sep 17 00:00:00 2001
From: Sanjay Ghemawat
Date: Mon, 9 Jan 2023 09:08:44 -0800
Subject: [PATCH] Speed up matching in the presence of timeouts.

Currently regexp2 can be quite slow when a MatchTimeout is supplied
(a micro-benchmark shows 2300ns compared to 34ns when no timeout is
supplied). This slowdown is caused by repeated timeout checks, each of
which calls time.Now().

The new approach introduces a fast but approximate clock: an atomic
variable that a background goroutine updates once every 10ms. A timeout
check now just compares its deadline against this variable.

Removed the "timeout check skip" mechanism since a timeout check is now
very cheap.

Added a simple micro-benchmark that compares the speed of searching
100-byte text with and without a timeout.

Performance impact:

1. A micro-benchmark that looks for an "easy" regexp in a 100-byte
   string goes from ~2300ns to ~34ns.
2. Chroma (a syntax highlighter) speeds up from ~500ms to ~50ms on a
   24KB source file.
---
 fastclock.go               | 104 +++++++++++++++++++++++++++++++++++++
 fastclock_test.go          |  33 ++++++++++++
 regexp_performance_test.go |  23 ++++++++
 runner.go                  |  35 ++-----------
 4 files changed, 165 insertions(+), 30 deletions(-)
 create mode 100644 fastclock.go
 create mode 100644 fastclock_test.go

diff --git a/fastclock.go b/fastclock.go
new file mode 100644
index 0000000..d0abd44
--- /dev/null
+++ b/fastclock.go
@@ -0,0 +1,104 @@
+package regexp2
+
+import (
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// fasttime holds a time value (ticks since clock initialization)
+type fasttime int64
+
+// fastclock provides a fast clock implementation.
+//
+// A background goroutine periodically stores the current time
+// into an atomic variable.
+//
+// A deadline can be quickly checked for expiration by comparing
+// its value to the clock stored in the atomic variable.
+//
+// The goroutine automatically stops once clockEnd is reached.
+// (clockEnd covers the largest deadline seen so far + some
+// extra time). This ensures that if regexp2 with timeouts
+// stops being used we will stop background work.
+type fastclock struct {
+	// current and clockEnd can be read via atomic loads.
+	// Reads and writes of other fields require mu to be held.
+	mu sync.Mutex
+
+	start    time.Time  // Time corresponding to fasttime(0)
+	current  atomicTime // Current time (approximate)
+	clockEnd atomicTime // When clock updater is supposed to stop (>= any existing deadline)
+	running  bool       // Is a clock updater running?
+}
+
+var fast fastclock
+
+// reached returns true if current time is at or past t.
+func (t fasttime) reached() bool {
+	return fast.current.read() >= t
+}
+
+// makeDeadline returns a time that is approximately time.Now().Add(d)
+func makeDeadline(d time.Duration) fasttime {
+	// Increase the deadline since the clock we are reading may be
+	// just about to tick forwards.
+	end := fast.current.read() + durationToTicks(d+clockPeriod)
+
+	// Start or extend clock if necessary.
+	if end > fast.clockEnd.read() {
+		extendClock(end)
+	}
+	return end
+}
+
+// extendClock ensures that clock is live and will run until at least end.
+func extendClock(end fasttime) {
+	fast.mu.Lock()
+	defer fast.mu.Unlock()
+
+	if fast.start.IsZero() {
+		fast.start = time.Now()
+	}
+
+	// Extend the running time to cover end as well as a bit of slop.
+	if shutdown := end + durationToTicks(time.Second); shutdown > fast.clockEnd.read() {
+		fast.clockEnd.write(shutdown)
+	}
+
+	// Start clock if necessary
+	if !fast.running {
+		fast.running = true
+		go runClock()
+	}
+}
+
+func durationToTicks(d time.Duration) fasttime {
+	// Downscale nanoseconds to approximately a millisecond so that we can avoid
+	// overflow even if the caller passes in math.MaxInt64.
+	return fasttime(d) >> 20
+}
+
+// clockPeriod is the approximate interval between updates of fast.current.
+const clockPeriod = time.Millisecond * 10
+
+func runClock() {
+	fast.mu.Lock()
+	defer fast.mu.Unlock()
+
+	for fast.current.read() <= fast.clockEnd.read() {
+		// Unlock while sleeping.
+		fast.mu.Unlock()
+		time.Sleep(clockPeriod)
+		fast.mu.Lock()
+
+		newTime := durationToTicks(time.Since(fast.start))
+		fast.current.write(newTime)
+	}
+	fast.running = false
+}
+
+type atomicTime struct{ v int64 } // Should change to atomic.Int64 when we can use go 1.19
+
+func (t *atomicTime) read() fasttime   { return fasttime(atomic.LoadInt64(&t.v)) }
+func (t *atomicTime) write(v fasttime) { atomic.StoreInt64(&t.v, int64(v)) }
diff --git a/fastclock_test.go b/fastclock_test.go
new file mode 100644
index 0000000..29c950c
--- /dev/null
+++ b/fastclock_test.go
@@ -0,0 +1,33 @@
+package regexp2
+
+import (
+	"fmt"
+	"testing"
+	"time"
+)
+
+func TestDeadline(t *testing.T) {
+	for _, delay := range []time.Duration{
+		clockPeriod / 10,
+		clockPeriod,
+		clockPeriod * 5,
+		clockPeriod * 10,
+	} {
+		t.Run(fmt.Sprint(delay), func(t *testing.T) {
+			t.Parallel()
+			start := time.Now()
+			d := makeDeadline(delay)
+			if d.reached() {
+				t.Fatalf("deadline (%v) unexpectedly expired immediately", delay)
+			}
+			time.Sleep(delay / 2)
+			if d.reached() {
+				t.Fatalf("deadline (%v) expired too soon (after %v)", delay, time.Since(start))
+			}
+			time.Sleep(delay/2 + 2*clockPeriod) // Give clock time to tick
+			if !d.reached() {
+				t.Fatalf("deadline (%v) did not expire within %v", delay, time.Since(start))
+			}
+		})
+	}
+}
diff --git a/regexp_performance_test.go b/regexp_performance_test.go
index 01a87d0..c955969 100644
--- a/regexp_performance_test.go
+++ b/regexp_performance_test.go
@@ -3,6 +3,7 @@ package regexp2
 import (
 	"strings"
 	"testing"
+	"time"
 )
 
 func BenchmarkLiteral(b *testing.B) {
@@ -305,3 +306,25 @@ func BenchmarkLeading(b *testing.B) {
 		}
 	}
 }
+
+func BenchmarkShortSearch(b *testing.B) {
+	for _, name := range []string{"no-timeout", "timeout"} {
+		b.Run(name, func(b *testing.B) {
+			b.StopTimer()
+			r := MustCompile(easy0, 0)
+			if name == "timeout" {
+				r.MatchTimeout = time.Second
+			}
+			t := makeText(100)
+			b.SetBytes(int64(len(t)))
+			b.StartTimer()
+			for i := 0; i < b.N; i++ {
+				if m, err := r.MatchRunes(t); m {
+					b.Fatal("match!")
+				} else if err != nil {
+					b.Fatalf("Err %v", err)
+				}
+			}
+		})
+	}
+}
diff --git a/runner.go b/runner.go
index 4d7f9b0..494dcef 100644
--- a/runner.go
+++ b/runner.go
@@ -58,10 +58,9 @@ type runner struct {
 
 	runmatch *Match // result object
 
-	ignoreTimeout       bool
-	timeout             time.Duration // timeout in milliseconds (needed for actual)
-	timeoutChecksToSkip int
-	timeoutAt           time.Time
+	ignoreTimeout bool
+	timeout       time.Duration // timeout in milliseconds (needed for actual)
+	deadline      fasttime
 
 	operator        syntax.InstOp
 	codepos         int
@@ -1551,39 +1550,15 @@ func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
 		(index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
 }
 
-// this seems like a comment to justify randomly picking 1000 :-P
-// We have determined this value in a series of experiments where x86 retail
-// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
-// of TimeoutCheckFrequency did not tend to increase performance; smaller values
-// of TimeoutCheckFrequency tended to slow down the execution.
-const timeoutCheckFrequency int = 1000
-
 func (r *runner) startTimeoutWatch() {
 	if r.ignoreTimeout {
 		return
 	}
-
-	r.timeoutChecksToSkip = timeoutCheckFrequency
-	r.timeoutAt = time.Now().Add(r.timeout)
+	r.deadline = makeDeadline(r.timeout)
 }
 
 func (r *runner) checkTimeout() error {
-	if r.ignoreTimeout {
-		return nil
-	}
-	r.timeoutChecksToSkip--
-	if r.timeoutChecksToSkip != 0 {
-		return nil
-	}
-
-	r.timeoutChecksToSkip = timeoutCheckFrequency
-	return r.doCheckTimeout()
-}
-
-func (r *runner) doCheckTimeout() error {
-	current := time.Now()
-
-	if current.Before(r.timeoutAt) {
+	if r.ignoreTimeout || !r.deadline.reached() {
 		return nil
 	}
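
For reference, here is a minimal caller-side sketch of the code path this patch
speeds up, assuming the public regexp2 API (MustCompile, the MatchTimeout field,
MatchString) at the import path github.com/dlclark/regexp2; the pattern and input
text are illustrative and not taken from the patch. It mirrors what the new
BenchmarkShortSearch does: set a timeout and match a short input.

package main

import (
	"fmt"
	"time"

	"github.com/dlclark/regexp2"
)

func main() {
	// Compile a pattern and attach a match timeout, as the benchmark does.
	re := regexp2.MustCompile(`ABCDEFGHIJKLMNOPQRSTUVWXYZ$`, 0)
	re.MatchTimeout = time.Second

	// With this patch, the runner's periodic timeout checks compare a
	// precomputed deadline against the atomic fast clock instead of
	// calling time.Now() on every check.
	m, err := re.MatchString("a short haystack that does not contain the needle")
	if err != nil {
		// A timeout surfaces here as a non-nil error.
		fmt.Println("error:", err)
		return
	}
	fmt.Println("matched:", m)
}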