From 605ed466a179e5923d9723a7d3165cbf01f1e6d6 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 23 Jan 2020 11:19:25 -0500 Subject: [PATCH] Closes #5175: Add Glean keys for distinguishing between fatal and non-fatal native crashes --- components/lib/crash/README.md | 2 +- components/lib/crash/docs/metrics.md | 2 +- components/lib/crash/metrics.yaml | 7 +- .../mozilla/components/lib/crash/Crash.kt | 2 + .../service/GleanCrashReporterService.kt | 32 +++++-- .../service/GleanCrashReporterServiceTest.kt | 87 +++++++++++++++---- docs/changelog.md | 3 + 7 files changed, 104 insertions(+), 31 deletions(-) diff --git a/components/lib/crash/README.md b/components/lib/crash/README.md index 7ddf958f577..f0c14ed1e73 100644 --- a/components/lib/crash/README.md +++ b/components/lib/crash/README.md @@ -104,7 +104,7 @@ CrashReporter( ### Sending crash reports to Glean [Glean](https://docs.telemetry.mozilla.org/concepts/glean/glean.html) is a new way to collect telemetry by Mozilla. -This will record crash counts as a labeled counter with each label corresponding to a specific type of crash (`native_code_crash`, and `unhandled_exception`, currently). +This will record crash counts as a labeled counter with each label corresponding to a specific type of crash (`fatal_native_code_crash`, `nonfatal_native_code_crash`, `caught_exception`, `uncaught_exception`, currently). The list of collected metrics is available in the [metrics.yaml file](metrics.yaml), with their documentation [living here](docs/metrics.md). Due to the fact that Glean can only be recorded to in the main process and lib-crash runs in a separate process when it runs to handle the crash, lib-crash persists the data in a file format and then reads and records the data from the main process when the application is next run since the `GleanCrashReporterService` diff --git a/components/lib/crash/docs/metrics.md b/components/lib/crash/docs/metrics.md index e711e58fbca..a16be2ba4bc 100644 --- a/components/lib/crash/docs/metrics.md +++ b/components/lib/crash/docs/metrics.md @@ -17,7 +17,7 @@ The following metrics are added to the ping: | Name | Type | Description | Data reviews | Extras | Expiration | | --- | --- | --- | --- | --- | --- | -| crash_metrics.crash_count |[labeled_counter](https://mozilla.github.io/glean/book/user/metrics/labeled_counters.html) |Counts the number of crashes that occur in the application. This measures only the counts of each crash in association with the labeled type of the crash. The labels correspond to the types of crashes handled by lib-crash. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1553935#c3)||never | +| crash_metrics.crash_count |[labeled_counter](https://mozilla.github.io/glean/book/user/metrics/labeled_counters.html) |Counts the number of crashes that occur in the application. This measures only the counts of each crash in association with the labeled type of the crash. The labels correspond to the types of crashes handled by lib-crash. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1553935#c3)||never | diff --git a/components/lib/crash/metrics.yaml b/components/lib/crash/metrics.yaml index a8a55fffa95..f5bf29bf991 100644 --- a/components/lib/crash/metrics.yaml +++ b/components/lib/crash/metrics.yaml @@ -15,14 +15,19 @@ crash_metrics: Counts the number of crashes that occur in the application. This measures only the counts of each crash in association with the labeled type of the crash. The labels correspond to the types of crashes handled by lib-crash. + + Deprecated: native_code_crash replaced by fatal_native_code_crash and nonfatal_native_code_crash labels: - uncaught_exception - caught_exception - - native_code_crash + - fatal_native_code_crash + - nonfatal_native_code_crash bugs: - https://bugzilla.mozilla.org/1553935 + - https://github.com/mozilla-mobile/android-components/issues/5175 data_reviews: - https://bugzilla.mozilla.org/show_bug.cgi?id=1553935#c3 + - https://github.com/mozilla-mobile/android-components/pull/5700#pullrequestreview-347721248 notification_emails: - android-components-team@mozilla.com expires: never diff --git a/components/lib/crash/src/main/java/mozilla/components/lib/crash/Crash.kt b/components/lib/crash/src/main/java/mozilla/components/lib/crash/Crash.kt index 11486361017..46e4d59538c 100644 --- a/components/lib/crash/src/main/java/mozilla/components/lib/crash/Crash.kt +++ b/components/lib/crash/src/main/java/mozilla/components/lib/crash/Crash.kt @@ -31,6 +31,7 @@ sealed class Crash { * A crash caused by an uncaught exception. * * @property throwable The [Throwable] that caused the crash. + * @property breadcrumbs List of breadcrumbs to send with the crash report. */ data class UncaughtExceptionCrash( val throwable: Throwable, @@ -61,6 +62,7 @@ sealed class Crash { * @property isFatal Whether or not the crash was fatal or not: If true, the main application process was affected * by the crash. If false, only an internal process used by Gecko has crashed and the application * may be able to recover. + * @property breadcrumbs List of breadcrumbs to send with the crash report. */ data class NativeCodeCrash( val minidumpPath: String?, diff --git a/components/lib/crash/src/main/java/mozilla/components/lib/crash/service/GleanCrashReporterService.kt b/components/lib/crash/src/main/java/mozilla/components/lib/crash/service/GleanCrashReporterService.kt index d70f31d8399..94a9584ff6a 100644 --- a/components/lib/crash/src/main/java/mozilla/components/lib/crash/service/GleanCrashReporterService.kt +++ b/components/lib/crash/src/main/java/mozilla/components/lib/crash/service/GleanCrashReporterService.kt @@ -35,7 +35,8 @@ class GleanCrashReporterService( // as the persisted crashes in the crash count file (see above comment) const val UNCAUGHT_EXCEPTION_KEY = "uncaught_exception" const val CAUGHT_EXCEPTION_KEY = "caught_exception" - const val NATIVE_CODE_CRASH_KEY = "native_code_crash" + const val FATAL_NATIVE_CODE_CRASH_KEY = "fatal_native_code_crash" + const val NONFATAL_NATIVE_CODE_CRASH_KEY = "nonfatal_native_code_crash" } private val logger = Logger("glean/GleanCrashReporterService") @@ -90,7 +91,8 @@ class GleanCrashReporterService( /** * Parses the crashes collected in the persisted crash file. The format of this file is simple, - * each line may contain [UNCAUGHT_EXCEPTION_KEY], [CAUGHT_EXCEPTION_KEY], or [NATIVE_CODE_CRASH_KEY] + * each line may contain [UNCAUGHT_EXCEPTION_KEY], [CAUGHT_EXCEPTION_KEY], + * [FATAL_NATIVE_CODE_CRASH_KEY] or [NONFATAL_NATIVE_CODE_CRASH_KEY] * followed by a newline character. * * Example: @@ -98,21 +100,24 @@ class GleanCrashReporterService( * <--Beginning of file--> * uncaught_exception\n * uncaught_exception\n - * native_code_crash\n + * fatal_native_code_crash\n * uncaught_exception\n * caught_exception\n + * nonfatal_native_code_crash\n * <--End of file--> * * It is unlikely that there will be more than one crash in a file, but not impossible. This * could happen, for instance, if the application crashed again before the file could be * processed. */ + @Suppress("ComplexMethod") @VisibleForTesting(otherwise = VisibleForTesting.PRIVATE) internal fun parseCrashFile() { val lines = file.readLines() var uncaughtExceptionCount = 0 var caughtExceptionCount = 0 - var nativeCodeCrashCount = 0 + var fatalNativeCodeCrashCount = 0 + var nonfatalNativeCodeCrashCount = 0 // It's possible that there was more than one crash recorded in the file so process each // line and accumulate the crash counts. @@ -120,7 +125,8 @@ class GleanCrashReporterService( when (line) { UNCAUGHT_EXCEPTION_KEY -> ++uncaughtExceptionCount CAUGHT_EXCEPTION_KEY -> ++caughtExceptionCount - NATIVE_CODE_CRASH_KEY -> ++nativeCodeCrashCount + FATAL_NATIVE_CODE_CRASH_KEY -> ++fatalNativeCodeCrashCount + NONFATAL_NATIVE_CODE_CRASH_KEY -> ++nonfatalNativeCodeCrashCount } } @@ -131,8 +137,11 @@ class GleanCrashReporterService( if (caughtExceptionCount > 0) { CrashMetrics.crashCount[CAUGHT_EXCEPTION_KEY].add(caughtExceptionCount) } - if (nativeCodeCrashCount > 0) { - CrashMetrics.crashCount[NATIVE_CODE_CRASH_KEY].add(nativeCodeCrashCount) + if (fatalNativeCodeCrashCount > 0) { + CrashMetrics.crashCount[FATAL_NATIVE_CODE_CRASH_KEY].add(fatalNativeCodeCrashCount) + } + if (nonfatalNativeCodeCrashCount > 0) { + CrashMetrics.crashCount[NONFATAL_NATIVE_CODE_CRASH_KEY].add(nonfatalNativeCodeCrashCount) } } @@ -145,7 +154,8 @@ class GleanCrashReporterService( * that this would be called from more than one place at the same time. * * @param crash Pass in the correct crash label to write to the file - * [UNCAUGHT_EXCEPTION_KEY], [CAUGHT_EXCEPTION_KEY] or [NATIVE_CODE_CRASH_KEY] + * [UNCAUGHT_EXCEPTION_KEY], [CAUGHT_EXCEPTION_KEY], [FATAL_NATIVE_CODE_CRASH_KEY] + * or [NONFATAL_NATIVE_CODE_CRASH_KEY] */ @VisibleForTesting(otherwise = VisibleForTesting.PRIVATE) internal fun reportCrash(crash: String) { @@ -168,7 +178,11 @@ class GleanCrashReporterService( } override fun report(crash: Crash.NativeCodeCrash) { - reportCrash(NATIVE_CODE_CRASH_KEY) + if (crash.isFatal) { + reportCrash(FATAL_NATIVE_CODE_CRASH_KEY) + } else { + reportCrash(NONFATAL_NATIVE_CODE_CRASH_KEY) + } } override fun report(throwable: Throwable) { diff --git a/components/lib/crash/src/test/java/mozilla/components/lib/crash/service/GleanCrashReporterServiceTest.kt b/components/lib/crash/src/test/java/mozilla/components/lib/crash/service/GleanCrashReporterServiceTest.kt index 8bd0488deb1..db84a99a398 100644 --- a/components/lib/crash/src/test/java/mozilla/components/lib/crash/service/GleanCrashReporterServiceTest.kt +++ b/components/lib/crash/src/test/java/mozilla/components/lib/crash/service/GleanCrashReporterServiceTest.kt @@ -30,12 +30,51 @@ class GleanCrashReporterServiceTest { val gleanRule = GleanTestRule(context) @Test - fun `GleanCrashReporterService records native code crashes`() { + fun `GleanCrashReporterService records fatal native code crashes`() { // Because of how Glean is implemented, it can potentially persist information between // tests or even between test classes, so we compensate by capturing the initial value // to compare to. val initialValue = try { - CrashMetrics.crashCount[GleanCrashReporterService.NATIVE_CODE_CRASH_KEY].testGetValue() + CrashMetrics.crashCount[GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY].testGetValue() + } catch (e: NullPointerException) { + 0 + } + + run { + val service = spy(GleanCrashReporterService(context)) + + assertFalse("No previous persisted crashes must exist", service.file.exists()) + + val crash = Crash.NativeCodeCrash("", true, "", true, arrayListOf()) + service.report(crash) + + verify(service).report(crash) + + assertTrue("Persistence file must exist", service.file.exists()) + val lines = service.file.readLines() + assertEquals("Must be native code crash", + GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY, lines.first()) + } + + // Initialize a fresh GleanCrashReporterService and ensure metrics are recorded in Glean + run { + GleanCrashReporterService(context) + + assertTrue("Glean must record a value", + CrashMetrics.crashCount[GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY].testHasValue()) + assertEquals("Glean must record correct value", + 1, + CrashMetrics.crashCount[GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY].testGetValue() - initialValue) + } + } + + @Test + fun `GleanCrashReporterService records non-fatal native code crashes`() { + // Because of how Glean is implemented, it can potentially persist information between + // tests or even between test classes, so we compensate by capturing the initial value + // to compare to. + val initialValue = try { + CrashMetrics.crashCount[GleanCrashReporterService.NONFATAL_NATIVE_CODE_CRASH_KEY].testGetValue() } catch (e: NullPointerException) { 0 } @@ -53,7 +92,7 @@ class GleanCrashReporterServiceTest { assertTrue("Persistence file must exist", service.file.exists()) val lines = service.file.readLines() assertEquals("Must be native code crash", - GleanCrashReporterService.NATIVE_CODE_CRASH_KEY, lines.first()) + GleanCrashReporterService.NONFATAL_NATIVE_CODE_CRASH_KEY, lines.first()) } // Initialize a fresh GleanCrashReporterService and ensure metrics are recorded in Glean @@ -61,10 +100,10 @@ class GleanCrashReporterServiceTest { GleanCrashReporterService(context) assertTrue("Glean must record a value", - CrashMetrics.crashCount[GleanCrashReporterService.NATIVE_CODE_CRASH_KEY].testHasValue()) + CrashMetrics.crashCount[GleanCrashReporterService.NONFATAL_NATIVE_CODE_CRASH_KEY].testHasValue()) assertEquals("Glean must record correct value", 1, - CrashMetrics.crashCount[GleanCrashReporterService.NATIVE_CODE_CRASH_KEY].testGetValue() - initialValue) + CrashMetrics.crashCount[GleanCrashReporterService.NONFATAL_NATIVE_CODE_CRASH_KEY].testGetValue() - initialValue) } } @@ -151,8 +190,14 @@ class GleanCrashReporterServiceTest { } catch (e: NullPointerException) { 0 } - val initialNativeCrashValue = try { - CrashMetrics.crashCount[GleanCrashReporterService.NATIVE_CODE_CRASH_KEY].testGetValue() + val initialFatalNativeCrashValue = try { + CrashMetrics.crashCount[GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY].testGetValue() + } catch (e: NullPointerException) { + 0 + } + + val initialNonfatalNativeCrashValue = try { + CrashMetrics.crashCount[GleanCrashReporterService.NONFATAL_NATIVE_CODE_CRASH_KEY].testGetValue() } catch (e: NullPointerException) { 0 } @@ -163,13 +208,14 @@ class GleanCrashReporterServiceTest { assertFalse("No previous persisted crashes must exist", service.file.exists()) val uncaughtExceptionCrash = Crash.UncaughtExceptionCrash(RuntimeException("Test"), arrayListOf()) - val nativeCodeCrash = Crash.NativeCodeCrash("", true, "", false, arrayListOf()) + val fatalNativeCodeCrash = Crash.NativeCodeCrash("", true, "", true, arrayListOf()) + val nonfatalNativeCodeCrash = Crash.NativeCodeCrash("", true, "", false, arrayListOf()) // Record some crashes service.report(uncaughtExceptionCrash) - service.report(nativeCodeCrash) + service.report(fatalNativeCodeCrash) service.report(uncaughtExceptionCrash) - service.report(nativeCodeCrash) + service.report(nonfatalNativeCodeCrash) // Make sure the file exists assertTrue("Persistence file must exist", service.file.exists()) @@ -180,11 +226,11 @@ class GleanCrashReporterServiceTest { assertEquals("First element must be uncaught exception", GleanCrashReporterService.UNCAUGHT_EXCEPTION_KEY, lines[0]) assertEquals("Second element must be native code crash", - GleanCrashReporterService.NATIVE_CODE_CRASH_KEY, lines[1]) + GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY, lines[1]) assertEquals("First element must be uncaught exception", GleanCrashReporterService.UNCAUGHT_EXCEPTION_KEY, lines[2]) assertEquals("Second element must be native code crash", - GleanCrashReporterService.NATIVE_CODE_CRASH_KEY, lines[3]) + GleanCrashReporterService.NONFATAL_NATIVE_CODE_CRASH_KEY, lines[3]) } // Initialize a fresh GleanCrashReporterService and ensure metrics are recorded in Glean @@ -197,8 +243,11 @@ class GleanCrashReporterServiceTest { 2, CrashMetrics.crashCount[GleanCrashReporterService.UNCAUGHT_EXCEPTION_KEY].testGetValue() - initialExceptionValue) assertEquals("Glean must record correct value", - 2, - CrashMetrics.crashCount[GleanCrashReporterService.NATIVE_CODE_CRASH_KEY].testGetValue() - initialNativeCrashValue) + 1, + CrashMetrics.crashCount[GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY].testGetValue() - initialFatalNativeCrashValue) + assertEquals("Glean must record correct value", + 1, + CrashMetrics.crashCount[GleanCrashReporterService.NONFATAL_NATIVE_CODE_CRASH_KEY].testGetValue() - initialNonfatalNativeCrashValue) } } @@ -225,7 +274,7 @@ class GleanCrashReporterServiceTest { // tests or even between test classes, so we compensate by capturing the initial value // to compare to. val initialValue = try { - CrashMetrics.crashCount[GleanCrashReporterService.NATIVE_CODE_CRASH_KEY].testGetValue() + CrashMetrics.crashCount[GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY].testGetValue() } catch (e: NullPointerException) { 0 } @@ -235,7 +284,7 @@ class GleanCrashReporterServiceTest { assertFalse("No previous persisted crashes must exist", service.file.exists()) - val crash = Crash.NativeCodeCrash("", true, "", false, arrayListOf()) + val crash = Crash.NativeCodeCrash("", true, "", true, arrayListOf()) service.report(crash) verify(service).report(crash) @@ -247,7 +296,7 @@ class GleanCrashReporterServiceTest { val lines = service.file.readLines() assertEquals("First must be native code crash", - GleanCrashReporterService.NATIVE_CODE_CRASH_KEY, lines.first()) + GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY, lines.first()) assertEquals("bad data in here", lines[1]) } @@ -255,10 +304,10 @@ class GleanCrashReporterServiceTest { GleanCrashReporterService(context) assertTrue("Glean must record a value", - CrashMetrics.crashCount[GleanCrashReporterService.NATIVE_CODE_CRASH_KEY].testHasValue()) + CrashMetrics.crashCount[GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY].testHasValue()) assertEquals("Glean must record correct value", 1, - CrashMetrics.crashCount[GleanCrashReporterService.NATIVE_CODE_CRASH_KEY].testGetValue() - initialValue) + CrashMetrics.crashCount[GleanCrashReporterService.FATAL_NATIVE_CODE_CRASH_KEY].testGetValue() - initialValue) } } } diff --git a/docs/changelog.md b/docs/changelog.md index 4e12a563134..f0e02865cc1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -20,6 +20,9 @@ permalink: /changelog/ * Added support for loading images into error pages with `createUrlEncodedErrorPage`. These error pages load dynamically with javascript by parsing params in the URL * ⚠️ To use custom HTML & CSS with image error pages, resources **must** be located in the assets folder +* **lib-crash** + * Glean reports now distinguishes between fatal and non-fatal native code crashes. + # 28.0.0 * [Commits](https://github.com/mozilla-mobile/android-components/compare/v27.0.0...v28.0.0)