From 5e29e49f62276c8a49b135ba5d4cc71c4268477b Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Wed, 11 Nov 2020 20:55:42 +0100 Subject: [PATCH 1/4] Migrate an LMDB database to a safe-mode database if enabled --- glean-core/src/database/mod.rs | 362 ++++++++++++++++++++++++++++++++- 1 file changed, 361 insertions(+), 1 deletion(-) diff --git a/glean-core/src/database/mod.rs b/glean-core/src/database/mod.rs index eb25af7e25..97ddc69aba 100644 --- a/glean-core/src/database/mod.rs +++ b/glean-core/src/database/mod.rs @@ -27,12 +27,18 @@ mod backend { pub fn rkv_new(path: &Path) -> Result { Rkv::new::(path) } + + /// No migration necessary when staying with LMDB. + pub fn migrate(_path: &Path, _dst_env: &Rkv) { + // intentionally left empty + } } // Select the "safe mode" storage backend when the feature is activated. #[cfg(feature = "rkv-safe-mode")] mod backend { - use std::path::Path; + use rkv::migrator::Migrator; + use std::{fs, path::Path}; /// cbindgen:ignore pub type Rkv = rkv::Rkv; @@ -44,6 +50,91 @@ mod backend { pub fn rkv_new(path: &Path) -> Result { Rkv::new::(path) } + + fn delete_and_log(path: &Path, msg: &str) { + if let Err(err) = fs::remove_file(path) { + match err.kind() { + std::io::ErrorKind::NotFound => { + // silently drop this error, the file was already non-existing + } + _ => log::warn!("{}", msg), + } + } + } + + fn delete_lmdb_database(path: &Path) { + let datamdb = path.join("data.mdb"); + delete_and_log(&datamdb, "Failed to delete old data."); + + let lockmdb = path.join("lock.mdb"); + delete_and_log(&lockmdb, "Failed to delete old lock."); + } + + /// Migrate from LMDB storage to safe-mode storage. + /// + /// This migrates the data once, then deletes the LMDB storage. + /// The safe-mode storage must be empty for it to work. + /// Existing data will not be overwritten. + /// If the destination database is not empty the LMDB database is deleted + /// without migrating data. + /// This is a no-op if no LMDB database file exists. + pub fn migrate(path: &Path, dst_env: &Rkv) { + use rkv::{MigrateError, StoreError}; + + log::debug!("Migrating files in {}", path.display()); + + // Shortcut if no data to migrate is around. + let datamdb = path.join("data.mdb"); + if !datamdb.exists() { + log::debug!("No data to migrate."); + return; + } + + // We're handling the same error cases as `easy_migrate_lmdb_to_safe_mode`, + // but annotate each why they don't cause problems for Glean. + // Additionally for known cases we delete the LMDB database regardless. + let should_delete = + match Migrator::open_and_migrate_lmdb_to_safe_mode(path, |builder| builder, dst_env) { + // Source environment is corrupted. + // We start fresh with the new database. + Err(MigrateError::StoreError(StoreError::FileInvalid)) => true, + // Path not accessible. + // Somehow our directory vanished between us creating it and reading from it. + // Nothing we can do really. + Err(MigrateError::StoreError(StoreError::IoError(_))) => true, + // Path accessible but incompatible for configuration. + // This should not happen, we never used storages that safe-mode doesn't understand. + // If it does happen, let's start fresh and use the safe-mode from now on. + Err(MigrateError::StoreError(StoreError::UnsuitableEnvironmentPath(_))) => true, + // Nothing to migrate. + // Source database was empty. We just start fresh anyway. + Err(MigrateError::SourceEmpty) => true, + // Migrating would overwrite. + // Either a previous migration failed and we still started writing data, + // or someone placed back an old data file. + // In any case we better stay on the new data and delete the old one. + Err(MigrateError::DestinationNotEmpty) => { + log::warn!("Failed to migrate old data. Destination was not empty"); + true + } + // An internal lock was poisoned. + // This would only happen if multiple things run concurrently and one crashes. + Err(MigrateError::ManagerPoisonError) => false, + // Other store errors are never returned from the migrator. + // We need to handle them to please rustc. + Err(MigrateError::StoreError(_)) => false, + // Other errors can't happen, so this leaves us with the Ok case. + // This already deleted the LMDB files. + Ok(()) => false, + }; + + if should_delete { + log::debug!("Need to delete remaining LMDB files."); + delete_lmdb_database(&path); + } + + log::debug!("Migration ended. Safe-mode database in {}", path.display()); + } } use crate::metrics::Metric; @@ -167,6 +258,8 @@ impl Database { fs::create_dir_all(&path)?; let rkv = rkv_new(&path)?; + migrate(path, &rkv); + log::info!("Database initialized"); Ok(rkv) } @@ -1202,4 +1295,271 @@ mod test { }, ); } + + #[cfg(feature = "rkv-safe-mode")] + mod safe_mode_migration { + use super::*; + use rkv::Value; + + #[test] + fn migration_works_on_startup() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + let database_dir = dir.path().join("db"); + let datamdb = database_dir.join("data.mdb"); + let lockmdb = database_dir.join("lock.mdb"); + let safebin = database_dir.join("data.safe.bin"); + + assert!(!safebin.exists()); + assert!(!datamdb.exists()); + assert!(!lockmdb.exists()); + + let store_name = "store1"; + let metric_name = "bool"; + let key = Database::get_storage_key(store_name, Some(metric_name)); + + // Ensure some old data in the LMDB format exists. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = rkv::Rkv::new::(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Boolean(true); + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + + assert!(datamdb.exists()); + assert!(lockmdb.exists()); + assert!(!safebin.exists()); + } + + // First open should migrate the data. + { + let db = Database::new(&str_dir, false).unwrap(); + let safebin = database_dir.join("data.safe.bin"); + assert!(safebin.exists(), "safe-mode file should exist"); + assert!(!datamdb.exists(), "LMDB data should be deleted"); + assert!(!lockmdb.exists(), "LMDB lock should be deleted"); + + let mut stored_metrics = vec![]; + let mut snapshotter = |name: &[u8], metric: &Metric| { + let name = str::from_utf8(name).unwrap().to_string(); + stored_metrics.push((name, metric.clone())) + }; + db.iter_store_from(Lifetime::Ping, "store1", None, &mut snapshotter); + + assert_eq!(1, stored_metrics.len()); + assert_eq!(metric_name, stored_metrics[0].0); + assert_eq!(&Metric::Boolean(true), &stored_metrics[0].1); + } + + // Next open should not re-create the LMDB files. + { + let db = Database::new(&str_dir, false).unwrap(); + let safebin = database_dir.join("data.safe.bin"); + assert!(safebin.exists(), "safe-mode file exists"); + assert!(!datamdb.exists(), "LMDB data should not be recreated"); + assert!(!lockmdb.exists(), "LMDB lock should not be recreated"); + + let mut stored_metrics = vec![]; + let mut snapshotter = |name: &[u8], metric: &Metric| { + let name = str::from_utf8(name).unwrap().to_string(); + stored_metrics.push((name, metric.clone())) + }; + db.iter_store_from(Lifetime::Ping, "store1", None, &mut snapshotter); + + assert_eq!(1, stored_metrics.len()); + assert_eq!(metric_name, stored_metrics[0].0); + assert_eq!(&Metric::Boolean(true), &stored_metrics[0].1); + } + } + + #[test] + fn migration_doesnt_overwrite() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + let database_dir = dir.path().join("db"); + let datamdb = database_dir.join("data.mdb"); + let lockmdb = database_dir.join("lock.mdb"); + let safebin = database_dir.join("data.safe.bin"); + + assert!(!safebin.exists()); + assert!(!datamdb.exists()); + assert!(!lockmdb.exists()); + + let store_name = "store1"; + let metric_name = "counter"; + let key = Database::get_storage_key(store_name, Some(metric_name)); + + // Ensure some old data in the LMDB format exists. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = rkv::Rkv::new::(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Counter(734); // this value will be ignored + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + + assert!(datamdb.exists()); + assert!(lockmdb.exists()); + } + + // Ensure some data exists in the new database. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = + rkv::Rkv::new::(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Counter(2); + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + + assert!(safebin.exists()); + } + + // First open should try migration and ignore it, because destination is not empty. + // It also deletes the leftover LMDB database. + { + let db = Database::new(&str_dir, false).unwrap(); + let safebin = database_dir.join("data.safe.bin"); + assert!(safebin.exists(), "safe-mode file should exist"); + assert!(!datamdb.exists(), "LMDB data should be deleted"); + assert!(!lockmdb.exists(), "LMDB lock should be deleted"); + + let mut stored_metrics = vec![]; + let mut snapshotter = |name: &[u8], metric: &Metric| { + let name = str::from_utf8(name).unwrap().to_string(); + stored_metrics.push((name, metric.clone())) + }; + db.iter_store_from(Lifetime::Ping, "store1", None, &mut snapshotter); + + assert_eq!(1, stored_metrics.len()); + assert_eq!(metric_name, stored_metrics[0].0); + assert_eq!(&Metric::Counter(2), &stored_metrics[0].1); + } + } + + #[test] + fn migration_ignores_broken_database() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + let database_dir = dir.path().join("db"); + let datamdb = database_dir.join("data.mdb"); + let lockmdb = database_dir.join("lock.mdb"); + let safebin = database_dir.join("data.safe.bin"); + + assert!(!safebin.exists()); + assert!(!datamdb.exists()); + assert!(!lockmdb.exists()); + + let store_name = "store1"; + let metric_name = "counter"; + let key = Database::get_storage_key(store_name, Some(metric_name)); + + // Ensure some old data in the LMDB format exists. + { + fs::create_dir_all(&database_dir).expect("create dir"); + fs::write(&datamdb, "bogus").expect("dbfile created"); + + assert!(datamdb.exists()); + } + + // Ensure some data exists in the new database. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = + rkv::Rkv::new::(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Counter(2); + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + } + + // First open should try migration and ignore it, because destination is not empty. + // It also deletes the leftover LMDB database. + { + let db = Database::new(&str_dir, false).unwrap(); + let safebin = database_dir.join("data.safe.bin"); + assert!(safebin.exists(), "safe-mode file should exist"); + assert!(!datamdb.exists(), "LMDB data should be deleted"); + assert!(!lockmdb.exists(), "LMDB lock should be deleted"); + + let mut stored_metrics = vec![]; + let mut snapshotter = |name: &[u8], metric: &Metric| { + let name = str::from_utf8(name).unwrap().to_string(); + stored_metrics.push((name, metric.clone())) + }; + db.iter_store_from(Lifetime::Ping, "store1", None, &mut snapshotter); + + assert_eq!(1, stored_metrics.len()); + assert_eq!(metric_name, stored_metrics[0].0); + assert_eq!(&Metric::Counter(2), &stored_metrics[0].1); + } + } + + #[test] + fn migration_ignores_empty_database() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + let database_dir = dir.path().join("db"); + let datamdb = database_dir.join("data.mdb"); + let lockmdb = database_dir.join("lock.mdb"); + let safebin = database_dir.join("data.safe.bin"); + + assert!(!safebin.exists()); + assert!(!datamdb.exists()); + assert!(!lockmdb.exists()); + + // Ensure old LMDB database exists, but is empty. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = rkv::Rkv::new::(&database_dir).expect("rkv env"); + drop(rkv_db); + assert!(datamdb.exists()); + assert!(lockmdb.exists()); + } + + // First open should try migration, but find no data. + // safe-mode does not write an empty database to disk. + // It also deletes the leftover LMDB database. + { + let _db = Database::new(&str_dir, false).unwrap(); + let safebin = database_dir.join("data.safe.bin"); + assert!(!safebin.exists(), "safe-mode file should exist"); + assert!(!datamdb.exists(), "LMDB data should be deleted"); + assert!(!lockmdb.exists(), "LMDB lock should be deleted"); + } + } + } } From 85ea9419fd572b7a9f0a19b75fb731a331d4bb1c Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Thu, 12 Nov 2020 16:36:32 +0100 Subject: [PATCH 2/4] Enable tests for rkv-safe-mode on CI --- .circleci/config.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 03f68e15c9..a0ef161601 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -61,6 +61,11 @@ commands: - run: name: Test command: cargo test --all --verbose + - run: + name: Test Glean with rkv safe-mode + command: | + cd glean-core + cargo test -p glean-core --features rkv-safe-mode install-rustup: steps: From 3208f746f7013bfaed0b4ed11d9dbd857e03b720 Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Mon, 16 Nov 2020 10:49:48 +0100 Subject: [PATCH 3/4] Apply suggestions from code review [doc only] Co-authored-by: Alessio Placitelli --- glean-core/src/database/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/glean-core/src/database/mod.rs b/glean-core/src/database/mod.rs index 97ddc69aba..2d19869e50 100644 --- a/glean-core/src/database/mod.rs +++ b/glean-core/src/database/mod.rs @@ -30,7 +30,7 @@ mod backend { /// No migration necessary when staying with LMDB. pub fn migrate(_path: &Path, _dst_env: &Rkv) { - // intentionally left empty + // Intentionally left empty. } } @@ -55,7 +55,7 @@ mod backend { if let Err(err) = fs::remove_file(path) { match err.kind() { std::io::ErrorKind::NotFound => { - // silently drop this error, the file was already non-existing + // Silently drop this error, the file was already non-existing. } _ => log::warn!("{}", msg), } From abdb786bf06a4bab3fe04ba0083dd71259e1eaac Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Mon, 16 Nov 2020 11:21:20 +0100 Subject: [PATCH 4/4] Document safe-mode migration in changelog [doc only] --- .dictionary | 3 ++- CHANGELOG.md | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.dictionary b/.dictionary index 7db3c01f94..3ac4f6d84f 100644 --- a/.dictionary +++ b/.dictionary @@ -1,4 +1,4 @@ -personal_ws-1.1 en 201 utf-8 +personal_ws-1.1 en 202 utf-8 AAR AARs ABI @@ -55,6 +55,7 @@ Protobufs PyPI RLB Redash +Rkv's SDK SDK's TLDs diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d38631562..ae1c789610 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ [Full changelog](https://github.com/mozilla/glean/compare/v33.2.0...main) +* General + * When Rkv's safe mode is enabled (`features = ["rkv-safe-mode"]` on the `glean-core` crate) LMDB data is migrated at first start ([#1322](https://github.com/mozilla/glean/pull/1322)). + # v33.2.0 (2020-11-10) [Full changelog](https://github.com/mozilla/glean/compare/v33.1.2...v33.2.0)