Skip to content

Commit

Permalink
Bug 1644359: Support error reporting in Python bindings (#1039)
Browse files Browse the repository at this point in the history
* Bug 1644359: Support error reporting in Python bindings

This works by initializing a full Glean in the subprocess that
*doesn't* do any startup work such as setting metrics or submitting
pings. This makes it safe to initialize Glean in the subprocess,
and thus errors can be reported there.

* Change to borrow

* Add CHANGELOG.md

* Improve testing
  • Loading branch information
mdboom authored Jul 8, 2020
1 parent cf0613a commit 3165681
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 150 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* BUGFIX: raise an error if Glean is initialized with an empty string as the `application_id`.
* Python
* BUGFIX: correctly set the `app_build` metric to the newly provided `application_build_id` initialization option.
* The Python bindings now report networking errors in the `glean.upload.ping_upload_failure` metric (like all the other bindings).

[Full changelog](https://github.com/mozilla/glean/compare/v31.2.3...main)

Expand Down
7 changes: 6 additions & 1 deletion glean-core/ffi/glean.h
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,12 @@ void glean_get_upload_task(FfiPingUploadTask *result, uint8_t log_ping);
*/
uint8_t glean_initialize(const FfiConfiguration *cfg);

uint8_t glean_initialize_standalone_uploader(FfiStr data_dir, FfiStr language_binding_name);
/**
* # Safety
*
* A valid and non-null configuration object is required for this function.
*/
uint8_t glean_initialize_for_subprocess(const FfiConfiguration *cfg);

uint8_t glean_is_dirty_flag_set(void);

Expand Down
103 changes: 14 additions & 89 deletions glean-core/ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ use ffi_support::{define_string_destructor, ConcurrentHandleMap, FfiStr, IntoFfi
pub use glean_core::metrics::MemoryUnit;
pub use glean_core::metrics::TimeUnit;
pub use glean_core::upload::ffi_upload_result::*;
use glean_core::upload::PingUploadManager;
use glean_core::Glean;
pub use glean_core::Lifetime;

Expand Down Expand Up @@ -128,48 +127,6 @@ where
with_glean_mut(|glean| Ok(callback(glean)))
}

/// Execute the callback with a reference to the PingUploadManager singleton,
/// returning a `Result`.
///
/// The callback returns a `Result<T, E>` while:
///
/// - Catching panics, and logging them.
/// - Converting `T` to a C-compatible type using [`IntoFfi`].
/// - Logging `E` and returning a default value.
pub(crate) fn with_standalone_uploader<R, F>(callback: F) -> R::Value
where
F: UnwindSafe + FnOnce(&PingUploadManager) -> Result<R, glean_core::Error>,
R: IntoFfi,
{
let mut error = ffi_support::ExternError::success();
let res = ffi_support::abort_on_panic::call_with_result(&mut error, || {
match glean_core::upload::global_upload_manager() {
Some(upload_manager) => {
let upload_manager = upload_manager.lock().unwrap();
callback(&upload_manager)
}
None => Err(glean_core::Error::not_initialized()),
}
});
handlemap_ext::log_if_error(error);
res
}

/// Execute the callback with a reference to the PingUploadManager singleton,
/// returning a value.
///
/// The callback returns a value while:
///
/// - Catching panics, and logging them.
/// - Converting the returned value to a C-compatible type using [`IntoFfi`].
pub(crate) fn with_standalone_uploader_value<R, F>(callback: F) -> R::Value
where
F: UnwindSafe + FnOnce(&PingUploadManager) -> R,
R: IntoFfi,
{
with_standalone_uploader(|ping_uploader| Ok(callback(ping_uploader)))
}

/// Initialize the logging system based on the target platform. This ensures
/// that logging is shown when executing the Glean SDK unit tests.
#[no_mangle]
Expand Down Expand Up @@ -402,19 +359,6 @@ pub extern "C" fn glean_is_first_run() -> u8 {
// * `result`: the object the output task will be written to.
#[no_mangle]
pub extern "C" fn glean_get_upload_task(result: *mut FfiPingUploadTask, log_ping: u8) {
// If an upload manager instance is available, use that (it should only happen
// in processes which do not initialize Glean).
if glean_core::upload::global_upload_manager().is_some() {
with_standalone_uploader_value(|ping_uploader| {
let ffi_task = FfiPingUploadTask::from(ping_uploader.get_upload_task(log_ping != 0));
unsafe {
std::ptr::write(result, ffi_task);
}
});
return;
}

// Otherwise
with_glean_value(|glean| {
let ffi_task = FfiPingUploadTask::from(glean.get_upload_task(log_ping != 0));
unsafe {
Expand Down Expand Up @@ -451,22 +395,6 @@ pub unsafe extern "C" fn glean_process_ping_upload_response(
// but as it controls the memory, we put something valid in place, just in case.
let task = std::ptr::replace(task, FfiPingUploadTask::Done);

// If an upload manager instance is available, use that (it should only happen
// in processes which do not initialize Glean).
if glean_core::upload::global_upload_manager().is_some() {
with_standalone_uploader(|ping_uploader| {
if let FfiPingUploadTask::Upload { document_id, .. } = task {
assert!(!document_id.is_null());
let document_id_str = CStr::from_ptr(document_id)
.to_str()
.map_err(|_| glean_core::Error::utf8_error())?;
ping_uploader.process_ping_upload_response(document_id_str, status.into());
};
Ok(())
});
return;
}

with_glean(|glean| {
if let FfiPingUploadTask::Upload { document_id, .. } = task {
assert!(!document_id.is_null());
Expand All @@ -479,25 +407,22 @@ pub unsafe extern "C" fn glean_process_ping_upload_response(
});
}

/// # Safety
///
/// A valid and non-null configuration object is required for this function.
#[no_mangle]
pub extern "C" fn glean_initialize_standalone_uploader(
data_dir: FfiStr,
language_binding_name: FfiStr,
) -> u8 {
pub unsafe extern "C" fn glean_initialize_for_subprocess(cfg: *const FfiConfiguration) -> u8 {
assert!(!cfg.is_null());

handlemap_ext::handle_result(|| {
// Init the upload manager to perform a synchronous ping directory scan.
// Since this method is meant to be called from a process used exclusively
// for uploading, this is fine.
let mut upload_manager = PingUploadManager::new(
data_dir.to_string_fallible()?,
&language_binding_name.to_string_fallible()?,
true,
);
upload_manager.set_rate_limiter(
/* seconds per interval */ 60, /* max tasks per interval */ 10,
);
glean_core::upload::setup_upload_manager(upload_manager)?;
log::info!("Glean initialized in upload-only mode");
// We can create a reference to the FfiConfiguration struct:
// 1. We did a null check
// 2. We're not holding on to it beyond this function
// and we copy out all data when needed.
let glean_cfg = glean_core::Configuration::try_from(&*cfg)?;
let glean = Glean::new_for_subprocess(&glean_cfg)?;
glean_core::setup_glean(glean)?;
log::info!("Glean initialized for subprocess");
Ok(true)
})
}
Expand Down
7 changes: 6 additions & 1 deletion glean-core/ios/Glean/GleanFfi.h
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,12 @@ void glean_get_upload_task(FfiPingUploadTask *result, uint8_t log_ping);
*/
uint8_t glean_initialize(const FfiConfiguration *cfg);

uint8_t glean_initialize_standalone_uploader(FfiStr data_dir, FfiStr language_binding_name);
/**
* # Safety
*
* A valid and non-null configuration object is required for this function.
*/
uint8_t glean_initialize_for_subprocess(const FfiConfiguration *cfg);

uint8_t glean_is_dirty_flag_set(void);

Expand Down
21 changes: 11 additions & 10 deletions glean-core/python/glean/net/ping_upload_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
from pathlib import Path
import re
import sys
import time
from typing import List, Tuple

Expand Down Expand Up @@ -50,7 +51,7 @@ def _process(cls):
from .. import Glean

return ProcessDispatcher.dispatch(
_process, (Glean._data_dir, Glean._configuration)
_process, (Glean._data_dir, Glean._application_id, Glean._configuration)
)

@classmethod
Expand Down Expand Up @@ -101,21 +102,21 @@ def _parse_ping_headers(
return headers


def _process(data_dir: Path, configuration) -> bool:
def _process(data_dir: Path, application_id: str, configuration) -> bool:

# Import here to avoid cyclical import
from ..glean import Glean

if not Glean.is_initialized():
# Always load the Glean shared object / dll even if we're in a (ping upload worker)
# subprocess.
# To make startup time better in subprocesses, consumers can initialize just the
# ping upload manager.
data_dir = ffi_support.new("char[]", _ffi.ffi_encode_string(str(data_dir)))
language_binding_name = ffi_support.new(
"char[]", _ffi.ffi_encode_string(_ffi.LANGUAGE_BINDING_NAME)
# We don't want to send pings or otherwise update the database during
# initialization in a subprocess, so we use
# `glean_initialize_for_subprocess` rather than `glean_initialize` here.
cfg = _ffi.make_config(
data_dir, application_id, True, configuration.max_events,
)
_ffi.lib.glean_initialize_standalone_uploader(data_dir, language_binding_name)
if _ffi.lib.glean_initialize_for_subprocess(cfg) == 0:
log.error("Couldn't initialize Glean in subprocess")
sys.exit(1)

wait_attempts = 0

Expand Down
18 changes: 18 additions & 0 deletions glean-core/python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,24 @@ def safe_httpserver(httpserver):
return httpserver


@pytest.fixture
def slow_httpserver(httpserver):
"""
An httpserver that takes 0.5 seconds to respond.
"""
wait_for_server(httpserver)

orig_call = httpserver.__call__

def __call__(self, *args, **kwargs):
time.sleep(0.5)
return orig_call(*args, **kwargs)

httpserver.__call__ = __call__

return httpserver


@pytest.fixture
def safe_httpsserver(httpsserver):
wait_for_server(httpsserver)
Expand Down
88 changes: 88 additions & 0 deletions glean-core/python/tests/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,30 @@


from glean import Glean
from glean import metrics
from glean._process_dispatcher import ProcessDispatcher
from glean.net import PingUploadWorker
from glean.net.http_client import HttpClientUploader
from glean.net import ping_uploader


def get_upload_failure_metric():
return metrics.LabeledCounterMetricType(
disabled=False,
send_in_pings=["metrics"],
name="ping_upload_failure",
category="glean.upload",
labels=[
"status_code_4xx",
"status_code_5xx",
"status_code_unknown",
"unrecoverable",
"recoverable",
],
lifetime=metrics.Lifetime.PING,
)


def test_400_error(safe_httpserver):
safe_httpserver.serve_content(b"", code=400)

Expand All @@ -24,6 +43,22 @@ def test_400_error(safe_httpserver):
assert 1 == len(safe_httpserver.requests)


def test_400_error_submit(safe_httpserver, monkeypatch):
safe_httpserver.serve_content(b"", code=400)

# Force the ping upload worker into a separate process
monkeypatch.setattr(PingUploadWorker, "process", PingUploadWorker._process)
Glean._configuration._server_endpoint = safe_httpserver.url
Glean._submit_ping_by_name("baseline")
ProcessDispatcher._wait_for_last_process()

assert 1 == len(safe_httpserver.requests)

metric = get_upload_failure_metric()
assert 1 == metric["status_code_4xx"].test_get_value()
assert not metric["status_code_5xx"].test_has_value()


def test_500_error(safe_httpserver):
safe_httpserver.serve_content(b"", code=500)

Expand All @@ -36,6 +71,59 @@ def test_500_error(safe_httpserver):
assert 1 == len(safe_httpserver.requests)


def test_500_error_submit(safe_httpserver, monkeypatch):
safe_httpserver.serve_content(b"", code=500)

# Force the ping upload worker into a separate process
monkeypatch.setattr(PingUploadWorker, "process", PingUploadWorker._process)
Glean._configuration._server_endpoint = safe_httpserver.url
Glean._submit_ping_by_name("baseline")
ProcessDispatcher._wait_for_last_process()

# This kind of recoverable error will be tried 10 times
assert 10 == len(safe_httpserver.requests)

metric = get_upload_failure_metric()
assert not metric["status_code_4xx"].test_has_value()
assert 10 == metric["status_code_5xx"].test_get_value()


def test_500_error_submit_concurrent_writing(slow_httpserver, monkeypatch):
# This tests that concurrently writing to the database from the main process
# and the ping uploading subprocess.
slow_httpserver.serve_content(b"", code=500)

counter = metrics.CounterMetricType(
disabled=False,
category="test",
name="counter",
send_in_pings=["metrics"],
lifetime=metrics.Lifetime.PING,
)

# Force the ping upload worker into a separate process
monkeypatch.setattr(PingUploadWorker, "process", PingUploadWorker._process)
Glean._configuration._server_endpoint = slow_httpserver.url
Glean._submit_ping_by_name("baseline")

# While the uploader is running, increment the counter as fast as we can
times = 0
last_process = ProcessDispatcher._last_process
while last_process.poll() is None:
counter.add()
times += 1

# This kind of recoverable error will be tried 10 times
assert 10 == len(slow_httpserver.requests)

metric = get_upload_failure_metric()
assert not metric["status_code_4xx"].test_has_value()
assert 10 == metric["status_code_5xx"].test_get_value()

assert times > 0
assert times == counter.test_get_value()


def test_unknown_scheme():
response = HttpClientUploader.upload(
url="ftp://example.com/", data=b"{}", headers=[]
Expand Down
Loading

0 comments on commit 3165681

Please sign in to comment.