Skip to content

Commit

Permalink
Let the icinga check command also output the error when last reload failed
Browse files Browse the repository at this point in the history
  • Loading branch information
Al2Klimov committed Feb 4, 2025
1 parent d55c364 commit c6123c9
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 29 deletions.
1 change: 1 addition & 0 deletions doc/10-icinga-template-library.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ Custom variables passed as [command parameters](03-monitoring-basics.md#command-
Name | Description
-----------------------|---------------
icinga\_min\_version | **Optional.** Required minimum Icinga 2 version, e.g. `2.8.0`. If not satisfied, the state changes to `Critical`. Release packages only.
icinga\_verbose | **Optional.** If set to `true` and the last reload failed, the reason for the failure is appended to the check output.

### cluster <a id="itl-icinga-cluster"></a>

Expand Down
34 changes: 19 additions & 15 deletions lib/base/application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <boost/exception/errinfo_errno.hpp>
#include <boost/exception/errinfo_file_name.hpp>
#include <boost/stacktrace.hpp>
#include <cstring>
#include <sstream>
#include <iostream>
#include <fstream>
Expand All @@ -30,6 +31,7 @@
#ifdef _WIN32
#include <windows.h>
#else /* _WIN32 */
#include "base/shared-memory.hpp"
#include <signal.h>
#endif /* _WIN32 */

Expand Down Expand Up @@ -63,11 +65,18 @@ char **Application::m_ArgV;
double Application::m_StartTime;
bool Application::m_ScriptDebuggerEnabled = false;

/* Points at the record describing the last failed reload; initialised from AllocLastReloadFailed(). */
Application::LastReloadFailed* Application::m_LastReloadFailed (Application::AllocLastReloadFailed());

/**
 * Provides the storage that m_LastReloadFailed points to.
 *
 * NOTE(review): this text is a diff rendering whose +/- markers were lost. The two
 * "... Application::m_LastReloadFailed ..." definitions inside the body look like the
 * removed pre-change lines, not part of this function -- confirm against the commit.
 *
 * @returns On Windows the address of a function-local static LastReloadFailed, otherwise
 * the address of the object returned by SharedMemory<LastReloadFailed>::Get().
 */
Application::LastReloadFailed* Application::AllocLastReloadFailed()
{
#ifdef _WIN32
double Application::m_LastReloadFailed = 0;
static LastReloadFailed lrf;
return &lrf;
#else /* _WIN32 */
SharedMemory<Application::AtomicTs> Application::m_LastReloadFailed (0);
/* presumably SharedMemory<> makes the record visible to forked worker processes as well -- TODO confirm in base/shared-memory.hpp */
static SharedMemory<LastReloadFailed> slrf;
return &slrf.Get();
#endif /* _WIN32 */
}

#ifdef _WIN32
static LPTOP_LEVEL_EXCEPTION_FILTER l_DefaultUnhandledExceptionFilter = nullptr;
Expand Down Expand Up @@ -379,7 +388,7 @@ void Application::OnShutdown()
static void ReloadProcessCallbackInternal(const ProcessResult& pr)
{
if (pr.ExitStatus != 0) {
Application::SetLastReloadFailed(Utility::GetTime());
Application::SetLastReloadFailed(Utility::GetTime(), pr.Output);
Log(LogCritical, "Application", "Found error in config: reloading aborted");
}
#ifdef _WIN32
Expand Down Expand Up @@ -1211,22 +1220,17 @@ void Application::SetScriptDebuggerEnabled(bool enabled)
m_ScriptDebuggerEnabled = enabled;
}

/**
 * Retrieves when and why the last reload attempt failed.
 *
 * NOTE(review): this text is a diff rendering whose +/- markers were lost. The `double`
 * signature and the #ifdef'd return statements look like the removed pre-change
 * implementation; the std::pair signature and the last two statements look like the
 * added one -- confirm against the commit.
 *
 * @returns Pair of (When, Why) copied from the LastReloadFailed record.
 */
double Application::GetLastReloadFailed()
std::pair<double, String> Application::GetLastReloadFailed()
{
#ifdef _WIN32
return m_LastReloadFailed;
#else /* _WIN32 */
return m_LastReloadFailed.Get().load();
#endif /* _WIN32 */
/* Hold the shared lock while both fields are copied out of the record. */
LastReloadFailed::SharedLock lock (m_LastReloadFailed->Mutex);
/* NOTE(review): String(const char*) presumably reads up to the first NUL, so Why must be NUL-terminated by the writer -- confirm. */
return {m_LastReloadFailed->When, String(m_LastReloadFailed->Why)};
}

void Application::SetLastReloadFailed(double ts)
void Application::SetLastReloadFailed(double ts, const String& error)
{
#ifdef _WIN32
m_LastReloadFailed = ts;
#else /* _WIN32 */
m_LastReloadFailed.Get().store(ts);
#endif /* _WIN32 */
LastReloadFailed::UniqueLock lock (m_LastReloadFailed->Mutex);
m_LastReloadFailed->When = ts;
strncpy(m_LastReloadFailed->Why, error.CStr(), sizeof(m_LastReloadFailed->Why));
}

void Application::ValidateName(const Lazy<String>& lvalue, const ValidationUtils& utils)
Expand Down
37 changes: 29 additions & 8 deletions lib/base/application.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,22 @@
#define APPLICATION_H

#include "base/i2-base.hpp"
#include "base/atomic.hpp"
#include "base/application-ti.hpp"
#include "base/logger.hpp"
#include "base/configuration.hpp"
#include "base/shared-memory.hpp"
#include <cstdint>
#include <iosfwd>
#include <type_traits>
#include <utility>

#ifdef _WIN32
#include <mutex>
#include <shared_mutex>
#else /* _WIN32 */
#include <boost/interprocess/sync/interprocess_sharable_mutex.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp>
#include <boost/interprocess/sync/sharable_lock.hpp>
#endif /* _WIN32 */

namespace icinga
{
Expand Down Expand Up @@ -104,8 +112,8 @@ class Application : public ObjectImpl<Application> {
static bool GetScriptDebuggerEnabled();
static void SetScriptDebuggerEnabled(bool enabled);

static double GetLastReloadFailed();
static void SetLastReloadFailed(double ts);
static std::pair<double, String> GetLastReloadFailed();
static void SetLastReloadFailed(double ts, const String& error);

static void DisplayInfoMessage(std::ostream& os, bool skipVersion = false);

Expand Down Expand Up @@ -141,13 +149,26 @@ class Application : public ObjectImpl<Application> {
static double m_StartTime;
static double m_MainTime;
static bool m_ScriptDebuggerEnabled;

/**
 * Record describing the last failed reload: a mutex plus the failure time and reason.
 *
 * NOTE(review): this text is a diff rendering whose +/- markers were lost. The
 * "static ... m_LastReloadFailed" members, the AtomicTs typedef and the static_assert
 * inside the struct look like removed pre-change lines of the enclosing class -- confirm
 * against the commit.
 */
struct LastReloadFailed
{
#ifdef _WIN32
static double m_LastReloadFailed;
/* Windows: standard-library reader/writer locks. */
typedef std::shared_lock<std::shared_mutex> SharedLock;
typedef std::unique_lock<std::shared_mutex> UniqueLock;

std::shared_mutex Mutex;
#else /* _WIN32 */
typedef Atomic<std::conditional_t<Atomic<double>::is_always_lock_free, double, uint32_t>> AtomicTs;
static_assert(AtomicTs::is_always_lock_free);
static SharedMemory<AtomicTs> m_LastReloadFailed;
/* Elsewhere: Boost.Interprocess reader/writer locks on an interprocess mutex. */
typedef boost::interprocess::sharable_lock<boost::interprocess::interprocess_sharable_mutex> SharedLock;
typedef boost::interprocess::scoped_lock<boost::interprocess::interprocess_sharable_mutex> UniqueLock;

boost::interprocess::interprocess_sharable_mutex Mutex;
#endif /* _WIN32 */
/* Time of the last failed reload; initially 0. */
double When = 0;
/* Failure reason as a C string in a fixed 16 KiB buffer, initially empty. */
char Why[16 * 1024] = {0};
};

/* The single record instance and the function that supplies its storage. */
static LastReloadFailed* m_LastReloadFailed;
static LastReloadFailed* AllocLastReloadFailed();

#ifdef _WIN32
static BOOL WINAPI CtrlHandler(DWORD type);
Expand Down
1 change: 0 additions & 1 deletion lib/base/logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ class Logger : public ObjectImpl<Logger>
void SetSeverity(const String& value, bool suppress_events = false, const Value& cookie = Empty) override;
void ValidateSeverity(const Lazy<String>& lvalue, const ValidationUtils& utils) final;

protected:
void Start(bool runtimeCreated) override;
void Stop(bool runtimeRemoved) override;

Expand Down
48 changes: 46 additions & 2 deletions lib/cli/daemoncommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "base/atomic.hpp"
#include "base/defer.hpp"
#include "base/logger.hpp"
#include "base/streamlogger.hpp"
#include "base/application.hpp"
#include "base/process.hpp"
#include "base/timer.hpp"
Expand All @@ -25,6 +26,7 @@
#include <boost/program_options.hpp>
#include <iostream>
#include <fstream>
#include <sstream>

#ifdef _WIN32
#include <windows.h>
Expand Down Expand Up @@ -222,6 +224,10 @@ static double GetDebugWorkerDelay()

static String l_ObjectsPath;

#ifndef _WIN32
static bool l_WorkerLoadedConfig = false;
#endif /* _WIN32 */

/**
* Do the actual work (config loading, ...)
*
Expand All @@ -246,6 +252,13 @@ int RunWorker(const std::vector<std::string>& configs, bool closeConsoleLog = fa
}
#endif /* I2_DEBUG */

std::ostringstream oss;
StreamLogger::Ptr sl = new StreamLogger();

sl->BindStream(&oss, false);
sl->Start(true);
sl->SetActive(true);

Log(LogInformation, "cli", "Loading configuration file(s).");
NotifyStatus("Loading configuration file(s)...");

Expand All @@ -255,14 +268,24 @@ int RunWorker(const std::vector<std::string>& configs, bool closeConsoleLog = fa
if (!DaemonUtility::LoadConfigFiles(configs, newItems, l_ObjectsPath, Configuration::VarsPath)) {
Log(LogCritical, "cli", "Config validation failed. Re-run with 'icinga2 daemon -C' after fixing the config.");
NotifyStatus("Config validation failed.");

sl->Stop(true);
sl = nullptr;
Application::SetLastReloadFailed(Utility::GetTime(), oss.str());

return EXIT_FAILURE;
}

sl->Stop(true);
sl = nullptr;
oss = decltype(oss)();

#ifndef _WIN32
Log(LogNotice, "cli")
<< "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success";

(void)kill(l_UmbrellaPid, SIGUSR2);
l_WorkerLoadedConfig = true;

Log(LogNotice, "cli")
<< "Waiting for the umbrella process to let us doing the actual work";
Expand Down Expand Up @@ -489,6 +512,7 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
}

(void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr);
Application::SetLastReloadFailed(Utility::GetTime(), "fork(2) failed");
return -1;

case 0:
Expand Down Expand Up @@ -531,6 +555,12 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
} catch (const std::exception& ex) {
Log(LogCritical, "cli")
<< "Failed to re-initialize thread pool after forking (child): " << DiagnosticInformation(ex);

Application::SetLastReloadFailed(
Utility::GetTime(),
"Failed to re-initialize thread pool after forking (child): " + DiagnosticInformation(ex)
);

_exit(EXIT_FAILURE);
}

Expand All @@ -539,14 +569,29 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
} catch (const std::exception& ex) {
Log(LogCritical, "cli")
<< "Failed to initialize process spawn helper after forking (child): " << DiagnosticInformation(ex);

Application::SetLastReloadFailed(
Utility::GetTime(),
"Failed to initialize process spawn helper after forking (child): " + DiagnosticInformation(ex)
);

_exit(EXIT_FAILURE);
}

_exit(RunWorker(configs, closeConsoleLog, stderrFile));
} catch (const std::exception& ex) {
Log(LogCritical, "cli") << "Exception in main process: " << DiagnosticInformation(ex);

if (!l_WorkerLoadedConfig) {
Application::SetLastReloadFailed(Utility::GetTime(), "Exception in main process: " + DiagnosticInformation(ex));
}

_exit(EXIT_FAILURE);
} catch (...) {
if (!l_WorkerLoadedConfig) {
Application::SetLastReloadFailed(Utility::GetTime(), "Exception in main process");
}

_exit(EXIT_FAILURE);
}

Expand Down Expand Up @@ -813,15 +858,14 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector<std::strin
break;
case -2:
Log(LogCritical, "Application", "Found error in config: reloading aborted");
Application::SetLastReloadFailed(Utility::GetTime());
break;
default:
Log(LogInformation, "Application")
<< "Reload done, old process shutting down. Child process with PID '" << nextWorker << "' is taking over.";

NotifyStatus("Shutting down old instance...");

Application::SetLastReloadFailed(0);
Application::SetLastReloadFailed(0, "");
(void)kill(currentWorker, SIGTERM);

{
Expand Down
20 changes: 17 additions & 3 deletions lib/methods/icingachecktask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
String icingaMinVersion = MacroProcessor::ResolveMacros("$icinga_min_version$", resolvers, checkable->GetLastCheckResult(),
&missingIcingaMinVersion, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros);

auto verbose (MacroProcessor::ResolveMacros("$icinga_verbose$", resolvers, checkable->GetLastCheckResult(),
nullptr, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros));

if (resolvedMacros && !useResolvedMacros)
return;

Expand Down Expand Up @@ -158,10 +161,17 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
". Version: " + appVersion;

/* Indicate a warning if the last reload failed. */
double lastReloadFailed = Application::GetLastReloadFailed();
auto lastReloadFailed (Application::GetLastReloadFailed());
String verboseText;

if (lastReloadFailed.first > 0) {
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed.first);

if (verbose.ToBool() && lastReloadFailed.second.GetLength()) {
output += ", see below";
verboseText = lastReloadFailed.second;
}

if (lastReloadFailed > 0) {
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed);
state =ServiceWarning;
}

Expand All @@ -187,6 +197,10 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
state = ServiceCritical;
}

if (verboseText.GetLength()) {
output += "\n\n" + verboseText;
}

String commandName = command->GetName();

if (Checkable::ExecuteCommandProcessFinishedHandler) {
Expand Down

0 comments on commit c6123c9

Please sign in to comment.