pantheon-systems · jerryblakley · Apr 29, 2016 · Apr 18, 2016 · Apr 18, 2016 · Apr 20, 2016
@@ -38,6 +38,7 @@
 #include "stats.h"
 #include "session.h"
 #include "fusedav_config.h"
+#include "fusedav-statsd.h"
 
 #define REFRESH_INTERVAL 3
 #define CACHE_FILE_ENTROPY 20
@@ -141,14 +142,14 @@ static char *path2key(const char *path) {
  * to enhanced logging to LOG_NOTICE.
  */
 struct curl_slist* enhanced_logging(struct curl_slist *slist, int log_level, int section, const char *format, ...) {
-    va_list ap;
 
     // If we are injecting errors, we can trigger enhanced logging by decrementing
     // the log level (LOG_INFO -> LOG_NOTICE)
     if (inject_error(filecache_error_enhanced_logging)) {
         log_level -= 1;
     }
     if (logging(log_level, section)) {
+        va_list ap;
         char *user_agent = NULL;
         char msg[81] = {0};
         slist = curl_slist_append(slist, "Log-To-Journal: true");
@@ -363,13 +364,8 @@ static void get_fresh_fd(filecache_t *cache,
     struct timespec start_time;
     long response_code = 500; // seed it as bad so we can enter the loop
     CURLcode res = CURLE_OK;
-    static __thread unsigned long lgcount = 0;
-    static __thread long lglatency = 0;
-    static __thread time_t lgtime = 0;
-    static __thread unsigned long sgcount = 0;
-    static __thread long sglatency = 0;
-    static __thread time_t sgtime = 0;
     // Not to exceed time for operation, else it's an error. Allow large files a longer time
+    // Somewhat arbitrary
     static const unsigned small_time_allotment = 2000; // 2 seconds
     static const unsigned large_time_allotment = 8000; // 8 seconds
 
@@ -652,80 +648,68 @@ static void get_fresh_fd(filecache_t *cache,
             latency = FETCH(filecache_get_xlg_timing);
             count = FETCH(filecache_get_xlg_count);
             sz = "XLG";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lgtime,
-                "large-gets", &lgcount, 1,
-                "large-get-latency", &lglatency, elapsed_time);
+            stats_counter("large-gets", 1);
+            stats_timer("large-get-latency", elapsed_time);
         }
         else if (st.st_size > LG) {
             TIMING(filecache_get_lg_timing, elapsed_time);
             BUMP(filecache_get_lg_count);
             latency = FETCH(filecache_get_lg_timing);
             count = FETCH(filecache_get_lg_count);
             sz = "LG";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lgtime,
-                "large-gets", &lgcount, 1,
-                "large-get-latency", &lglatency, elapsed_time);
+            stats_counter("large-gets", 1);
+            stats_timer("large-get-latency", elapsed_time);
          }
         else if (st.st_size > MED) {
             TIMING(filecache_get_med_timing, elapsed_time);
             BUMP(filecache_get_med_count);
             latency = FETCH(filecache_get_med_timing);
             count = FETCH(filecache_get_med_count);
             sz = "MED";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lgtime,
-                "large-gets", &lgcount, 1,
-                "large-get-latency", &lglatency, elapsed_time);
+            stats_counter("large-gets", 1);
+            stats_timer("large-get-latency", elapsed_time);
         }
         else if (st.st_size > SM) {
             TIMING(filecache_get_sm_timing, elapsed_time);
             BUMP(filecache_get_sm_count);
             latency = FETCH(filecache_get_sm_timing);
             count = FETCH(filecache_get_sm_count);
             sz = "SM";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sgtime,
-                "small-gets", &sgcount, 1,
-                "small-get-latency", &sglatency, elapsed_time);
+            stats_counter("small-gets", 1);
+            stats_timer("small-get-latency", elapsed_time);
         }
         else if (st.st_size > XSM) {
             TIMING(filecache_get_xsm_timing, elapsed_time);
             BUMP(filecache_get_xsm_count);
             latency = FETCH(filecache_get_xsm_timing);
             count = FETCH(filecache_get_xsm_count);
             sz = "XSM";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sgtime,
-                "small-gets", &sgcount, 1,
-                "small-get-latency", &sglatency, elapsed_time);
+            stats_counter("small-gets", 1);
+            stats_timer("small-get-latency", elapsed_time);
         }
         else {
             TIMING(filecache_get_xxsm_timing, elapsed_time);
             BUMP(filecache_get_xxsm_count);
             latency = FETCH(filecache_get_xxsm_timing);
             count = FETCH(filecache_get_xxsm_count);
             sz = "XXSM";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sgtime,
-                "small-gets", &sgcount, 1,
-                "small-get-latency", &sglatency, elapsed_time);
+            stats_counter("small-gets", 1);
+            stats_timer("small-get-latency", elapsed_time);
         }
         log_print(LOG_DEBUG, SECTION_FILECACHE_OPEN, "put_fresh_fd: GET on size %s (%lu) for %s -- Current:Average latency %lu :: %lu",
             sz, st.st_size, path, elapsed_time, (latency / count));
 
-        /* The aggregate_log_print_server routine is expecting a cumulative count and latency, but by
-         * passing NULL for time, we ensure that aggregate prints this message and resets to 0, so
-         * there's no need to keep a static variable for count and latency.
-         */
         if (st.st_size >= LG && elapsed_time > large_time_allotment) {
-            unsigned long exceeded_count = 0;
             log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "put_fresh_fd: large (%lu) GET for %s exceeded time allotment %lu with %lu",
                 st.st_size, path, large_time_allotment, elapsed_time);
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "put_fresh_fd", NULL, "exceeded-time-large-GET-count",
-                &exceeded_count, 1, "exceeded-time-large-GET-latency", &elapsed_time, 0);
+            stats_counter("exceeded-time-large-GET-count", 1);
+            stats_timer("exceeded-time-large-GET-latency", elapsed_time);
         }
         else if (st.st_size < LG && elapsed_time > small_time_allotment) {
-            unsigned long exceeded_count = 0;
             log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "put_fresh_fd: small (%lu) GET for %s exceeded time allotment %lu with %lu",
                 st.st_size, path, small_time_allotment, elapsed_time);
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "put_fresh_fd", NULL, "exceeded-time-small-GET-count",
-                &exceeded_count, 1, "exceeded-time-small-GET-latency", &elapsed_time, 0);
+            stats_counter("exceeded-time-small-GET-count", 1);
+            stats_timer("exceeded-time-small-GET-latency", elapsed_time);
         }
     }
     else if (response_code == 404) {
@@ -1006,13 +990,8 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr)
     struct timespec start_time;
     long response_code = 500; // seed it as bad so we can enter the loop
     CURLcode res = CURLE_OK;
-    static __thread unsigned long lpcount = 0;
-    static __thread long lplatency = 0;
-    static __thread time_t lptime = 0;
-    static __thread unsigned long spcount = 0;
-    static __thread long splatency = 0;
-    static __thread time_t sptime = 0;
     // Not to exceed time for operation, else it's an error. Allow large files a longer time
+    // Somewhat arbitrary
     static const unsigned small_time_allotment = 2000; // 2 seconds
     static const unsigned large_time_allotment = 8000; // 8 seconds
 
@@ -1135,80 +1114,68 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr)
             latency = FETCH(filecache_put_xlg_timing);
             count = FETCH(filecache_put_xlg_count);
             sz = "XLG";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lptime,
-                "large-puts", &lpcount, 1,
-                "large-put-latency", &lplatency, elapsed_time);
+            stats_counter("large-puts", 1);
+            stats_timer("large-put-latency", elapsed_time);
         }
         else if (st.st_size > LG) {
             TIMING(filecache_put_lg_timing, elapsed_time);
             BUMP(filecache_put_lg_count);
             latency = FETCH(filecache_put_lg_timing);
             count = FETCH(filecache_put_lg_count);
             sz = "LG";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lptime,
-                "large-puts", &lpcount, 1,
-                "large-put-latency", &lplatency, elapsed_time);
+            stats_counter("large-puts", 1);
+            stats_timer("large-put-latency", elapsed_time);
          }
         else if (st.st_size > MED) {
             TIMING(filecache_put_med_timing, elapsed_time);
             BUMP(filecache_put_med_count);
             latency = FETCH(filecache_put_med_timing);
             count = FETCH(filecache_put_med_count);
             sz = "MED";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &lptime,
-                "large-puts", &lpcount, 1,
-                "large-put-latency", &lplatency, elapsed_time);
+            stats_counter("large-puts", 1);
+            stats_timer("large-put-latency", elapsed_time);
         }
         else if (st.st_size > SM) {
             TIMING(filecache_put_sm_timing, elapsed_time);
             BUMP(filecache_put_sm_count);
             latency = FETCH(filecache_put_sm_timing);
             count = FETCH(filecache_put_sm_count);
             sz = "SM";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sptime,
-                "small-puts", &spcount, 1,
-                "small-put-latency", &splatency, elapsed_time);
+            stats_counter("small-puts", 1);
+            stats_timer("small-put-latency", elapsed_time);
         }
         else if (st.st_size > XSM) {
             TIMING(filecache_put_xsm_timing, elapsed_time);
             BUMP(filecache_put_xsm_count);
             latency = FETCH(filecache_put_xsm_timing);
             count = FETCH(filecache_put_xsm_count);
             sz = "XSM";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sptime,
-                "small-puts", &spcount, 1,
-                "small-put-latency", &splatency, elapsed_time);
+            stats_counter("small-puts", 1);
+            stats_timer("small-put-latency", elapsed_time);
         }
         else {
             TIMING(filecache_put_xxsm_timing, elapsed_time);
             BUMP(filecache_put_xxsm_count);
             latency = FETCH(filecache_put_xxsm_timing);
             count = FETCH(filecache_put_xxsm_count);
             sz = "XXSM";
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, &sptime,
-                "small-puts", &spcount, 1,
-                "small-put-latency", &splatency, elapsed_time);
+            stats_counter("small-puts", 1);
+            stats_timer("small-put-latency", elapsed_time);
         }
         log_print(LOG_DEBUG, SECTION_FILECACHE_OPEN, "%s: PUT on size %s (%lu) for %s -- Current:Average latency %lu :: %lu",
             funcname, sz, st.st_size, path, elapsed_time, (latency / count));
 
-        /* The aggregate_log_print_server routine is expecting a cumulative count and latency, but by
-         * passing NULL for time, we ensure that aggregate prints this message and resets to 0, so
-         * there's no need to keep a static variable for count and latency.
-         */
         if (st.st_size >= LG && elapsed_time > large_time_allotment) {
-            unsigned long exceeded_count = 0;
             log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "%s: large (%lu) PUT for %s exceeded time allotment %lu with %lu",
                 funcname, st.st_size, path, large_time_allotment, elapsed_time);
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, NULL, "exceeded-time-large-PUT-count",
-                &exceeded_count, 1, "exceeded-time-large-PUT-latency", &elapsed_time, 0);
+            stats_counter("exceeded-time-large-PUT-count", 1);
+            stats_timer("exceeded-time-large-PUT-latency", elapsed_time);
         }
         else if (st.st_size < LG && elapsed_time > small_time_allotment) {
-            unsigned long exceeded_count = 0;
             log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "%s: small (%lu) PUT for %s exceeded time allotment %lu with %lu",
                 funcname, st.st_size, path, small_time_allotment, elapsed_time);
-            aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, funcname, NULL, "exceeded-time-small-PUT-count",
-                &exceeded_count, 1, "exceeded-time-small-PUT-latency", &elapsed_time, 0);
+            stats_counter("exceeded-time-small-PUT-count", 1);
+            stats_timer("exceeded-time-small-PUT-latency", elapsed_time);
         }
     }
 

@@ -73,24 +73,22 @@ static G_DEFINE_QUARK("FUSEDAV", fusedav)
 
 static int processed_gerror(const char *prefix, const char *path, GError **pgerr) {
     int ret;
-    static __thread unsigned long count = 0;
-    static __thread time_t previous_time = 0;
     GError *gerr = *pgerr;
 
     log_print(LOG_ERR, SECTION_FUSEDAV_DEFAULT, "%s on %s: %s -- %d: %s",
         prefix, path ? path : "null path", gerr->message, gerr->code, g_strerror(gerr->code));
     ret = -gerr->code;
     if (gerr->code == ENOENT) {
-        aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "processed_gerror", &previous_time, "error-ENOENT", &count, 1, NULL, NULL, 0);
+        stats_counter("error-ENOENT", 1);
     }
     else if (gerr->code == ENETDOWN) {
-        aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "processed_gerror", &previous_time, "error-ENETDOWN", &count, 1, NULL, NULL, 0);
+        stats_counter("error-ENETDOWN", 1);
     }
     else if (gerr->code == EIO) {
-        aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "processed_gerror", &previous_time, "error-EIO", &count, 1, NULL, NULL, 0);
+        stats_counter("error-EIO", 1);
     }
     else {
-        aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "processed_gerror", &previous_time, "error-OTHER", &count, 1, NULL, NULL, 0);
+        stats_counter("error-OTHER", 1);
     }
     g_clear_error(pgerr);
 
@@ -109,32 +107,24 @@ static int simple_propfind_with_redirect(
     struct timespec start_time;
     struct timespec now;
     long elapsed_time;
-    static __thread unsigned long count = 0;
-    static __thread long latency = 0;
-    static __thread time_t time = 0;
-    unsigned long exceeded_count = 0;
-    // Alert on propfind taking longer than 10 seconds
+    // Alert on propfind taking longer than 4 seconds. This is rather arbitrary.
     static const unsigned propfind_time_allotment = 4000; // 4 seconds
     int ret;
 
-    log_print(LOG_DEBUG, SECTION_FUSEDAV_STAT, "simple_propfind_with_redirect: Performing (%s) PROPFIND of depth %d on path %s.", last_updated > 0 ? "progressive" : "complete", depth, path);
+    log_print(LOG_DEBUG, SECTION_FUSEDAV_STAT, "simple_propfind_with_redirect: Performing (%s) PROPFIND of depth %d on path %s.", 
+            last_updated > 0 ? "progressive" : "complete", depth, path);
 
     clock_gettime(CLOCK_MONOTONIC, &start_time);
     ret = simple_propfind(path, depth, last_updated, result_callback, userdata, &subgerr);
-    aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "simple_propfind_with_redirect", &time, "propfind-count", &count, 1,
-        "propfind-latency", &latency, elapsed_time);
-    stats_counter("propfind-count", 1);
     clock_gettime(CLOCK_MONOTONIC, &now);
     elapsed_time = ((now.tv_sec - start_time.tv_sec) * 1000) + ((now.tv_nsec - start_time.tv_nsec) / (1000 * 1000));
-    /* The aggregate_log_print_server routine is expecting a cumulative count and latency, but by
-     * passing NULL for time, we ensure that aggregate prints this message and resets to 0, so
-     * there's no need to keep a static variable for count and latency.
-     */
+    stats_counter("propfind-count", 1);
+    stats_timer("propfind-latency", elapsed_time);
     if (elapsed_time > propfind_time_allotment) {
         log_print(LOG_DEBUG, SECTION_FUSEDAV_STAT, "simple_propfind_with_redirect: (%s) PROPFIND exceeded allotment of %u ms; took %u ms.",
             last_updated > 0 ? "progressive" : "complete", propfind_time_allotment, elapsed_time);
-        aggregate_log_print_server(LOG_INFO, SECTION_ENHANCED, "simple_propfind_with_redirect", NULL, "exceeded-time-propfind-count",
-            &exceeded_count, 1, "exceeded-time-propfind-latency", &elapsed_time, 0);
+        stats_counter("exceeded-time-propfind-count", 1);
+        stats_timer("exceeded-time-propfind-latency", elapsed_time);
     }
     if (subgerr) {
         g_propagate_prefixed_error(gerr, subgerr, "simple_propfind_with_redirect: ");
@@ -322,12 +312,12 @@ static void update_directory(const char *path, bool attempt_progressive_update,
     struct fusedav_config *config = fuse_get_context()->private_data;
     GError *tmpgerr = NULL;
     bool needs_update = true;
-    time_t last_updated;
     time_t timestamp;
     int propfind_result;
 
     // Attempt to freshen the cache.
     if (attempt_progressive_update && config->progressive_propfind) {
+        time_t last_updated;
         timestamp = time(NULL);
         last_updated = stat_cache_read_updated_children(config->cache, path, &tmpgerr);
         if (tmpgerr) {
@@ -409,7 +399,6 @@ static int dav_readdir(
     struct fusedav_config *config = fuse_get_context()->private_data;
     struct fill_info f;
     GError *gerr = NULL;
-    int ret;
     int iters = 2;
     bool ignore_freshness = false;
 
@@ -445,6 +434,7 @@ static int dav_readdir(
      * way, the first access after network failure acts like a subsequent access in saint mode.
      */
     for (int idx = 0; idx < iters; idx++) {
+        int ret;
         // First, attempt to hit the cache.
         ret = stat_cache_enumerate(config->cache, path, getdir_cache_callback, &f, ignore_freshness);
         if (ret < 0) {
@@ -596,9 +586,6 @@ static void get_stat(const char *path, struct stat *stbuf, GError **gerr) {
     time_t parent_children_update_ts;
     bool is_base_directory;
     int ret = -ENOENT;
-    // We are sharing these variables between negative and nonnegative cache counting. Should be fine.
-    static __thread unsigned long count = 0;
-    static __thread time_t previous_time = 0;
     enum ignore_freshness skip_freshness_check = OFF;
 
     memset(stbuf, 0, sizeof(struct stat));
@@ -724,7 +711,7 @@ static void get_stat(const char *path, struct stat *stbuf, GError **gerr) {
     if (parent_children_update_ts < (time(NULL) - STAT_CACHE_NEGATIVE_TTL)) {
         GError *subgerr = NULL;
 
-        aggregate_log_print_local(LOG_INFO, SECTION_ENHANCED, "get_stat", &previous_time, "propfind-nonnegative-cache", &count, 1, NULL, NULL, 0);
+        stats_counter("propfind-nonnegative-cache", 1);
         log_print(LOG_INFO, SECTION_FUSEDAV_STAT, "get_stat: Calling update_directory: %s; attempt_progressive_update will be %d",
             parent_path, (parent_children_update_ts > 0));
         // If parent_children_update_ts is 0, there are no entries for updated_children in statcache
@@ -735,7 +722,7 @@ static void get_stat(const char *path, struct stat *stbuf, GError **gerr) {
             goto fail;
         }
     } else {
-        aggregate_log_print_local(LOG_INFO, SECTION_ENHANCED, "get_stat", &previous_time, "propfind-negative-cache", &count, 1, NULL, NULL, 0);
+        stats_counter("propfind-negative-cache", 1);
         BUMP(propfind_negative_cache);
     }