From e839b044d2e1300e5fce138d5518ed57270a292c Mon Sep 17 00:00:00 2001 From: d-netto Date: Tue, 10 Oct 2023 13:45:22 -0300 Subject: [PATCH] revert to 1.8 heuristics --- src/gc-debug.c | 35 +----- src/gc-pages.c | 4 - src/gc.c | 251 +++++++++++++------------------------------- src/gc.h | 11 +- src/julia_threads.h | 4 +- 5 files changed, 77 insertions(+), 228 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index 23d1ca8f5f884c..3f0f3368533a42 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -953,29 +953,6 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd, jl_ns2ms(gc_postmark_end - gc_premark_end), sweep_full ? "full" : "quick", -gc_num.allocd / 1024); } - -void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, - uint64_t freed, uint64_t live, uint64_t interval, - uint64_t pause, uint64_t ttsp, uint64_t mark, - uint64_t sweep) -{ - if (sweep_full > 0) - jl_safe_printf("TS: %" PRIu64 " Major collection: estimate freed = %" PRIu64 - " live = %" PRIu64 "m new interval = %" PRIu64 - "m time = %" PRIu64 "ms ttsp = %" PRIu64 "us mark time = %" - PRIu64 "ms sweep time = %" PRIu64 "ms \n", - end, freed, live/1024/1024, - interval/1024/1024, pause/1000000, ttsp, - mark/1000000,sweep/1000000); - else - jl_safe_printf("TS: %" PRIu64 " Minor collection: estimate freed = %" PRIu64 - " live = %" PRIu64 "m new interval = %" PRIu64 "m pause time = %" - PRIu64 "ms ttsp = %" PRIu64 "us mark time = %" PRIu64 - "ms sweep time = %" PRIu64 "ms \n", - end, freed, live/1024/1024, - interval/1024/1024, pause/1000000, ttsp, - mark/1000000,sweep/1000000); -} #endif void jl_gc_debug_init(void) @@ -1219,7 +1196,7 @@ JL_DLLEXPORT void jl_enable_gc_logging(int enable) { gc_logging_enabled = enable; } -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; } @@ -1228,16 +1205,6 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect full ? "full" : "incr", recollect ? "recollect" : "" ); - - jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n", - jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20), - jl_atomic_load_relaxed(&gc_heap_stats.bytes_resident)/(double)(1<<20), - // live_bytes/(double)(1<<20), live byes tracking is not accurate. - jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20), - jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20), - (double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - ); - // Should fragmentation use bytes_resident instead of heap_size? } #ifdef __cplusplus diff --git a/src/gc-pages.c b/src/gc-pages.c index 8d596f4a815ca8..682e76611f5d9d 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -52,8 +52,6 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT // round data pointer up to the nearest gc_page_data-aligned // boundary if mmap didn't already do so. mem = (char*)gc_page_data(mem + GC_PAGE_SZ - 1); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mapped, pages_sz); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, pages_sz); return mem; } @@ -117,7 +115,6 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT // try to get page from `pool_freed` meta = pop_lf_page_metadata_back(&global_page_pool_freed); if (meta != NULL) { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, GC_PAGE_SZ); gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } @@ -191,7 +188,6 @@ void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT madvise(p, decommit_size, MADV_DONTNEED); #endif msan_unpoison(p, decommit_size); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, -decommit_size); } #ifdef __cplusplus diff --git a/src/gc.c b/src/gc.c index 8969969ab66f5f..8e37fa95a6cfc2 100644 --- a/src/gc.c +++ b/src/gc.c @@ -189,7 +189,6 @@ jl_gc_num_t gc_num = {0}; static size_t last_long_collect_interval; int gc_n_threads; jl_ptls_t* gc_all_tls_states; -gc_heapstatus_t gc_heap_stats = {0}; int next_sweep_full = 0; const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) @@ -678,27 +677,21 @@ static int64_t last_gc_total_bytes = 0; #ifdef _P64 typedef uint64_t memsize_t; static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); +static const size_t max_collect_interval = 1250000000UL; static size_t total_mem; // We expose this to the user/ci as jl_gc_set_max_memory static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; #else typedef uint32_t memsize_t; static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); +static const size_t max_collect_interval = 500000000UL; // Work really hard to stay within 2GB // Alternative is to risk running out of address space // on 32 bit architectures. #define MAX32HEAP 1536 * 1024 * 1024 static memsize_t max_total_memory = (memsize_t) MAX32HEAP; #endif -// heuristic stuff for https://dl.acm.org/doi/10.1145/3563323 -static uint64_t old_pause_time = 0; -static uint64_t old_mut_time = 0; -static uint64_t old_heap_size = 0; -static uint64_t old_alloc_diff = 0; -static uint64_t old_freed_diff = 0; static uint64_t gc_end_time = 0; -static int thrash_counter = 0; -static int thrashing = 0; // global variables for GC stats // Resetting the object to a young object, this is used when marking the @@ -755,8 +748,9 @@ int under_pressure = 0; // Full collection heuristics static int64_t live_bytes = 0; static int64_t promoted_bytes = 0; +static int64_t last_full_live = 0; // live_bytes after last full collection static int64_t last_live_bytes = 0; // live_bytes at last collection -static int64_t t_start = 0; // Time GC starts; +static int64_t grown_heap_age = 0; // # of collects since live_bytes grew and remained #ifdef __GLIBC__ // maxrss at last malloc_trim static int64_t last_trim_maxrss = 0; @@ -937,7 +931,7 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL STATIC_INLINE void maybe_collect(jl_ptls_t ptls) { - if (jl_atomic_load_relaxed(&gc_heap_stats.heap_size) >= jl_atomic_load_relaxed(&gc_heap_stats.heap_target) || jl_gc_debug_check_other()) { + if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { @@ -1026,13 +1020,6 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + allocsz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } #ifdef MEMDEBUG memset(v, 0xee, allocsz); #endif @@ -1078,8 +1065,6 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT if (nxt) nxt->prev = pv; gc_num.freed += v->sz&~3; - jl_atomic_store_relaxed(&gc_heap_stats.heap_size, - jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - (v->sz&~3)); #ifdef MEMDEBUG memset(v, 0xbb, v->sz&~3); #endif @@ -1139,13 +1124,6 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT jl_ptls_t ptls = jl_current_task->ptls; jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + sz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } } static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT @@ -1158,16 +1136,12 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc - free_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); } } } @@ -1224,8 +1198,6 @@ static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT jl_free_aligned(d); else free(d); - jl_atomic_store_relaxed(&gc_heap_stats.heap_size, - jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_array_nbytes(a)); gc_num.freed += jl_array_nbytes(a); gc_num.freecall++; } @@ -1300,7 +1272,6 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT set_page_metadata(pg); push_page_metadata_back(&ptls->page_metadata_allocd, pg); jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, GC_PAGE_SZ); p->newpages = fl; return fl; } @@ -1495,10 +1466,8 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo else if (freed_lazily) { gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); push_page_metadata_back(lazily_freed, pg); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ); } else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ); #ifdef _P64 // only enable concurrent sweeping on 64bit if (jl_n_sweepthreads == 0) { jl_gc_free_page(pg); @@ -3138,8 +3107,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) jl_gc_markqueue_t *mq = &ptls->mark_queue; uint64_t gc_start_time = jl_hrtime(); - uint64_t mutator_time = gc_start_time - gc_end_time; - uint64_t before_free_heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); int64_t last_perm_scanned_bytes = perm_scanned_bytes; uint64_t start_mark_time = jl_hrtime(); JL_PROBE_GC_MARK_BEGIN(); @@ -3230,12 +3197,16 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t mark_time = end_mark_time - start_mark_time; gc_num.mark_time = mark_time; gc_num.total_mark_time += mark_time; + int64_t actual_allocd = gc_num.allocd; gc_settime_postmark_end(); // marking is over // Flush everything in mark cache gc_sync_all_caches_nolock(ptls); + int64_t live_sz_ub = live_bytes + actual_allocd; + int64_t live_sz_est = scanned_bytes + perm_scanned_bytes; + int64_t estimate_freed = live_sz_ub - live_sz_est; gc_verify(ptls); gc_stats_all_pool(); @@ -3246,16 +3217,53 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (!prev_sweep_full) promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes; // 5. next collection decision - int remset_nptr = 0; - int sweep_full = next_sweep_full; - int recollect = 0; + int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10)); + int nptr = 0; assert(gc_n_threads); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) - remset_nptr += ptls2->heap.remset_nptr; + nptr += ptls2->heap.remset_nptr; + } + int large_frontier = nptr*sizeof(void*) >= default_collect_interval; // many pointers in the intergen frontier => "quick" mark is not quick + // trigger a full collection if the number of live bytes doubles since the last full + // collection and then remains at least that high for a while. + if (grown_heap_age == 0) { + if (live_bytes > 2 * last_full_live) + grown_heap_age = 1; + } + else if (live_bytes >= last_live_bytes) { + grown_heap_age++; + } + int sweep_full = 0; + int recollect = 0; + if ((large_frontier || + ((not_freed_enough || promoted_bytes >= gc_num.interval) && + (promoted_bytes >= default_collect_interval || prev_sweep_full)) || + grown_heap_age > 1) && gc_num.pause > 1) { + sweep_full = 1; + } + // update heuristics only if this GC was automatically triggered + if (collection == JL_GC_AUTO) { + if (sweep_full) { + if (large_frontier) + gc_num.interval = last_long_collect_interval; + if (not_freed_enough || large_frontier) { + if (gc_num.interval <= 2*(max_collect_interval/5)) { + gc_num.interval = 5 * (gc_num.interval / 2); + } + } + last_long_collect_interval = gc_num.interval; + } + else { + // reset interval to default, or at least half of live_bytes + int64_t half = live_bytes/2; + if (default_collect_interval < half && half <= max_collect_interval) + gc_num.interval = half; + else + gc_num.interval = default_collect_interval; + } } - (void)remset_nptr; //Use this information for something? // If the live data outgrows the suggested max_total_memory @@ -3309,56 +3317,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_num.last_full_sweep = gc_end_time; } - size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); - double target_allocs = 0.0; - double min_interval = default_collect_interval; - if (collection == JL_GC_AUTO) { - uint64_t alloc_diff = before_free_heap_size - old_heap_size; - uint64_t freed_diff = before_free_heap_size - heap_size; - double alloc_smooth_factor = 0.95; - double collect_smooth_factor = 0.5; - double tuning_factor = 0.03; - double alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor); - double alloc_time = jl_gc_smooth(old_mut_time, mutator_time + sweep_time, alloc_smooth_factor); // Charge sweeping to the mutator - double gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor); - double gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor); - old_alloc_diff = alloc_diff; - old_mut_time = mutator_time; - old_freed_diff = freed_diff; - old_pause_time = pause; - old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC - if (gc_time > alloc_time * 95 && !(thrash_counter < 4)) - thrash_counter += 1; - else if (thrash_counter > 0) - thrash_counter -= 1; - if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0 ) { - double alloc_rate = alloc_mem/alloc_time; - double gc_rate = gc_mem/gc_time; - target_allocs = sqrt(((double)heap_size/min_interval * alloc_rate)/(gc_rate * tuning_factor)); // work on multiples of min interval - } - } - if (thrashing == 0 && thrash_counter >= 3) - thrashing = 1; - else if (thrashing == 1 && thrash_counter <= 2) - thrashing = 0; // maybe we should report this to the user or error out? - - int bad_result = (target_allocs*min_interval + heap_size) > 2 * jl_atomic_load_relaxed(&gc_heap_stats.heap_target); // Don't follow through on a bad decision - if (target_allocs == 0.0 || thrashing || bad_result) // If we are thrashing go back to default - target_allocs = 2*sqrt((double)heap_size/min_interval); - uint64_t target_heap = (uint64_t)target_allocs*min_interval + heap_size; - if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing if we die we die - target_heap = max_total_memory; - else if (target_heap < default_collect_interval) - target_heap = default_collect_interval; - jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap); - - double old_ratio = (double)promoted_bytes/(double)heap_size; - if (heap_size > max_total_memory * 0.8 || old_ratio > 0.15) - next_sweep_full = 1; - else - next_sweep_full = 0; - if (heap_size > max_total_memory * 0.8 || thrashing) - under_pressure = 1; // sweeping is over // 7. if it is a quick sweep, put back the remembered objects in queued state // so that we don't trigger the barrier again on them. @@ -3390,28 +3348,32 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) } #endif - _report_gc_finished(pause, gc_num.freed, sweep_full, recollect, live_bytes); + _report_gc_finished(pause, gc_num.freed, sweep_full, recollect); + + gc_final_pause_end(gc_start_time, gc_end_time); + gc_time_sweep_pause(gc_end_time, actual_allocd, live_bytes, + estimate_freed, sweep_full); + gc_num.full_sweep += sweep_full; uint64_t max_memory = last_live_bytes + gc_num.allocd; if (max_memory > gc_num.max_memory) { gc_num.max_memory = max_memory; } - gc_final_pause_end(gc_start_time, gc_end_time); - gc_time_sweep_pause(gc_end_time, allocd, live_bytes, - estimate_freed, sweep_full); - gc_num.full_sweep += sweep_full; + + gc_num.allocd = 0; last_live_bytes = live_bytes; - live_bytes += -gc_num.freed + gc_num.allocd; + live_bytes += -gc_num.freed + actual_allocd; jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed); - gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed, live_bytes, gc_num.interval, pause, gc_num.time_to_safepoint, gc_num.mark_time, gc_num.sweep_time); - + if (prev_sweep_full) { + last_full_live = live_bytes; + grown_heap_age = 0; + } prev_sweep_full = sweep_full; gc_num.pause += !recollect; gc_num.total_time += pause; - gc_num.allocd = 0; gc_num.freed = 0; if (pause > gc_num.max_pause) { gc_num.max_pause = pause; @@ -3597,7 +3559,6 @@ void jl_gc_init(void) arraylist_new(&finalizer_list_marked, 0); arraylist_new(&to_finalize, 0); - jl_atomic_store_relaxed(&gc_heap_stats.heap_target, default_collect_interval); gc_num.interval = default_collect_interval; last_long_collect_interval = default_collect_interval; gc_num.allocd = 0; @@ -3612,8 +3573,6 @@ void jl_gc_init(void) #endif if (jl_options.heap_size_hint) jl_gc_set_max_memory(jl_options.heap_size_hint - 250*1024*1024); - - t_start = jl_hrtime(); } JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) @@ -3653,13 +3612,6 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + sz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } } return data; } @@ -3676,13 +3628,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + sz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz * nm); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz * nm); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } } return data; } @@ -3694,13 +3639,10 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) free(p); if (pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; - uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - if (free_acc + sz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + sz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + sz)); - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); - } + jl_atomic_store_relaxed(&ptls->gc_num.freed, + jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); + jl_atomic_store_relaxed(&ptls->gc_num.freecall, + jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); } } @@ -3717,27 +3659,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - - int64_t diff = sz - old; - if (diff < 0) { - diff = -diff; - uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - if (free_acc + diff < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff)); - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); - } - } - else { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + diff < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } - } } return data; } @@ -3821,13 +3742,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + allocsz < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif @@ -3872,29 +3786,9 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - - int64_t diff = allocsz - oldsz; - if (diff < 0) { - diff = -diff; - uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); - if (free_acc + diff < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff)); - jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); - } + if (allocsz > oldsz) { + maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag); } - else { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); - if (alloc_acc + diff < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff); - else { - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff); - jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); - } - } - - maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag); return b; } @@ -3967,7 +3861,6 @@ static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned o #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size,sz); errno = last_errno; jl_may_leak(base); assert(align > 0); diff --git a/src/gc.h b/src/gc.h index 4bfe5dc328e9de..a00157c0d34ba0 100644 --- a/src/gc.h +++ b/src/gc.h @@ -258,13 +258,6 @@ typedef struct { pagetable1_t *meta1[REGION2_PG_COUNT]; } pagetable_t; -typedef struct { - _Atomic(size_t) bytes_mapped; - _Atomic(size_t) bytes_resident; - _Atomic(size_t) heap_size; - _Atomic(size_t) heap_target; -} gc_heapstatus_t; - #define GC_PAGE_UNMAPPED 0 #define GC_PAGE_ALLOCATED 1 #define GC_PAGE_LAZILY_FREED 2 @@ -382,7 +375,6 @@ extern int64_t lazy_freed_pages; extern int gc_first_tid; extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; -extern gc_heapstatus_t gc_heap_stats; STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT { @@ -646,7 +638,8 @@ void gc_count_pool(void); size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_enable_gc_logging(int enable); -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void); +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT; #ifdef __cplusplus } diff --git a/src/julia_threads.h b/src/julia_threads.h index d4cbb88e619ba7..f4c235243e6840 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -130,12 +130,12 @@ typedef struct { typedef struct { _Atomic(int64_t) allocd; + _Atomic(int64_t) freed; _Atomic(uint64_t) malloc; _Atomic(uint64_t) realloc; _Atomic(uint64_t) poolalloc; _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; + _Atomic(uint64_t) freecall; } jl_thread_gc_num_t; typedef struct {