From aa48cb0ef744e6d8e7541e13212af422c0ad1aee Mon Sep 17 00:00:00 2001 From: Jian Fang <131804380+JianFangAtRai@users.noreply.github.com> Date: Wed, 10 Jan 2024 18:35:41 -0800 Subject: [PATCH] Change to streaming out the heap snapshot data (#1) * Streaming the heap snapshot! This should prevent the engine from OOMing while recording the snapshot! Now we just need to sample the files, either online, before downloading, or offline after downloading :) If we're gonna do it offline, we'll want to gzip the files before downloading them. * Allow custom filename; use original API * Support legacy heap snapshot interface. Add reassembly function. * Add tests * Apply suggestions from code review * Update src/gc-heap-snapshot.cpp * Change to always save the parts in the same directory This way you can always recover from an OOM * Fix bug in reassembler: from_node and to_node were in the wrong order * Fix correctness mistake: The edges have to be reordered according to the node order. That's the whole reason this is tricky. But i'm not sure now whether the SoAs approach is actually an optimization.... It seems like we should probably prefer to inline the Edges right into the vector, rather than having to do another random lookup into the edges table? * Debugging messed up edge array idxs * Disable log message * Write the .nodes and .edges as binary data * Remove unnecessary logging * fix merge issues * attempt to add back the orphan node checking logic --------- Co-authored-by: Nathan Daly Co-authored-by: Nathan Daly --- src/gc-heap-snapshot.cpp | 295 ++++++++---------- src/gc-heap-snapshot.h | 3 +- stdlib/Profile/src/Profile.jl | 71 ++++- stdlib/Profile/src/heapsnapshot_reassemble.jl | 182 +++++++++++ .../Profile/test/heapsnapshot_reassemble.jl | 31 ++ stdlib/Profile/test/runtests.jl | 1 + 6 files changed, 413 insertions(+), 170 deletions(-) create mode 100644 stdlib/Profile/src/heapsnapshot_reassemble.jl create mode 100644 stdlib/Profile/test/heapsnapshot_reassemble.jl diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 24df141d053f4..5311840811f2c 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -58,8 +58,9 @@ void print_str_escape_json(ios_t *stream, StringRef s) // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 struct Edge { - size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + uint8_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t from_node; // This is a deviation from the .heapsnapshot format to support streaming. size_t to_node; }; @@ -70,15 +71,14 @@ struct Edge { const int k_node_number_of_fields = 7; struct Node { - size_t type; // index into snapshot->node_types + uint8_t type; // index into snapshot->node_types size_t name; size_t id; // This should be a globally-unique counter, but we use the memory address size_t self_size; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. // whether the from_node is attached or detached from the main application state // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 - int detachedness; // 0 - unknown, 1 - attached, 2 - detached - SmallVector edges; + uint8_t detachedness; // 0 - unknown, 1 - attached, 2 - detached ~Node() JL_NOTSAFEPOINT = default; }; @@ -111,15 +111,27 @@ struct StringTable { }; struct HeapSnapshot { - SmallVector nodes; - // edges are stored on each from_node - StringTable names; StringTable node_types; StringTable edge_types; DenseMap node_ptr_to_index_map; - size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. + size_t num_nodes = 0; // Since we stream out to files, + size_t num_edges = 0; // we need to track the counts here. + + // Node internal_root; + + // Used for streaming + // Since nodes and edges are just one giant array of integers, we stream them as + // *BINARY DATA*: a sequence of bytes, each of which is a 64-bit integer (big enough to + // fit the pointer ids). + ios_t *nodes; + ios_t *edges; + // These files are written out as json data. + ios_t *strings; + ios_t *json; + + size_t internal_root_idx = 0; // node index of the internal root node size_t _gc_root_idx = 1; // node index of the GC roots node size_t _gc_finlist_root_idx = 2; // node index of the GC finlist roots node }; @@ -130,17 +142,22 @@ int gc_heap_snapshot_enabled = 0; HeapSnapshot *g_snapshot = nullptr; extern jl_mutex_t heapsnapshot_lock; +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one); void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one); static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT; -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; void _add_synthetic_root_entries(HeapSnapshot *snapshot); -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one) { HeapSnapshot snapshot; - _add_synthetic_root_entries(&snapshot); + snapshot.nodes = nodes; + snapshot.edges = edges; + snapshot.strings = strings; + snapshot.json = json; jl_mutex_lock(&heapsnapshot_lock); @@ -148,6 +165,8 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) g_snapshot = &snapshot; gc_heap_snapshot_enabled = true; + _add_synthetic_root_entries(&snapshot); + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` jl_gc_collect(JL_GC_FULL); @@ -159,7 +178,33 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) // When we return, the snapshot is full // Dump the snapshot - serialize_heap_snapshot((ios_t*)stream, snapshot, all_one); + final_serialize_heap_snapshot((ios_t*)json, (ios_t*)strings, snapshot, all_one); +} + +void serialize_node(HeapSnapshot *snapshot, const Node &node) +{ + // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + ios_write(snapshot->nodes, (char*)&node.type, sizeof(node.type)); + ios_write(snapshot->nodes, (char*)&node.name, sizeof(node.name)); + ios_write(snapshot->nodes, (char*)&node.id, sizeof(node.id)); + ios_write(snapshot->nodes, (char*)&node.self_size, sizeof(node.self_size)); + // NOTE: We don't write edge_count, since it's always 0. It will be reconstructed in + // post-processing. + ios_write(snapshot->nodes, (char*)&node.trace_node_id, sizeof(node.trace_node_id)); + ios_write(snapshot->nodes, (char*)&node.detachedness, sizeof(node.detachedness)); + + g_snapshot->num_nodes += 1; +} +void serialize_edge(HeapSnapshot *snapshot, const Edge &edge) +{ + // ["type","name_or_index","to_node"] + ios_write(snapshot->edges, (char*)&edge.type, sizeof(edge.type)); + ios_write(snapshot->edges, (char*)&edge.name_or_index, sizeof(edge.name_or_index)); + // NOTE: Row numbers for nodes (not adjusted for k_node_number_of_fields) + ios_write(snapshot->edges, (char*)&edge.from_node, sizeof(edge.from_node)); + ios_write(snapshot->edges, (char*)&edge.to_node, sizeof(edge.to_node)); + + g_snapshot->num_edges += 1; } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L212 @@ -169,60 +214,59 @@ void _add_synthetic_root_entries(HeapSnapshot *snapshot) // adds a node at id 0 which is the "uber root": // a synthetic node which points to all the GC roots. Node internal_root{ - snapshot->node_types.find_or_create_string_id("synthetic"), + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id(""), // name 0, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(internal_root); + serialize_node(snapshot, internal_root); // Add a node for the GC roots - snapshot->_gc_root_idx = snapshot->nodes.size(); + snapshot->_gc_root_idx = snapshot->internal_root_idx + 1; Node gc_roots{ - snapshot->node_types.find_or_create_string_id("synthetic"), + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id("GC roots"), // name snapshot->_gc_root_idx, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(gc_roots); - snapshot->nodes.front().edges.push_back(Edge{ + serialize_node(snapshot, gc_roots); + Edge root_to_gc_roots{ snapshot->edge_types.find_or_create_string_id("internal"), snapshot->names.find_or_create_string_id("GC roots"), // edge label + snapshot->internal_root_idx, // from snapshot->_gc_root_idx // to - }); - snapshot->num_edges += 1; + }; + serialize_edge(snapshot, root_to_gc_roots); // add a node for the gc finalizer list roots - snapshot->_gc_finlist_root_idx = snapshot->nodes.size(); + snapshot->_gc_finlist_root_idx = snapshot->internal_root_idx + 2; Node gc_finlist_roots{ - snapshot->node_types.find_or_create_string_id("synthetic"), + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id("GC finalizer list roots"), // name snapshot->_gc_finlist_root_idx, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(gc_finlist_roots); - snapshot->nodes.front().edges.push_back(Edge{ + serialize_node(snapshot, gc_finlist_roots); + Edge root_to_gc_finlist_roots{ snapshot->edge_types.find_or_create_string_id("internal"), - snapshot->names.find_or_create_string_id("GC finlist roots"), // edge label + snapshot->names.find_or_create_string_id("GC finalizer list roots"), // edge label + snapshot->internal_root_idx, // from snapshot->_gc_finlist_root_idx // to - }); - snapshot->num_edges += 1; + }; + serialize_edge(snapshot, root_to_gc_finlist_roots); } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 // returns the index of the new node size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } @@ -292,8 +336,8 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT name = StringRef((const char*)str_.buf, str_.size); } - g_snapshot->nodes.push_back(Node{ - g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; + auto node = Node{ + (uint8_t)g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; g_snapshot->names.find_or_create_string_id(name), // size_t name; (size_t)a, // size_t id; // We add 1 to self-size for the type tag that all heap-allocated objects have. @@ -301,8 +345,8 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT sizeof(void*) + self_size, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); if (ios_need_close) ios_close(&str_); @@ -312,20 +356,20 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef name) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - g_snapshot->nodes.push_back(Node{ - g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; + auto node = Node{ + (uint8_t)g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; g_snapshot->names.find_or_create_string_id(name), // size_t name; (size_t)a, // size_t id; bytes, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); return val.first->second; } @@ -361,36 +405,29 @@ static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT } } - void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - record_node_to_gc_snapshot(root); - - auto &internal_root = g_snapshot->nodes.front(); - auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; + size_t to_node_idx = record_node_to_gc_snapshot(root); auto edge_label = g_snapshot->names.find_or_create_string_id(name); - _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label); + _record_gc_just_edge("internal", g_snapshot->internal_root_idx, to_node_idx, edge_label); } void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - record_node_to_gc_snapshot(root); - - auto from_node_idx = g_snapshot->_gc_root_idx; auto to_node_idx = record_node_to_gc_snapshot(root); auto edge_label = g_snapshot->names.find_or_create_string_id(name); - _record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label); + + _record_gc_just_edge("internal", g_snapshot->_gc_root_idx, to_node_idx, edge_label); } void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEPOINT { - auto from_node_idx = g_snapshot->_gc_finlist_root_idx; auto to_node_idx = record_node_to_gc_snapshot(obj); ostringstream ss; ss << "finlist-" << index; auto edge_label = g_snapshot->names.find_or_create_string_id(ss.str()); - _record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label); + _record_gc_just_edge("internal", g_snapshot->_gc_finlist_root_idx, to_node_idx, edge_label); } // Add a node to the heap snapshot representing a Julia stack frame. @@ -399,20 +436,20 @@ void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEP // Stack frame nodes point at the objects they have as local variables. size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - snapshot->nodes.push_back(Node{ - snapshot->node_types.find_or_create_string_id("synthetic"), + auto node = Node{ + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id("(stack frame)"), // name (size_t)frame, // id 1, // size 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges - }); + }; + serialize_node(snapshot, node); return val.first->second; } @@ -421,30 +458,27 @@ void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) J { auto from_node_idx = _record_stack_frame_node(g_snapshot, (jl_gcframe_t*)from); auto to_idx = record_node_to_gc_snapshot(to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); - _record_gc_just_edge("internal", from_node, to_idx, name_idx); + _record_gc_just_edge("internal", from_node_idx, to_idx, name_idx); } void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT { auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { auto from_node_idx = _record_stack_frame_node(g_snapshot, from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("next frame"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT @@ -464,12 +498,12 @@ void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_value_t auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)module); auto to_bindings_idx = record_node_to_gc_snapshot(bindings); auto to_bindingkeyset_idx = record_node_to_gc_snapshot(bindingkeyset); - auto &from_node = g_snapshot->nodes[from_node_idx]; + if (to_bindings_idx > 0) { - _record_gc_just_edge("internal", from_node, to_bindings_idx, g_snapshot->names.find_or_create_string_id("bindings")); + _record_gc_just_edge("internal", from_node_idx, to_bindings_idx, g_snapshot->names.find_or_create_string_id("bindings")); } if (to_bindingkeyset_idx > 0) { - _record_gc_just_edge("internal", from_node, to_bindingkeyset_idx, g_snapshot->names.find_or_create_string_id("bindingkeyset")); + _record_gc_just_edge("internal", from_node_idx, to_bindingkeyset_idx, g_snapshot->names.find_or_create_string_id("bindingkeyset")); } } @@ -501,9 +535,8 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t byt break; } auto to_node_idx = record_pointer_to_gc_snapshot(to, bytes, alloc_kind); - auto &from_node = g_snapshot->nodes[from_node_idx]; - _record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx); + _record_gc_just_edge("hidden", from_node_idx, to_node_idx, name_or_idx); } static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, @@ -512,104 +545,46 @@ static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, auto from_node_idx = record_node_to_gc_snapshot(a); auto to_node_idx = record_node_to_gc_snapshot(b); - auto &from_node = g_snapshot->nodes[from_node_idx]; - - _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); + _record_gc_just_edge(edge_type, from_node_idx, to_node_idx, name_or_idx); } -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT { - from_node.edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id(edge_type), + auto edge = Edge{ + (uint8_t)g_snapshot->edge_types.find_or_create_string_id(edge_type), name_or_idx, // edge label + from_idx, // from to_idx // to - }); + }; - g_snapshot->num_edges += 1; + serialize_edge(g_snapshot, edge); } -void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one) +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one) { // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 - ios_printf(stream, "{\"snapshot\":{"); - ios_printf(stream, "\"meta\":{"); - ios_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); - ios_printf(stream, "\"node_types\":["); - snapshot.node_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); - ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); - ios_printf(stream, "\"edge_types\":["); - snapshot.edge_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string_or_number\",\"from_node\"]"); - ios_printf(stream, "},\n"); // end "meta" - ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); - ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); - ios_printf(stream, "},\n"); // end "snapshot" - - ios_printf(stream, "\"nodes\":["); - bool first_node = true; - // use a set to track the nodes that do not have parents - set orphans; - for (const auto &from_node : snapshot.nodes) { - if (first_node) { - first_node = false; - } - else { - ios_printf(stream, ","); - } - // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - ios_printf(stream, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", - from_node.type, - from_node.name, - from_node.id, - all_one ? (size_t)1 : from_node.self_size, - from_node.edges.size(), - from_node.trace_node_id, - from_node.detachedness); - if (from_node.id != snapshot._gc_root_idx && from_node.id != snapshot._gc_finlist_root_idx) { - // find the node index from the node object pointer - void * ptr = (void*)from_node.id; - size_t n_id = snapshot.node_ptr_to_index_map[ptr]; - orphans.insert(n_id); - } else { - orphans.insert(from_node.id); - } - } - ios_printf(stream, "],\n"); - - ios_printf(stream, "\"edges\":["); - bool first_edge = true; - for (const auto &from_node : snapshot.nodes) { - for (const auto &edge : from_node.edges) { - if (first_edge) { - first_edge = false; - } - else { - ios_printf(stream, ","); - } - ios_printf(stream, "%zu,%zu,%zu\n", - edge.type, - edge.name_or_index, - edge.to_node * k_node_number_of_fields); - auto n_id = edge.to_node; - auto it = orphans.find(n_id); - if (it != orphans.end()) { - // remove the node from the orphans if it has at least one incoming edge - orphans.erase(it); - } - } - } - ios_printf(stream, "],\n"); // end "edges" - - ios_printf(stream, "\"strings\":"); - - snapshot.names.print_json_array(stream, true); - - ios_printf(stream, "}"); + ios_printf(json, "{\"snapshot\":{"); + ios_printf(json, "\"meta\":{"); + ios_printf(json, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + ios_printf(json, "\"node_types\":["); + snapshot.node_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); + ios_printf(json, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + ios_printf(json, "\"edge_types\":["); + snapshot.edge_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string_or_number\",\"from_node\"]"); + ios_printf(json, "},\n"); // end "meta" + ios_printf(json, "\"node_count\":%zu,", snapshot.num_nodes); + ios_printf(json, "\"edge_count\":%zu", snapshot.num_edges); + ios_printf(json, "}\n"); // end "snapshot" + ios_printf(json, "}"); + + ios_printf(strings, "{\n"); + ios_printf(strings, "\"strings\":"); + + snapshot.names.print_json_array(strings, true); + ios_printf(strings, "}"); - // remove the uber node from the orphans - orphans.erase(0); - assert(orphans.size() == 0 && "all nodes except the uber node should have at least one incoming edge"); } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 1799063825e83..70884f5f62d6a 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -120,7 +120,8 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot // --------------------------------------------------------------------- -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one); +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one); #ifdef __cplusplus diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index aad97be683309..c13e71cc0933c 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1245,9 +1245,8 @@ end """ - Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false) - Profile.take_heap_snapshot(filepath::String, all_one::Bool=false) - Profile.take_heap_snapshot(all_one::Bool=false; dir::String) + Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false) + Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false) Write a snapshot of the heap, in the JSON format expected by the Chrome Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file @@ -1257,16 +1256,69 @@ full file path, or IO stream. If `all_one` is true, then report the size of every object as one so they can be easily counted. Otherwise, report the actual size. + +If `streaming` is true, we will stream the snapshot data out into four files, using filepath +as the prefix, to avoid having to hold the entire snapshot in memory. This option should be +used for any setting where your memory is constrained. These files can then be reassembled +by calling [`Profile.HeapSnapshot.assemble_snapshot(filepath)`](@ref), which can +be done offline. + +NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing +the snapshot from the parts requires holding the entire snapshot in memory, so if the +snapshot is large, you can run out of memory while processing it. Streaming allows you to +reconstruct the snapshot offline, after your workload is done running. +If you do attempt to collect a snapshot with streaming=false (the default, for +backwards-compatibility) and your process is killed, note that this will always save the +parts in the same directory as your provided filepath, so you can still reconstruct the +snapshot after the fact, via `assemble_snapshot()`. """ -function take_heap_snapshot(io::IOStream, all_one::Bool=false) - Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) -end -function take_heap_snapshot(filepath::String, all_one::Bool=false) - open(filepath, "w") do io - take_heap_snapshot(io, all_one) +function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false) + if streaming + _stream_heap_snapshot(filepath, all_one) + println("Finished streaming heap snapshot parts to prefix: $filepath") + else + # Support the legacy, non-streaming mode, by first streaming the parts, then + # reassembling it after we're done. + prefix = filepath + _stream_heap_snapshot(prefix, all_one) + Profile.HeapSnapshot.assemble_snapshot(prefix, filepath) + println("Recorded heap snapshot: $filepath") end return filepath end +function take_heap_snapshot(io::IO, all_one::Bool=false) + # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir, + # then reassembling it after we're done. + dir = tempdir() + prefix = joinpath(dir, "snapshot") + _stream_heap_snapshot(prefix, all_one) + Profile.HeapSnapshot.assemble_snapshot(prefix, io) +end +function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool) + # Nodes and edges are binary files + open("$prefix.nodes", "w") do nodes + open("$prefix.edges", "w") do edges + # The other two files are json data + open("$prefix.strings.json", "w") do strings + open("$prefix.metadata.json", "w") do json + Base.@_lock_ios(nodes, + Base.@_lock_ios(edges, + Base.@_lock_ios(strings, + Base.@_lock_ios(json, + ccall(:jl_gc_take_heap_snapshot, + Cvoid, + (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar), + nodes.handle, edges.handle, strings.handle, json.handle, + Cchar(all_one)) + ) + ) + ) + ) + end + end + end + end +end function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString} fname = "$(getpid())_$(time_ns()).heapsnapshot" if isnothing(dir) @@ -1302,6 +1354,7 @@ function take_page_profile(filepath::String) end include("Allocs.jl") +include("heapsnapshot_reassemble.jl") include("precompile.jl") end # module diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..4e384d8f82a45 --- /dev/null +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -0,0 +1,182 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module HeapSnapshot + +# SoA layout to reduce padding +struct Edges + type::Vector{Int8} # index into `snapshot.meta.edge_types` + name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type + to_pos::Vector{UInt} # index into `snapshot.nodes` +end +function init_edges(n::Int) + Edges( + Vector{Int8}(undef, n), + Vector{UInt}(undef, n), + Vector{UInt}(undef, n), + ) +end +Base.length(n::Edges) = length(n.type) + +# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them +struct Nodes + type::Vector{Int8} # index into `snapshot.meta.node_types` + name_idx::Vector{UInt32} # index into `snapshot.strings` + id::Vector{UInt} # unique id, in julia it is the address of the object + self_size::Vector{Int} # size of the object itself, not including the size of its fields + edge_count::Vector{UInt} # number of outgoing edges + edges::Edges # outgoing edges + # This is the main complexity of the .heapsnapshot format, and it's the reason we need + # to read in all the data before writing it out. The edges vector contains all edges, + # but organized by which node they came from. First, it contains all the edges coming + # out of node 0, then all edges leaving node 1, etc. So we need to have visited all + # edges, and assigned them to their corresponding nodes, before we can emit the file. + edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids +end +function init_nodes(n::Int, e::Int) + Nodes( + Vector{Int8}(undef, n), + Vector{UInt32}(undef, n), + Vector{UInt}(undef, n), + Vector{Int}(undef, n), + Vector{UInt32}(undef, n), + init_edges(e), + [Vector{UInt}() for _ in 1:n], # Take care to construct n separate empty vectors + ) +end +Base.length(n::Nodes) = length(n.type) + +# Like Base.dec, but doesn't allocate a string and writes directly to the io object +# We know all of the numbers we're about to write fit into a UInt and are non-negative +let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] + global _write_decimal_number + _write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf) + function _write_decimal_number(io, x::Unsigned, digits_buf) + buf = digits_buf + n = ndigits(x) + i = n + @inbounds while i >= 2 + d, r = divrem(x, 0x64) + d100 = _dec_d100[(r % Int)::Int + 1] + buf[i-1] = d100 % UInt8 + buf[i] = (d100 >> 0x8) % UInt8 + x = oftype(x, d) + i -= 2 + end + if i > 0 + @inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + end + write(io, @view buf[max(i, 1):n]) + end +end + +function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix) + open(out_file, "w") do io + assemble_snapshot(in_prefix, io) + end +end +# Manually parse and write the .json files, given that we don't have JSON import/export in +# julia's stdlibs. +function assemble_snapshot(in_prefix, io::IO) + preamble = read(string(in_prefix, ".metadata.json"), String) + pos = last(findfirst("node_count\":", preamble)) + 1 + endpos = findnext(==(','), preamble, pos) - 1 + node_count = parse(Int, String(@view preamble[pos:endpos])) + + pos = last(findnext("edge_count\":", preamble, endpos)) + 1 + endpos = findnext(==('}'), preamble, pos) - 1 + edge_count = parse(Int, String(@view preamble[pos:endpos])) + + nodes = init_nodes(node_count, edge_count) + + orphans = Set{UInt}() # nodes that have no incoming edges + # Parse nodes with empty edge counts that we need to fill later + nodes_file = open(string(in_prefix, ".nodes"), "r") + for i in 1:length(nodes) + node_type = read(nodes_file, Int8) + node_name_idx = read(nodes_file, UInt) + id = read(nodes_file, UInt) + self_size = read(nodes_file, Int) + @assert read(nodes_file, Int) == 0 # trace_node_id + @assert read(nodes_file, Int8) == 0 # detachedness + + nodes.type[i] = node_type + nodes.name_idx[i] = node_name_idx + nodes.id[i] = id + nodes.self_size[i] = self_size + nodes.edge_count[i] = 0 # edge_count + # populate the orphans set with node index + push!(orphans, i-1) + end + + # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets + edges_file = open(string(in_prefix, ".edges"), "r") + for i in 1:length(nodes.edges) + edge_type = read(edges_file, Int8) + edge_name_or_index = read(edges_file, UInt) + from_node = read(edges_file, UInt) + to_node = read(edges_file, UInt) + + nodes.edges.type[i] = edge_type + nodes.edges.name_or_index[i] = edge_name_or_index + nodes.edges.to_pos[i] = to_node * 7 # 7 fields per node, the streaming format doesn't multiply the offset by 7 + nodes.edge_count[from_node + 1] += UInt32(1) # C and JSON use 0-based indexing + push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges + # remove the node from the orphans if it has at least one incoming edge + if to_node in orphans + delete!(orphans, to_node) + end + end + + _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) + println(io, @view(preamble[1:end-2]), ",") # remove trailing "}\n", we don't end the snapshot here + println(io, "\"nodes\":[") + for i in 1:length(nodes) + i > 1 && println(io, ",") + _write_decimal_number(io, nodes.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.name_idx[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.id[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.self_size[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edge_count[i], _digits_buf) + print(io, ",0,0") + end + print(io, "],\"edges\":[") + e = 1 + for n in 1:length(nodes) + count = nodes.edge_count[n] + len_edges = length(nodes.edge_idxs[n]) + @assert count == len_edges "For node $n: $count != $len_edges" + for i in nodes.edge_idxs[n] + e > 1 && print(io, ",") + println(io) + _write_decimal_number(io, nodes.edges.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf) + if !(nodes.edges.to_pos[i] % 7 == 0) + @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])" + end + e += 1 + end + end + println(io, "],") + open(string(in_prefix, ".strings.json"), "r") do strings_io + skip(strings_io, 2) # skip "{\n" + write(io, strings_io) # strings contain the trailing "}" so we close out what we opened in preamble + end + + # remove the uber node from the orphans + if 0 in orphans + delete!(orphans, 0) + end + + @assert isempty(orphans) "Orphaned nodes: $(orphans), node count: $(length(nodes)), orphan node count: $(length(orphans))" + + return nothing +end + +end diff --git a/stdlib/Profile/test/heapsnapshot_reassemble.jl b/stdlib/Profile/test/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..bf1e6e76bce58 --- /dev/null +++ b/stdlib/Profile/test/heapsnapshot_reassemble.jl @@ -0,0 +1,31 @@ +using Test + +@testset "_write_decimal_number" begin + _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) + io = IOBuffer() + + test_write(d) = begin + Profile.HeapSnapshot._write_decimal_number(io, d, _digits_buf) + s = String(take!(io)) + seekstart(io) + return s + end + @test test_write(0) == "0" + @test test_write(99) == "99" + + @test test_write(UInt8(0)) == "0" + @test test_write(UInt32(0)) == "0" + @test test_write(Int32(0)) == "0" + + @test test_write(UInt8(99)) == "99" + @test test_write(UInt32(99)) == "99" + @test test_write(Int32(99)) == "99" + + # Sample among possible UInts we might print + for x in typemin(UInt8):typemax(UInt8) + @test test_write(x) == string(x) + end + for x in typemin(UInt):typemax(UInt)÷10001:typemax(UInt) + @test test_write(x) == string(x) + end +end diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index d7358dc20e853..73b160b8329ca 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -300,3 +300,4 @@ end end include("allocs.jl") +include("heapsnapshot_reassemble.jl")