diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 24df141d053f4..5311840811f2c 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -58,8 +58,9 @@ void print_str_escape_json(ios_t *stream, StringRef s) // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 struct Edge { - size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + uint8_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t from_node; // This is a deviation from the .heapsnapshot format to support streaming. size_t to_node; }; @@ -70,15 +71,14 @@ struct Edge { const int k_node_number_of_fields = 7; struct Node { - size_t type; // index into snapshot->node_types + uint8_t type; // index into snapshot->node_types size_t name; size_t id; // This should be a globally-unique counter, but we use the memory address size_t self_size; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. // whether the from_node is attached or detached from the main application state // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 - int detachedness; // 0 - unknown, 1 - attached, 2 - detached - SmallVector edges; + uint8_t detachedness; // 0 - unknown, 1 - attached, 2 - detached ~Node() JL_NOTSAFEPOINT = default; }; @@ -111,15 +111,27 @@ struct StringTable { }; struct HeapSnapshot { - SmallVector nodes; - // edges are stored on each from_node - StringTable names; StringTable node_types; StringTable edge_types; DenseMap node_ptr_to_index_map; - size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. + size_t num_nodes = 0; // Since we stream out to files, + size_t num_edges = 0; // we need to track the counts here. + + // Node internal_root; + + // Used for streaming + // Since nodes and edges are just one giant array of integers, we stream them as + // *BINARY DATA*: a sequence of bytes, each of which is a 64-bit integer (big enough to + // fit the pointer ids). + ios_t *nodes; + ios_t *edges; + // These files are written out as json data. + ios_t *strings; + ios_t *json; + + size_t internal_root_idx = 0; // node index of the internal root node size_t _gc_root_idx = 1; // node index of the GC roots node size_t _gc_finlist_root_idx = 2; // node index of the GC finlist roots node }; @@ -130,17 +142,22 @@ int gc_heap_snapshot_enabled = 0; HeapSnapshot *g_snapshot = nullptr; extern jl_mutex_t heapsnapshot_lock; +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one); void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one); static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT; -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; void _add_synthetic_root_entries(HeapSnapshot *snapshot); -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one) { HeapSnapshot snapshot; - _add_synthetic_root_entries(&snapshot); + snapshot.nodes = nodes; + snapshot.edges = edges; + snapshot.strings = strings; + snapshot.json = json; jl_mutex_lock(&heapsnapshot_lock); @@ -148,6 +165,8 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) g_snapshot = &snapshot; gc_heap_snapshot_enabled = true; + _add_synthetic_root_entries(&snapshot); + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` jl_gc_collect(JL_GC_FULL); @@ -159,7 +178,33 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) // When we return, the snapshot is full // Dump the snapshot - serialize_heap_snapshot((ios_t*)stream, snapshot, all_one); + final_serialize_heap_snapshot((ios_t*)json, (ios_t*)strings, snapshot, all_one); +} + +void serialize_node(HeapSnapshot *snapshot, const Node &node) +{ + // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + ios_write(snapshot->nodes, (char*)&node.type, sizeof(node.type)); + ios_write(snapshot->nodes, (char*)&node.name, sizeof(node.name)); + ios_write(snapshot->nodes, (char*)&node.id, sizeof(node.id)); + ios_write(snapshot->nodes, (char*)&node.self_size, sizeof(node.self_size)); + // NOTE: We don't write edge_count, since it's always 0. It will be reconstructed in + // post-processing. + ios_write(snapshot->nodes, (char*)&node.trace_node_id, sizeof(node.trace_node_id)); + ios_write(snapshot->nodes, (char*)&node.detachedness, sizeof(node.detachedness)); + + g_snapshot->num_nodes += 1; +} +void serialize_edge(HeapSnapshot *snapshot, const Edge &edge) +{ + // ["type","name_or_index","to_node"] + ios_write(snapshot->edges, (char*)&edge.type, sizeof(edge.type)); + ios_write(snapshot->edges, (char*)&edge.name_or_index, sizeof(edge.name_or_index)); + // NOTE: Row numbers for nodes (not adjusted for k_node_number_of_fields) + ios_write(snapshot->edges, (char*)&edge.from_node, sizeof(edge.from_node)); + ios_write(snapshot->edges, (char*)&edge.to_node, sizeof(edge.to_node)); + + g_snapshot->num_edges += 1; } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L212 @@ -169,60 +214,59 @@ void _add_synthetic_root_entries(HeapSnapshot *snapshot) // adds a node at id 0 which is the "uber root": // a synthetic node which points to all the GC roots. Node internal_root{ - snapshot->node_types.find_or_create_string_id("synthetic"), + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id(""), // name 0, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(internal_root); + serialize_node(snapshot, internal_root); // Add a node for the GC roots - snapshot->_gc_root_idx = snapshot->nodes.size(); + snapshot->_gc_root_idx = snapshot->internal_root_idx + 1; Node gc_roots{ - snapshot->node_types.find_or_create_string_id("synthetic"), + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id("GC roots"), // name snapshot->_gc_root_idx, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(gc_roots); - snapshot->nodes.front().edges.push_back(Edge{ + serialize_node(snapshot, gc_roots); + Edge root_to_gc_roots{ snapshot->edge_types.find_or_create_string_id("internal"), snapshot->names.find_or_create_string_id("GC roots"), // edge label + snapshot->internal_root_idx, // from snapshot->_gc_root_idx // to - }); - snapshot->num_edges += 1; + }; + serialize_edge(snapshot, root_to_gc_roots); // add a node for the gc finalizer list roots - snapshot->_gc_finlist_root_idx = snapshot->nodes.size(); + snapshot->_gc_finlist_root_idx = snapshot->internal_root_idx + 2; Node gc_finlist_roots{ - snapshot->node_types.find_or_create_string_id("synthetic"), + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id("GC finalizer list roots"), // name snapshot->_gc_finlist_root_idx, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(gc_finlist_roots); - snapshot->nodes.front().edges.push_back(Edge{ + serialize_node(snapshot, gc_finlist_roots); + Edge root_to_gc_finlist_roots{ snapshot->edge_types.find_or_create_string_id("internal"), - snapshot->names.find_or_create_string_id("GC finlist roots"), // edge label + snapshot->names.find_or_create_string_id("GC finalizer list roots"), // edge label + snapshot->internal_root_idx, // from snapshot->_gc_finlist_root_idx // to - }); - snapshot->num_edges += 1; + }; + serialize_edge(snapshot, root_to_gc_finlist_roots); } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 // returns the index of the new node size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } @@ -292,8 +336,8 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT name = StringRef((const char*)str_.buf, str_.size); } - g_snapshot->nodes.push_back(Node{ - g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; + auto node = Node{ + (uint8_t)g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; g_snapshot->names.find_or_create_string_id(name), // size_t name; (size_t)a, // size_t id; // We add 1 to self-size for the type tag that all heap-allocated objects have. @@ -301,8 +345,8 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT sizeof(void*) + self_size, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); if (ios_need_close) ios_close(&str_); @@ -312,20 +356,20 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef name) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - g_snapshot->nodes.push_back(Node{ - g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; + auto node = Node{ + (uint8_t)g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; g_snapshot->names.find_or_create_string_id(name), // size_t name; (size_t)a, // size_t id; bytes, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); return val.first->second; } @@ -361,36 +405,29 @@ static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT } } - void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - record_node_to_gc_snapshot(root); - - auto &internal_root = g_snapshot->nodes.front(); - auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; + size_t to_node_idx = record_node_to_gc_snapshot(root); auto edge_label = g_snapshot->names.find_or_create_string_id(name); - _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label); + _record_gc_just_edge("internal", g_snapshot->internal_root_idx, to_node_idx, edge_label); } void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - record_node_to_gc_snapshot(root); - - auto from_node_idx = g_snapshot->_gc_root_idx; auto to_node_idx = record_node_to_gc_snapshot(root); auto edge_label = g_snapshot->names.find_or_create_string_id(name); - _record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label); + + _record_gc_just_edge("internal", g_snapshot->_gc_root_idx, to_node_idx, edge_label); } void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEPOINT { - auto from_node_idx = g_snapshot->_gc_finlist_root_idx; auto to_node_idx = record_node_to_gc_snapshot(obj); ostringstream ss; ss << "finlist-" << index; auto edge_label = g_snapshot->names.find_or_create_string_id(ss.str()); - _record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label); + _record_gc_just_edge("internal", g_snapshot->_gc_finlist_root_idx, to_node_idx, edge_label); } // Add a node to the heap snapshot representing a Julia stack frame. @@ -399,20 +436,20 @@ void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEP // Stack frame nodes point at the objects they have as local variables. size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - snapshot->nodes.push_back(Node{ - snapshot->node_types.find_or_create_string_id("synthetic"), + auto node = Node{ + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id("(stack frame)"), // name (size_t)frame, // id 1, // size 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - SmallVector() // outgoing edges - }); + }; + serialize_node(snapshot, node); return val.first->second; } @@ -421,30 +458,27 @@ void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) J { auto from_node_idx = _record_stack_frame_node(g_snapshot, (jl_gcframe_t*)from); auto to_idx = record_node_to_gc_snapshot(to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); - _record_gc_just_edge("internal", from_node, to_idx, name_idx); + _record_gc_just_edge("internal", from_node_idx, to_idx, name_idx); } void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT { auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { auto from_node_idx = _record_stack_frame_node(g_snapshot, from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("next frame"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT @@ -464,12 +498,12 @@ void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_value_t auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)module); auto to_bindings_idx = record_node_to_gc_snapshot(bindings); auto to_bindingkeyset_idx = record_node_to_gc_snapshot(bindingkeyset); - auto &from_node = g_snapshot->nodes[from_node_idx]; + if (to_bindings_idx > 0) { - _record_gc_just_edge("internal", from_node, to_bindings_idx, g_snapshot->names.find_or_create_string_id("bindings")); + _record_gc_just_edge("internal", from_node_idx, to_bindings_idx, g_snapshot->names.find_or_create_string_id("bindings")); } if (to_bindingkeyset_idx > 0) { - _record_gc_just_edge("internal", from_node, to_bindingkeyset_idx, g_snapshot->names.find_or_create_string_id("bindingkeyset")); + _record_gc_just_edge("internal", from_node_idx, to_bindingkeyset_idx, g_snapshot->names.find_or_create_string_id("bindingkeyset")); } } @@ -501,9 +535,8 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t byt break; } auto to_node_idx = record_pointer_to_gc_snapshot(to, bytes, alloc_kind); - auto &from_node = g_snapshot->nodes[from_node_idx]; - _record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx); + _record_gc_just_edge("hidden", from_node_idx, to_node_idx, name_or_idx); } static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, @@ -512,104 +545,46 @@ static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, auto from_node_idx = record_node_to_gc_snapshot(a); auto to_node_idx = record_node_to_gc_snapshot(b); - auto &from_node = g_snapshot->nodes[from_node_idx]; - - _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); + _record_gc_just_edge(edge_type, from_node_idx, to_node_idx, name_or_idx); } -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT { - from_node.edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id(edge_type), + auto edge = Edge{ + (uint8_t)g_snapshot->edge_types.find_or_create_string_id(edge_type), name_or_idx, // edge label + from_idx, // from to_idx // to - }); + }; - g_snapshot->num_edges += 1; + serialize_edge(g_snapshot, edge); } -void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one) +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one) { // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 - ios_printf(stream, "{\"snapshot\":{"); - ios_printf(stream, "\"meta\":{"); - ios_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); - ios_printf(stream, "\"node_types\":["); - snapshot.node_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); - ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); - ios_printf(stream, "\"edge_types\":["); - snapshot.edge_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string_or_number\",\"from_node\"]"); - ios_printf(stream, "},\n"); // end "meta" - ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); - ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); - ios_printf(stream, "},\n"); // end "snapshot" - - ios_printf(stream, "\"nodes\":["); - bool first_node = true; - // use a set to track the nodes that do not have parents - set orphans; - for (const auto &from_node : snapshot.nodes) { - if (first_node) { - first_node = false; - } - else { - ios_printf(stream, ","); - } - // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - ios_printf(stream, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", - from_node.type, - from_node.name, - from_node.id, - all_one ? (size_t)1 : from_node.self_size, - from_node.edges.size(), - from_node.trace_node_id, - from_node.detachedness); - if (from_node.id != snapshot._gc_root_idx && from_node.id != snapshot._gc_finlist_root_idx) { - // find the node index from the node object pointer - void * ptr = (void*)from_node.id; - size_t n_id = snapshot.node_ptr_to_index_map[ptr]; - orphans.insert(n_id); - } else { - orphans.insert(from_node.id); - } - } - ios_printf(stream, "],\n"); - - ios_printf(stream, "\"edges\":["); - bool first_edge = true; - for (const auto &from_node : snapshot.nodes) { - for (const auto &edge : from_node.edges) { - if (first_edge) { - first_edge = false; - } - else { - ios_printf(stream, ","); - } - ios_printf(stream, "%zu,%zu,%zu\n", - edge.type, - edge.name_or_index, - edge.to_node * k_node_number_of_fields); - auto n_id = edge.to_node; - auto it = orphans.find(n_id); - if (it != orphans.end()) { - // remove the node from the orphans if it has at least one incoming edge - orphans.erase(it); - } - } - } - ios_printf(stream, "],\n"); // end "edges" - - ios_printf(stream, "\"strings\":"); - - snapshot.names.print_json_array(stream, true); - - ios_printf(stream, "}"); + ios_printf(json, "{\"snapshot\":{"); + ios_printf(json, "\"meta\":{"); + ios_printf(json, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + ios_printf(json, "\"node_types\":["); + snapshot.node_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); + ios_printf(json, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + ios_printf(json, "\"edge_types\":["); + snapshot.edge_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string_or_number\",\"from_node\"]"); + ios_printf(json, "},\n"); // end "meta" + ios_printf(json, "\"node_count\":%zu,", snapshot.num_nodes); + ios_printf(json, "\"edge_count\":%zu", snapshot.num_edges); + ios_printf(json, "}\n"); // end "snapshot" + ios_printf(json, "}"); + + ios_printf(strings, "{\n"); + ios_printf(strings, "\"strings\":"); + + snapshot.names.print_json_array(strings, true); + ios_printf(strings, "}"); - // remove the uber node from the orphans - orphans.erase(0); - assert(orphans.size() == 0 && "all nodes except the uber node should have at least one incoming edge"); } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 1799063825e83..70884f5f62d6a 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -120,7 +120,8 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot // --------------------------------------------------------------------- -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one); +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one); #ifdef __cplusplus diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index aad97be683309..c13e71cc0933c 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1245,9 +1245,8 @@ end """ - Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false) - Profile.take_heap_snapshot(filepath::String, all_one::Bool=false) - Profile.take_heap_snapshot(all_one::Bool=false; dir::String) + Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false) + Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false) Write a snapshot of the heap, in the JSON format expected by the Chrome Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file @@ -1257,16 +1256,69 @@ full file path, or IO stream. If `all_one` is true, then report the size of every object as one so they can be easily counted. Otherwise, report the actual size. + +If `streaming` is true, we will stream the snapshot data out into four files, using filepath +as the prefix, to avoid having to hold the entire snapshot in memory. This option should be +used for any setting where your memory is constrained. These files can then be reassembled +by calling [`Profile.HeapSnapshot.assemble_snapshot(filepath)`](@ref), which can +be done offline. + +NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing +the snapshot from the parts requires holding the entire snapshot in memory, so if the +snapshot is large, you can run out of memory while processing it. Streaming allows you to +reconstruct the snapshot offline, after your workload is done running. +If you do attempt to collect a snapshot with streaming=false (the default, for +backwards-compatibility) and your process is killed, note that this will always save the +parts in the same directory as your provided filepath, so you can still reconstruct the +snapshot after the fact, via `assemble_snapshot()`. """ -function take_heap_snapshot(io::IOStream, all_one::Bool=false) - Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) -end -function take_heap_snapshot(filepath::String, all_one::Bool=false) - open(filepath, "w") do io - take_heap_snapshot(io, all_one) +function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false) + if streaming + _stream_heap_snapshot(filepath, all_one) + println("Finished streaming heap snapshot parts to prefix: $filepath") + else + # Support the legacy, non-streaming mode, by first streaming the parts, then + # reassembling it after we're done. + prefix = filepath + _stream_heap_snapshot(prefix, all_one) + Profile.HeapSnapshot.assemble_snapshot(prefix, filepath) + println("Recorded heap snapshot: $filepath") end return filepath end +function take_heap_snapshot(io::IO, all_one::Bool=false) + # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir, + # then reassembling it after we're done. + dir = tempdir() + prefix = joinpath(dir, "snapshot") + _stream_heap_snapshot(prefix, all_one) + Profile.HeapSnapshot.assemble_snapshot(prefix, io) +end +function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool) + # Nodes and edges are binary files + open("$prefix.nodes", "w") do nodes + open("$prefix.edges", "w") do edges + # The other two files are json data + open("$prefix.strings.json", "w") do strings + open("$prefix.metadata.json", "w") do json + Base.@_lock_ios(nodes, + Base.@_lock_ios(edges, + Base.@_lock_ios(strings, + Base.@_lock_ios(json, + ccall(:jl_gc_take_heap_snapshot, + Cvoid, + (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar), + nodes.handle, edges.handle, strings.handle, json.handle, + Cchar(all_one)) + ) + ) + ) + ) + end + end + end + end +end function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString} fname = "$(getpid())_$(time_ns()).heapsnapshot" if isnothing(dir) @@ -1302,6 +1354,7 @@ function take_page_profile(filepath::String) end include("Allocs.jl") +include("heapsnapshot_reassemble.jl") include("precompile.jl") end # module diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..4e384d8f82a45 --- /dev/null +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -0,0 +1,182 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module HeapSnapshot + +# SoA layout to reduce padding +struct Edges + type::Vector{Int8} # index into `snapshot.meta.edge_types` + name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type + to_pos::Vector{UInt} # index into `snapshot.nodes` +end +function init_edges(n::Int) + Edges( + Vector{Int8}(undef, n), + Vector{UInt}(undef, n), + Vector{UInt}(undef, n), + ) +end +Base.length(n::Edges) = length(n.type) + +# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them +struct Nodes + type::Vector{Int8} # index into `snapshot.meta.node_types` + name_idx::Vector{UInt32} # index into `snapshot.strings` + id::Vector{UInt} # unique id, in julia it is the address of the object + self_size::Vector{Int} # size of the object itself, not including the size of its fields + edge_count::Vector{UInt} # number of outgoing edges + edges::Edges # outgoing edges + # This is the main complexity of the .heapsnapshot format, and it's the reason we need + # to read in all the data before writing it out. The edges vector contains all edges, + # but organized by which node they came from. First, it contains all the edges coming + # out of node 0, then all edges leaving node 1, etc. So we need to have visited all + # edges, and assigned them to their corresponding nodes, before we can emit the file. + edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids +end +function init_nodes(n::Int, e::Int) + Nodes( + Vector{Int8}(undef, n), + Vector{UInt32}(undef, n), + Vector{UInt}(undef, n), + Vector{Int}(undef, n), + Vector{UInt32}(undef, n), + init_edges(e), + [Vector{UInt}() for _ in 1:n], # Take care to construct n separate empty vectors + ) +end +Base.length(n::Nodes) = length(n.type) + +# Like Base.dec, but doesn't allocate a string and writes directly to the io object +# We know all of the numbers we're about to write fit into a UInt and are non-negative +let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] + global _write_decimal_number + _write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf) + function _write_decimal_number(io, x::Unsigned, digits_buf) + buf = digits_buf + n = ndigits(x) + i = n + @inbounds while i >= 2 + d, r = divrem(x, 0x64) + d100 = _dec_d100[(r % Int)::Int + 1] + buf[i-1] = d100 % UInt8 + buf[i] = (d100 >> 0x8) % UInt8 + x = oftype(x, d) + i -= 2 + end + if i > 0 + @inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + end + write(io, @view buf[max(i, 1):n]) + end +end + +function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix) + open(out_file, "w") do io + assemble_snapshot(in_prefix, io) + end +end +# Manually parse and write the .json files, given that we don't have JSON import/export in +# julia's stdlibs. +function assemble_snapshot(in_prefix, io::IO) + preamble = read(string(in_prefix, ".metadata.json"), String) + pos = last(findfirst("node_count\":", preamble)) + 1 + endpos = findnext(==(','), preamble, pos) - 1 + node_count = parse(Int, String(@view preamble[pos:endpos])) + + pos = last(findnext("edge_count\":", preamble, endpos)) + 1 + endpos = findnext(==('}'), preamble, pos) - 1 + edge_count = parse(Int, String(@view preamble[pos:endpos])) + + nodes = init_nodes(node_count, edge_count) + + orphans = Set{UInt}() # nodes that have no incoming edges + # Parse nodes with empty edge counts that we need to fill later + nodes_file = open(string(in_prefix, ".nodes"), "r") + for i in 1:length(nodes) + node_type = read(nodes_file, Int8) + node_name_idx = read(nodes_file, UInt) + id = read(nodes_file, UInt) + self_size = read(nodes_file, Int) + @assert read(nodes_file, Int) == 0 # trace_node_id + @assert read(nodes_file, Int8) == 0 # detachedness + + nodes.type[i] = node_type + nodes.name_idx[i] = node_name_idx + nodes.id[i] = id + nodes.self_size[i] = self_size + nodes.edge_count[i] = 0 # edge_count + # populate the orphans set with node index + push!(orphans, i-1) + end + + # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets + edges_file = open(string(in_prefix, ".edges"), "r") + for i in 1:length(nodes.edges) + edge_type = read(edges_file, Int8) + edge_name_or_index = read(edges_file, UInt) + from_node = read(edges_file, UInt) + to_node = read(edges_file, UInt) + + nodes.edges.type[i] = edge_type + nodes.edges.name_or_index[i] = edge_name_or_index + nodes.edges.to_pos[i] = to_node * 7 # 7 fields per node, the streaming format doesn't multiply the offset by 7 + nodes.edge_count[from_node + 1] += UInt32(1) # C and JSON use 0-based indexing + push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges + # remove the node from the orphans if it has at least one incoming edge + if to_node in orphans + delete!(orphans, to_node) + end + end + + _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) + println(io, @view(preamble[1:end-2]), ",") # remove trailing "}\n", we don't end the snapshot here + println(io, "\"nodes\":[") + for i in 1:length(nodes) + i > 1 && println(io, ",") + _write_decimal_number(io, nodes.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.name_idx[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.id[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.self_size[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edge_count[i], _digits_buf) + print(io, ",0,0") + end + print(io, "],\"edges\":[") + e = 1 + for n in 1:length(nodes) + count = nodes.edge_count[n] + len_edges = length(nodes.edge_idxs[n]) + @assert count == len_edges "For node $n: $count != $len_edges" + for i in nodes.edge_idxs[n] + e > 1 && print(io, ",") + println(io) + _write_decimal_number(io, nodes.edges.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf) + if !(nodes.edges.to_pos[i] % 7 == 0) + @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])" + end + e += 1 + end + end + println(io, "],") + open(string(in_prefix, ".strings.json"), "r") do strings_io + skip(strings_io, 2) # skip "{\n" + write(io, strings_io) # strings contain the trailing "}" so we close out what we opened in preamble + end + + # remove the uber node from the orphans + if 0 in orphans + delete!(orphans, 0) + end + + @assert isempty(orphans) "Orphaned nodes: $(orphans), node count: $(length(nodes)), orphan node count: $(length(orphans))" + + return nothing +end + +end diff --git a/stdlib/Profile/test/heapsnapshot_reassemble.jl b/stdlib/Profile/test/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..bf1e6e76bce58 --- /dev/null +++ b/stdlib/Profile/test/heapsnapshot_reassemble.jl @@ -0,0 +1,31 @@ +using Test + +@testset "_write_decimal_number" begin + _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) + io = IOBuffer() + + test_write(d) = begin + Profile.HeapSnapshot._write_decimal_number(io, d, _digits_buf) + s = String(take!(io)) + seekstart(io) + return s + end + @test test_write(0) == "0" + @test test_write(99) == "99" + + @test test_write(UInt8(0)) == "0" + @test test_write(UInt32(0)) == "0" + @test test_write(Int32(0)) == "0" + + @test test_write(UInt8(99)) == "99" + @test test_write(UInt32(99)) == "99" + @test test_write(Int32(99)) == "99" + + # Sample among possible UInts we might print + for x in typemin(UInt8):typemax(UInt8) + @test test_write(x) == string(x) + end + for x in typemin(UInt):typemax(UInt)÷10001:typemax(UInt) + @test test_write(x) == string(x) + end +end diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index d7358dc20e853..73b160b8329ca 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -300,3 +300,4 @@ end end include("allocs.jl") +include("heapsnapshot_reassemble.jl")