Skip to content

Commit

Permalink
[ENH] Break out the integrity check and fix obo
Browse files Browse the repository at this point in the history
Chasing a bug in the integrity check.  Prior to Monday, it was assert(data[j] > 0);
This checks and throws if data[j] <= 0.
  • Loading branch information
rescrv committed Jan 2, 2025
1 parent e89eaa8 commit 65b8d3d
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 4 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,7 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)

add_executable(api_tests tests/cpp/api_test.cpp)
target_link_libraries(api_tests hnswlib)
endif()

add_executable(hnsw-fsck hnsw-fsck.cpp)
target_link_libraries(hnsw-fsck hnswlib)
endif()
40 changes: 40 additions & 0 deletions hnsw-fsck.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <cerrno>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <memory>
#include <string>

#include "hnswlib/hnswlib.h"

int main(int argc, const char* argv[]) {
if (argc != 4) {
std::cerr << "USAGE: hnsw-fsck <index_path> <space_name> <dims>\n";
return 1;
}
std::string index_path(argv[1]);
std::string space_name(argv[2]);
int dim = atoi(argv[3]);
std::string index_file = index_path;
hnswlib::SpaceInterface<float> *l2space;
bool normalize = false;

if (space_name == "l2")
{
l2space = new hnswlib::L2Space(dim);
normalize = false;
}
else if (space_name == "ip")
{
l2space = new hnswlib::InnerProductSpace(dim);
// For IP, we expect the vectors to be normalized
normalize = false;
}
else if (space_name == "cosine")
{
l2space = new hnswlib::InnerProductSpace(dim);
normalize = true;
}
else
{
std::cerr << "Unknown space name: " << space_name << std::endl;
return 2;
}

auto appr_alg = new hnswlib::HierarchicalNSW<float>(l2space, index_file, false, 0, false/*allow_replace_deleted*/, normalize, true /*is_persistent_index*/);
appr_alg->checkIntegrity();
}
15 changes: 12 additions & 3 deletions hnswlib/hnswalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <unordered_set>
#include <set>
#include <list>
#include <sstream>

namespace hnswlib
{
Expand Down Expand Up @@ -1835,8 +1836,10 @@ namespace hnswlib
std::unordered_set<tableint> s;
for (int j = 0; j < size; j++)
{
if (data[j] < 0 || data[j] >= cur_element_count || data[j] == i)
throw std::runtime_error("HNSW Integrity failure: invalid neighbor index");
if (data[j] >= cur_element_count)
throw std::runtime_error("HNSW Integrity failure: invalid neighbor index data[j] >= cur_element_count");
if (data[j] == i)
throw std::runtime_error("HNSW Integrity failure: invalid neighbor index data[j] == i");
inbound_connections_num[data[j]]++;
s.insert(data[j]);
connections_checked++;
Expand All @@ -1850,8 +1853,14 @@ namespace hnswlib
int min1 = inbound_connections_num[0], max1 = inbound_connections_num[0];
for (int i = 0; i < cur_element_count; i++)
{
/*
// This should always be true regardless the data is corrupted or not
assert(inbound_connections_num[i] > 0);
if (inbound_connections_num[i] <= 0) {
std::ostringstream ostr;
ostr << "HNSW Integrity failure: inbound_connections_num[" << i << "] = " << inbound_connections_num[i] << " <= 0";
throw std::runtime_error(ostr.str());
}
*/
min1 = std::min(inbound_connections_num[i], min1);
max1 = std::max(inbound_connections_num[i], max1);
}
Expand Down
1 change: 1 addition & 0 deletions tests/cpp/updates_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ int main(int argc, char **argv)
// Adding enterpoint:

appr_alg.addPoint((void *)dummy_batch.data(), (size_t)0);
appr_alg.checkIntegrity();

StopW stopw = StopW();

Expand Down

0 comments on commit 65b8d3d

Please sign in to comment.