Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Break out the integrity check and fix obo #28

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,7 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)

add_executable(api_tests tests/cpp/api_test.cpp)
target_link_libraries(api_tests hnswlib)
endif()

add_executable(hnsw-fsck hnsw-fsck.cpp)
target_link_libraries(hnsw-fsck hnswlib)
endif()
40 changes: 40 additions & 0 deletions hnsw-fsck.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <cerrno>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <memory>
#include <string>

#include "hnswlib/hnswlib.h"

int main(int argc, const char* argv[]) {
if (argc != 4) {
std::cerr << "USAGE: hnsw-fsck <index_path> <space_name> <dims>\n";
return 1;
}
std::string index_path(argv[1]);
std::string space_name(argv[2]);
int dim = atoi(argv[3]);
std::string index_file = index_path;
hnswlib::SpaceInterface<float> *l2space;
bool normalize = false;

if (space_name == "l2")
{
l2space = new hnswlib::L2Space(dim);
normalize = false;
}
else if (space_name == "ip")
{
l2space = new hnswlib::InnerProductSpace(dim);
// For IP, we expect the vectors to be normalized
normalize = false;
}
else if (space_name == "cosine")
{
l2space = new hnswlib::InnerProductSpace(dim);
normalize = true;
}
else
{
std::cerr << "Unknown space name: " << space_name << std::endl;
return 2;
}

auto appr_alg = new hnswlib::HierarchicalNSW<float>(l2space, index_file, false, 0, false/*allow_replace_deleted*/, normalize, true /*is_persistent_index*/);
appr_alg->checkIntegrity();
}
15 changes: 12 additions & 3 deletions hnswlib/hnswalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <unordered_set>
#include <set>
#include <list>
#include <sstream>

namespace hnswlib
{
Expand Down Expand Up @@ -1835,8 +1836,10 @@ namespace hnswlib
std::unordered_set<tableint> s;
for (int j = 0; j < size; j++)
{
if (data[j] < 0 || data[j] >= cur_element_count || data[j] == i)
throw std::runtime_error("HNSW Integrity failure: invalid neighbor index");
if (data[j] >= cur_element_count)
throw std::runtime_error("HNSW Integrity failure: invalid neighbor index data[j] >= cur_element_count");
if (data[j] == i)
throw std::runtime_error("HNSW Integrity failure: invalid neighbor index data[j] == i");
inbound_connections_num[data[j]]++;
s.insert(data[j]);
connections_checked++;
Expand All @@ -1850,8 +1853,14 @@ namespace hnswlib
int min1 = inbound_connections_num[0], max1 = inbound_connections_num[0];
for (int i = 0; i < cur_element_count; i++)
{
/*
// This should always be true regardless of whether the data is corrupted or not
assert(inbound_connections_num[i] > 0);
if (inbound_connections_num[i] <= 0) {
std::ostringstream ostr;
ostr << "HNSW Integrity failure: inbound_connections_num[" << i << "] = " << inbound_connections_num[i] << " <= 0";
throw std::runtime_error(ostr.str());
}
*/
min1 = std::min(inbound_connections_num[i], min1);
max1 = std::max(inbound_connections_num[i], max1);
}
Expand Down
1 change: 1 addition & 0 deletions tests/cpp/updates_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ int main(int argc, char **argv)
// Adding enterpoint:

appr_alg.addPoint((void *)dummy_batch.data(), (size_t)0);
appr_alg.checkIntegrity();

StopW stopw = StopW();

Expand Down
Loading