-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add RPC command shard crawl (RIPD-1663) #2697
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -708,6 +708,102 @@ OverlayImpl::reportTraffic ( | |
m_traffic.addCount (cat, isInbound, number); | ||
} | ||
|
||
// Build a JSON report of the shard information known for our peers. | ||
// | ||
// pubKey: when true, each entry includes the node's public key. | ||
// hops:   when greater than zero, a TMGetShardInfo request carrying this | ||
//         hop count may be relayed and this call blocks (bounded by a | ||
//         timeout) waiting for replies before building the report. | ||
// | ||
// Returns an empty JSON object when size() reports no peers; otherwise an | ||
// object whose jss::peers member is an array with one entry per public key. | ||
Json::Value | ||
OverlayImpl::crawlShards(bool pubKey, std::uint32_t hops) | ||
{ | ||
using namespace std::chrono; | ||
using namespace std::chrono_literals; | ||
|
||
Json::Value jv(Json::objectValue); | ||
auto const numPeers {size()}; | ||
if (numPeers == 0) | ||
return jv; | ||
|
||
// If greater than a hop away, we may need to gather or freshen data | ||
if (hops > 0) | ||
{ | ||
// Prevent crawl spamming | ||
// csLast_ holds a seconds count (stored below from time_since_epoch()); | ||
// it is loaded into a time_point here so it can be compared with now(). | ||
clock_type::time_point const last(csLast_.load()); | ||
if ((clock_type::now() - last) > 60s) | ||
{ | ||
// The wait budget grows quadratically: hops * hops * 10 seconds. | ||
auto const timeout(seconds((hops * hops) * 10)); | ||
std::unique_lock<std::mutex> l {csMutex_}; | ||
|
||
// Check if already requested | ||
// An empty csIDs_ means no request is outstanding, so this thread | ||
// becomes the requester; otherwise it only waits (else branch below). | ||
if (csIDs_.empty()) | ||
{ | ||
{ | ||
// Record every id currently in ids_; lastLink() erases them | ||
// one by one and notifies csCV_ once the set is empty. | ||
std::lock_guard <decltype(mutex_)> lock {mutex_}; | ||
for (auto& id : ids_) | ||
csIDs_.emplace(id.first); | ||
} | ||
|
||
// Relay request to active peers | ||
protocol::TMGetShardInfo tmGS; | ||
tmGS.set_hops(hops); | ||
foreach(send_always(std::make_shared<Message>( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same concern as above: we can't just send this to every peer we have unless we are prepared to have that connection close if they aren't running a version capable of understanding |
||
tmGS, protocol::mtGET_SHARD_INFO))); | ||
|
||
// On timeout, drop the outstanding ids and wake any other waiters. | ||
// NOTE(review): wait_for is called without a predicate, so a | ||
// spurious wakeup would also end the wait early - confirm that is | ||
// acceptable here. | ||
if (csCV_.wait_for(l, timeout) == std::cv_status::timeout) | ||
{ | ||
csIDs_.clear(); | ||
csCV_.notify_all(); | ||
} | ||
// The crawl time is recorded only after the wait has finished. | ||
csLast_ = duration_cast<seconds>( | ||
clock_type::now().time_since_epoch()); | ||
} | ||
else | ||
// A request is already in flight: wait on the same condition | ||
// variable, bounded by the same timeout. | ||
csCV_.wait_for(l, timeout); | ||
} | ||
} | ||
|
||
// Combine the shard info from peers and their sub peers | ||
hash_map<PublicKey, PeerImp::ShardInfo> peerShardInfo; | ||
for_each([&](std::shared_ptr<PeerImp> const& peer) | ||
{ | ||
if (auto psi = peer->getPeerShardInfo()) | ||
{ | ||
for (auto const& e : *psi) | ||
{ | ||
auto it {peerShardInfo.find(e.first)}; | ||
if (it != peerShardInfo.end()) | ||
// The key exists so join the shard indexes. | ||
it->second.shardIndexes += e.second.shardIndexes; | ||
else | ||
// NOTE(review): e is bound as auto const&, so std::move(e) | ||
// yields a const rvalue and this emplace copies the entry. | ||
peerShardInfo.emplace(std::move(e)); | ||
} | ||
} | ||
}); | ||
|
||
// Prepare json reply | ||
auto& av = jv[jss::peers] = Json::Value(Json::arrayValue); | ||
for (auto const& e : peerShardInfo) | ||
{ | ||
auto& pv {av.append(Json::Value(Json::objectValue))}; | ||
if (pubKey) | ||
pv[jss::public_key] = toBase58(TokenType::NodePublic, e.first); | ||
|
||
// The ip field is emitted only when the endpoint address is specified. | ||
auto const& address {e.second.endpoint.address()}; | ||
if (!address.is_unspecified()) | ||
pv[jss::ip] = address.to_string(); | ||
|
||
pv[jss::complete_shards] = to_string(e.second.shardIndexes); | ||
} | ||
|
||
return jv; | ||
} | ||
|
||
// Remove `id` from the set of peer ids still awaiting a last link (csIDs_) | ||
// and, once that set has been emptied by this call, wake every thread | ||
// waiting on csCV_. | ||
void | ||
OverlayImpl::lastLink(std::uint32_t id) | ||
{ | ||
// Notify threads when every peer has received a last link. | ||
// This doesn't account for every node that might reply but | ||
// it is adequate. | ||
std::lock_guard<std::mutex> l {csMutex_}; | ||
// erase() returns the number of elements removed, so an unknown id | ||
// (nothing erased) never triggers a notification. | ||
if (csIDs_.erase(id) && csIDs_.empty()) | ||
csCV_.notify_all(); | ||
} | ||
|
||
std::size_t | ||
OverlayImpl::selectPeers (PeerSet& set, std::size_t limit, | ||
std::function<bool(std::shared_ptr<Peer> const&)> score) | ||
|
@@ -787,9 +883,12 @@ OverlayImpl::crawl() | |
sp->getRemoteAddress().port()); | ||
} | ||
} | ||
auto version = sp->getVersion (); | ||
if (! version.empty ()) | ||
pv[jss::version] = version; | ||
|
||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why the extra scope? If it's lifetime management, why not just change to:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @nbougalis a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, d'oh. |
||
auto version {sp->getVersion()}; | ||
if (!version.empty()) | ||
pv[jss::version] = std::move(version); | ||
} | ||
|
||
std::uint32_t minSeq, maxSeq; | ||
sp->ledgerRange(minSeq, maxSeq); | ||
|
@@ -798,9 +897,8 @@ OverlayImpl::crawl() | |
std::to_string(minSeq) + "-" + | ||
std::to_string(maxSeq); | ||
|
||
auto shards = sp->getShards(); | ||
if (! shards.empty()) | ||
pv[jss::complete_shards] = shards; | ||
if (auto shardIndexes = sp->getShardIndexes()) | ||
pv[jss::complete_shards] = to_string(*shardIndexes); | ||
}); | ||
|
||
return jv; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -123,6 +123,13 @@ class OverlayImpl : public Overlay | |
std::atomic <uint64_t> peerDisconnects_ {0}; | ||
std::atomic <uint64_t> peerDisconnectsCharges_ {0}; | ||
|
||
// Last time we crawled peers for shard info. 'cs' = crawl shards | ||
std::atomic<std::chrono::seconds> csLast_{std::chrono::seconds{0}}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason this is a duration and not a time_point? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Paging @HowardHinnant. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember this happening, and I don't recall the reason either. One can always load the duration into a time_point, do the computations, and then extract the duration from the time_point for storing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @HowardHinnant Yeah, that is the solution I went with. |
||
std::mutex csMutex_; | ||
std::condition_variable csCV_; | ||
// Peer IDs expecting to receive a last link notification | ||
std::set<std::uint32_t> csIDs_; | ||
|
||
//-------------------------------------------------------------------------- | ||
|
||
public: | ||
|
@@ -221,15 +228,17 @@ class OverlayImpl : public Overlay | |
void | ||
for_each (UnaryFunc&& f) | ||
{ | ||
std::lock_guard <decltype(mutex_)> lock (mutex_); | ||
|
||
// Iterate over a copy of the peer list because peer | ||
// destruction can invalidate iterators. | ||
std::vector<std::weak_ptr<PeerImp>> wp; | ||
wp.reserve(ids_.size()); | ||
{ | ||
std::lock_guard<decltype(mutex_)> lock(mutex_); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change has to be carefully audited to make sure callers aren't expecting the lock to be held. I expect this is an OK change, but we can't push until we audit. I'll do so when I do another pass of the code, but I wanted to make a note so others can audit as well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I assume you mean as the caller's function is called. Good point, requires an audit. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @miguelportilla Yes, that's what I mean - the parameter `f`. I just audited this and the change looks good to me. However, I think it's important enough that at least one other reviewer should also confirm that this change is safe. |
||
|
||
for (auto& x : ids_) | ||
wp.push_back(x.second); | ||
// Iterate over a copy of the peer list because peer | ||
// destruction can invalidate iterators. | ||
wp.reserve(ids_.size()); | ||
|
||
for (auto& x : ids_) | ||
wp.push_back(x.second); | ||
} | ||
|
||
for (auto& w : wp) | ||
{ | ||
|
@@ -340,6 +349,17 @@ class OverlayImpl : public Overlay | |
return peerDisconnectsCharges_; | ||
} | ||
|
||
Json::Value | ||
crawlShards(bool pubKey, std::uint32_t hops) override; | ||
|
||
|
||
/** Called when the last link from a peer chain is received. | ||
|
||
@param id peer id that received the shard info. | ||
*/ | ||
void | ||
lastLink(std::uint32_t id); | ||
|
||
private: | ||
std::shared_ptr<Writer> | ||
makeRedirectResponse (PeerFinder::Slot::ptr const& slot, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can't arbitrarily send this message out to other peers; if they don't support `mtSHARD_INFO`, it will result in the closing of their connection to this server. We need to know if a peer supports sharding before we send them shard-related messages.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nbougalis I can't find the code that disconnects upon receiving an unknown message, and I've been unable to reproduce that behavior using the tip of develop. `PeerImp::onMessageUnknown` is called by `invokeProtocolMessage` when an unknown message is received, but the function is just a stub with a `TODO`. It seems someone intended on adding the behavior, or did I miss something else entirely? Thanks!