You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
When running a CAS with the following configuration, Chromium/Reclient builds stall workers. The workers appear to be waiting for something, but they have low CPU and network usage.
Running v0.5.3 worker and scheduler with a v0.4.0 CAS with the following configuration works just fine. So something weird is happening.
{
"stores": {
"CAS_MAIN_STORE": {
"fast_slow": {
"fast": {
// Don't cache objects larger than 100 KB in memory; cache up to 100 MB in total.
"size_partitioning": {
"size": 100000,
"lower_store": {
"memory": {
"eviction_policy": {
"max_bytes": 100000000
}
}
},
"upper_store": {
"noop": {}
}
}
},
"slow": {
"filesystem": {
"content_path": "/root/.cache/turbo-cache/content_path-cas",
"temp_path": "/root/.cache/turbo-cache/tmp_path-cas",
"eviction_policy": {
// This uses around 8 GB of RAM for metadata. This is the actual size
// of the data ignoring inode sizing, so the actual disk space used is higher.
// 110 GB
"max_bytes": 110000000000,
// 2 GB
"evict_bytes": 2000000000
}
}
}
}
},
"AC_MAIN_STORE": {
"fast_slow": {
"fast": {
// Don't cache objects larger than 4 KB in memory; cache up to 10 MB in total.
"size_partitioning": {
"size": 4000,
"lower_store": {
"memory": {
"eviction_policy": {
"max_bytes": 10000000
}
}
},
"upper_store": {
"noop": {}
}
}
},
"slow": {
"filesystem": {
"content_path": "/root/.cache/turbo-cache/content_path-cas_ac",
"temp_path": "/root/.cache/turbo-cache/tmp_path-cas_ac",
"eviction_policy": {
// 10 GB.
"max_bytes": 10000000000
}
}
}
}
}
},
"servers": [{
"listener": {
"http": {
"socket_address": "0.0.0.0:50052",
"advanced_http": {
"experimental_http2_max_pending_accept_reset_streams": 300
}
}
},
"services": {
"cas": {
"main": {
"cas_store": "CAS_MAIN_STORE"
}
},
"ac": {
"main": {
"ac_store": "AC_MAIN_STORE"
}
},
"capabilities": {},
"bytestream": {
"cas_stores": {
"main": "CAS_MAIN_STORE"
},
// According to https://github.com/grpc/grpc.github.io/issues/371 16KiB - 64KiB is optimal.
"max_bytes_per_stream": 65536
}
}
}]
}
The text was updated successfully, but these errors were encountered:
I have a hunch that this is caused by the same root cause that makes (Bazel) builds like this fail if they're started, canceled on the client side and then restarted:
When running a CAS with the following configuration, Chromium/Reclient builds stall workers. The workers appear to be waiting for something, but they have low CPU and network usage.
Running v0.5.3 worker and scheduler with a v0.4.0 CAS with the following configuration works just fine. So something weird is happening.
The text was updated successfully, but these errors were encountered: