-
Notifications
You must be signed in to change notification settings - Fork 20.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
cmd/swarm/swarm-smoke: Trigger chunk debug on timeout #19101
Changes from 7 commits
554aa7e
5b16c05
b5ec08d
b906f7a
d7429b1
3091572
dcdff86
7138213
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,15 +37,16 @@ var ( | |
) | ||
|
||
var ( | ||
allhosts string | ||
hosts []string | ||
filesize int | ||
syncDelay int | ||
httpPort int | ||
wsPort int | ||
verbosity int | ||
timeout int | ||
single bool | ||
allhosts string | ||
hosts []string | ||
filesize int | ||
syncDelay int | ||
httpPort int | ||
wsPort int | ||
verbosity int | ||
timeout int | ||
single bool | ||
getAllRefsTimeout int | ||
) | ||
|
||
func main() { | ||
|
@@ -102,6 +103,12 @@ func main() { | |
Usage: "whether to fetch content from a single node or from all nodes", | ||
Destination: &single, | ||
}, | ||
cli.IntFlag{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure why we need this to be configurable at all? |
||
Name: "refs-timeout", | ||
Value: 5, | ||
Usage: "timeout in seconds to wait for GetAllReferences to return", | ||
Destination: &getAllRefsTimeout, | ||
}, | ||
} | ||
|
||
app.Flags = append(app.Flags, []cli.Flag{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,13 +18,19 @@ package main | |
|
||
import ( | ||
"bytes" | ||
"context" | ||
"fmt" | ||
"io/ioutil" | ||
"math/rand" | ||
"os" | ||
"sync" | ||
"time" | ||
|
||
"github.com/ethereum/go-ethereum/log" | ||
"github.com/ethereum/go-ethereum/metrics" | ||
"github.com/ethereum/go-ethereum/rpc" | ||
"github.com/ethereum/go-ethereum/swarm/api" | ||
"github.com/ethereum/go-ethereum/swarm/storage" | ||
"github.com/ethereum/go-ethereum/swarm/testutil" | ||
"github.com/pborman/uuid" | ||
|
||
|
@@ -49,12 +55,75 @@ func uploadAndSyncCmd(ctx *cli.Context, tuid string) error { | |
case <-time.After(time.Duration(timeout) * time.Second): | ||
metrics.GetOrRegisterCounter(fmt.Sprintf("%s.timeout", commandName), nil).Inc(1) | ||
|
||
e := fmt.Errorf("timeout after %v sec", timeout) | ||
// trigger debug functionality on randomBytes | ||
err := triggerChunkDebug(randomBytes[:]) | ||
if err != nil { | ||
e = fmt.Errorf("%v; triggerChunkDebug failed: %v", e, err) | ||
} | ||
|
||
return fmt.Errorf("timeout after %v sec", timeout) | ||
return e | ||
} | ||
} | ||
|
||
func triggerChunkDebug(testData []byte) error { | ||
holisticode marked this conversation as resolved.
Show resolved
Hide resolved
|
||
log.Warn("Test timed out; running chunk debug sequence") | ||
|
||
addrs, err := getAllRefs(testData) | ||
if err != nil { | ||
return err | ||
} | ||
log.Trace("All references retrieved") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we really need these many logs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I vote yes. Smoke tests are very difficult to debug, as they are spanning nodes, it's not just one go process. There are also no unit tests. |
||
|
||
// has-chunks | ||
for _, host := range hosts { | ||
httpHost := fmt.Sprintf("ws://%s:%d", host, 8546) | ||
log.Trace("Calling `Has` on host", "httpHost", httpHost) | ||
rpcClient, err := rpc.Dial(httpHost) | ||
if err != nil { | ||
log.Trace("Error dialing host", "err", err) | ||
return err | ||
} | ||
log.Trace("rpc dial ok") | ||
var hasInfo []api.HasInfo | ||
err = rpcClient.Call(&hasInfo, "bzz_has", addrs) | ||
if err != nil { | ||
log.Trace("Error calling host", "err", err) | ||
return err | ||
} | ||
log.Trace("rpc call ok") | ||
count := 0 | ||
for _, info := range hasInfo { | ||
if !info.Has { | ||
count++ | ||
log.Error("Host does not have chunk", "host", httpHost, "chunk", info.Addr) | ||
} | ||
} | ||
if count == 0 { | ||
log.Info("Host reported to have all chunks", "host", httpHost) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func getAllRefs(testData []byte) (storage.AddressCollection, error) { | ||
log.Trace("Getting all references for given root hash") | ||
datadir, err := ioutil.TempDir("", "chunk-debug") | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to create temp dir: %v", err) | ||
} | ||
defer os.RemoveAll(datadir) | ||
fileStore, err := storage.NewLocalFileStore(datadir, make([]byte, 32)) | ||
if err != nil { | ||
return nil, err | ||
} | ||
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(getAllRefsTimeout)*time.Second) | ||
defer cancel() | ||
|
||
reader := bytes.NewReader(testData) | ||
return fileStore.GetAllReferences(ctx, reader, false) | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you please just fix the typo one line below? <3 |
||
func uplaodAndSync(c *cli.Context, randomBytes []byte, tuid string) error { | ||
log.Info("uploading to "+httpEndpoint(hosts[0])+" and syncing", "tuid", tuid, "seed", seed) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
call it
trackTimeout
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
shall we not make
syncDelay, timeout, getAllRefsTimeout
duration?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was going to do it right away, but then I started doubting. This could break some deployment scripts, so I would not do it without approval by @skylenet and/or @nonsense .
In fact, the command line args would have to change to support a string duration
https://golang.org/pkg/time/#ParseDuration
So I actually suggest to do this as a separate PR or any future PR touching these files after we know best the consequences
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I agree on changing this to a duration on a separate PR. We'll update our helm charts accordingly.