diff --git a/engine/hatchery/marathon/marathon.go b/engine/hatchery/marathon/marathon.go index a5d2230487..af8668f518 100644 --- a/engine/hatchery/marathon/marathon.go +++ b/engine/hatchery/marathon/marathon.go @@ -115,6 +115,7 @@ func (h *HatcheryMarathon) CheckConfiguration(cfg interface{}) error { //Custom http client with 3 retries httpClient := &http.Client{ + Timeout: time.Minute, Transport: &httpcontrol.Transport{ RequestTimeout: time.Minute, MaxTries: 3, diff --git a/engine/hatchery/swarm/swarm.go b/engine/hatchery/swarm/swarm.go index f87f27fc8f..5d80c9cce7 100644 --- a/engine/hatchery/swarm/swarm.go +++ b/engine/hatchery/swarm/swarm.go @@ -15,6 +15,7 @@ import ( types "github.com/docker/docker/api/types" docker "github.com/docker/docker/client" "github.com/docker/go-connections/tlsconfig" + "github.com/facebookgo/httpcontrol" "github.com/gorilla/mux" context "golang.org/x/net/context" @@ -46,7 +47,9 @@ func (h *HatcherySwarm) Init() error { log.Error("hatchery> swarm> unable to connect to a docker client:%s", errc) return errc } - if _, errPing := d.Ping(context.Background()); errPing != nil { + ctxDocker, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if _, errPing := d.Ping(ctxDocker); errPing != nil { log.Error("hatchery> swarm> unable to ping docker host:%s", errPing) return errPing } @@ -61,6 +64,7 @@ func (h *HatcherySwarm) Init() error { for hostName, cfg := range h.Config.DockerEngines { log.Info("hatchery> swarm> connecting to %s: %s", hostName, cfg.Host) httpClient := new(http.Client) + httpClient.Timeout = 30 * time.Second if cfg.CertPath != "" { options := tlsconfig.Options{ CAFile: filepath.Join(cfg.CertPath, "ca.pem"), @@ -108,18 +112,21 @@ func (h *HatcherySwarm) Init() error { continue } - httpClient.Transport = &http.Transport{ + httpClient.Transport = &httpcontrol.Transport{ + RequestTimeout: 30 * time.Second, TLSClientConfig: tlsc, } } else { - httpClient.Transport = &http.Transport{} + httpClient.Transport = &httpcontrol.Transport{RequestTimeout: 30 * time.Second} } d, errc := docker.NewClientWithOpts(docker.WithHost(cfg.Host), docker.WithVersion(cfg.APIVersion), docker.WithHTTPClient(httpClient)) if errc != nil { log.Error("hatchery> swarm> unable to connect to a docker client:%s for host %s (%s)", hostName, cfg.Host, errc) continue } - if _, errPing := d.Ping(context.Background()); errPing != nil { + ctxDocker, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + if _, errPing := d.Ping(ctxDocker); errPing != nil { log.Error("hatchery> swarm> unable to ping docker host:%s", errPing) continue } diff --git a/engine/hatchery/swarm/swarm_conf.go b/engine/hatchery/swarm/swarm_conf.go index beae5d0359..28a28e8ff5 100644 --- a/engine/hatchery/swarm/swarm_conf.go +++ b/engine/hatchery/swarm/swarm_conf.go @@ -49,7 +49,9 @@ func (h *HatcherySwarm) Status() sdk.MonitoringStatus { for dockerName, dockerClient := range h.dockerClients { //Check images status := sdk.MonitoringStatusOK - images, err := dockerClient.ImageList(context.Background(), types.ImageListOptions{All: true}) + ctxList, cancelList := context.WithTimeout(context.Background(), 20*time.Second) + defer cancelList() + images, err := dockerClient.ImageList(ctxList, types.ImageListOptions{All: true}) if err != nil { log.Warning("hatchery> swarm> %s> Status> Unable to list images on %s: %s", h.Name, dockerName, err) status = sdk.MonitoringStatusAlert diff --git a/engine/hatchery/swarm/swarm_util_create.go b/engine/hatchery/swarm/swarm_util_create.go index aec70145cb..014770f82c 100644 --- a/engine/hatchery/swarm/swarm_util_create.go +++ b/engine/hatchery/swarm/swarm_util_create.go @@ -131,7 +131,7 @@ checkImage: c, err := dockerClient.ContainerCreate(ctx, config, hostConfig, networkingConfig, name) if err != nil { next() - return sdk.WrapError(err, "createAndStartContainer> Unable to create container %s", name) + return sdk.WrapError(err, "createAndStartContainer> Unable to create container %s on %s", name, dockerClient.name) } next() diff --git a/engine/hatchery/swarm/swarm_util_get.go b/engine/hatchery/swarm/swarm_util_get.go index 9627dcbe05..63727ca690 100644 --- a/engine/hatchery/swarm/swarm_util_get.go +++ b/engine/hatchery/swarm/swarm_util_get.go @@ -2,6 +2,7 @@ package swarm import ( "strings" + "time" types "github.com/docker/docker/api/types" context "golang.org/x/net/context" @@ -10,9 +11,11 @@ import ( ) func (h *HatcherySwarm) getContainers(dockerClient *dockerClient, options types.ContainerListOptions) ([]types.Container, error) { - s, err := dockerClient.ContainerList(context.Background(), options) + ctxList, cancelList := context.WithTimeout(context.Background(), 10*time.Second) + defer cancelList() + s, err := dockerClient.ContainerList(ctxList, options) if err != nil { - return nil, sdk.WrapError(err, "hatchery> swarm> getContainers> unable to list containers") + return nil, sdk.WrapError(err, "hatchery> swarm> getContainers> unable to list containers on %s", dockerClient.name) } return s, nil } @@ -20,7 +23,7 @@ func (h *HatcherySwarm) getContainers(dockerClient *dockerClient, options types. func (h *HatcherySwarm) getContainer(dockerClient *dockerClient, name string, options types.ContainerListOptions) (*types.Container, error) { containers, err := h.getContainers(dockerClient, options) if err != nil { - return nil, sdk.WrapError(err, "hatchery> swarm> getContainer> cannot getContainers") + return nil, sdk.WrapError(err, "hatchery> swarm> getContainer> cannot getContainers on %s", dockerClient.name) } for i := range containers { diff --git a/engine/hatchery/swarm/swarm_util_kill.go b/engine/hatchery/swarm/swarm_util_kill.go index 0f55a06dc1..d77cc7cc10 100644 --- a/engine/hatchery/swarm/swarm_util_kill.go +++ b/engine/hatchery/swarm/swarm_util_kill.go @@ -19,14 +19,16 @@ const ( ) func (h *HatcherySwarm) killAndRemove(dockerClient *dockerClient, ID string) error { - container, err := dockerClient.ContainerInspect(context.Background(), ID) + ctxList, cancelList := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelList() + container, err := dockerClient.ContainerInspect(ctxList, ID) if err != nil { //If there is an error, we try to remove the container if strings.Contains(err.Error(), "No such container") { - log.Debug("hatchery> swarm> killAndRemove> cannot InspectContainer: %v", err) + log.Debug("hatchery> swarm> killAndRemove> cannot InspectContainer: %v on %s", err, dockerClient.name) return nil } - log.Info("hatchery> swarm> killAndRemove> cannot InspectContainer: %v", err) + log.Info("hatchery> swarm> killAndRemove> cannot InspectContainer: %v on %s", err, dockerClient.name) } else { // If its a worker "register", check registration before deleting it if strings.Contains(container.Name, "register-") { @@ -50,7 +52,9 @@ func (h *HatcherySwarm) killAndRemove(dockerClient *dockerClient, ID string) err for _, cnetwork := range container.NetworkSettings.Networks { //Get the network - network, err := dockerClient.NetworkInspect(context.Background(), cnetwork.NetworkID, types.NetworkInspectOptions{}) + ctxList, cancelList := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelList() + network, err := dockerClient.NetworkInspect(ctxList, cnetwork.NetworkID, types.NetworkInspectOptions{}) if err != nil { if !strings.Contains(err.Error(), "No such network") { return sdk.WrapError(err, "hatchery> swarm> killAndRemove> unable to get network for %s on %s", ID[:7], dockerClient.name) @@ -75,7 +79,9 @@ func (h *HatcherySwarm) killAndRemove(dockerClient *dockerClient, ID string) err //Finally remove the network log.Info("hatchery> swarm> remove network %s (%s)", network.Name, network.ID) - if err := dockerClient.NetworkRemove(context.Background(), network.ID); err != nil { + ctxDocker, cancelList := context.WithTimeout(context.Background(), 10*time.Second) + defer cancelList() + if err := dockerClient.NetworkRemove(ctxDocker, network.ID); err != nil { log.Error("hatchery> swarm> killAndRemove> unable to kill and remove network %s from %s err:%s", network.ID[:12], dockerClient.name, err) } } @@ -84,13 +90,17 @@ func (h *HatcherySwarm) killAndRemove(dockerClient *dockerClient, ID string) err func (h *HatcherySwarm) killAndRemoveContainer(dockerClient *dockerClient, ID string) error { log.Debug("hatchery> swarm> killAndRemove> remove container %s on %s", ID, dockerClient.name) - if err := dockerClient.ContainerKill(context.Background(), ID, "SIGKILL"); err != nil { + ctxDocker, cancelList := context.WithTimeout(context.Background(), 20*time.Second) + defer cancelList() + if err := dockerClient.ContainerKill(ctxDocker, ID, "SIGKILL"); err != nil { if !strings.Contains(err.Error(), "is not running") && !strings.Contains(err.Error(), "No such container") { return sdk.WrapError(err, "hatchery> swarm> killAndRemove> err on kill container %v from %s", err, dockerClient.name) } } - if err := dockerClient.ContainerRemove(context.Background(), ID, types.ContainerRemoveOptions{Force: true}); err != nil { + ctxDockerRemove, cancelList := context.WithTimeout(context.Background(), 20*time.Second) + defer cancelList() + if err := dockerClient.ContainerRemove(ctxDockerRemove, ID, types.ContainerRemoveOptions{Force: true}); err != nil { // container could be already removed by a previous call to docker if !strings.Contains(err.Error(), "No such container") { return sdk.WrapError(err, "hatchery> swarm> killAndRemove> Unable to remove container %s form %s", ID, dockerClient.name) @@ -103,14 +113,18 @@ func (h *HatcherySwarm) killAndRemoveContainer(dockerClient *dockerClient, ID st func (h *HatcherySwarm) killAwolNetworks() error { for _, dockerClient := range h.dockerClients { //Checking networks - nets, errLN := dockerClient.NetworkList(context.Background(), types.NetworkListOptions{}) + ctxDocker, cancelList := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelList() + nets, errLN := dockerClient.NetworkList(ctxDocker, types.NetworkListOptions{}) if errLN != nil { log.Warning("hatchery> swarm> killAwolNetworks> Cannot get networks on %s: %s", dockerClient.name, errLN) return errLN } for i := range nets { - n, err := dockerClient.NetworkInspect(context.Background(), nets[i].ID, types.NetworkInspectOptions{}) + ctxDocker, cancelList := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelList() + n, err := dockerClient.NetworkInspect(ctxDocker, nets[i].ID, types.NetworkInspectOptions{}) if err != nil { log.Warning("hatchery> swarm> killAwolNetworks> Unable to get network info: %v", err) continue @@ -134,7 +148,9 @@ func (h *HatcherySwarm) killAwolNetworks() error { } log.Info("hatchery> swarm> killAwolNetworks> remove network[%s] %s on %s (created on %v)", n.ID, n.Name, dockerClient.name, n.Created) - if err := dockerClient.NetworkRemove(context.Background(), n.ID); err != nil { + ctxDocker2, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := dockerClient.NetworkRemove(ctxDocker2, n.ID); err != nil { log.Warning("hatchery> swarm> killAwolNetworks> Unable to delete network %s err:%s", n.Name, err) } } diff --git a/sdk/cdsclient/client.go b/sdk/cdsclient/client.go index 229792cc8d..8bdf3144aa 100644 --- a/sdk/cdsclient/client.go +++ b/sdk/cdsclient/client.go @@ -47,6 +47,7 @@ func NewService(endpoint string, timeout time.Duration, insecureSkipVerifyTLS bo cli := new(client) cli.config = conf cli.HTTPClient = &http.Client{ + Timeout: timeout, Transport: &httpcontrol.Transport{ RequestTimeout: timeout, MaxTries: 5,