Skip to content
This repository has been archived by the owner on Aug 17, 2023. It is now read-only.

I am building an image and pushing it to the docker registry. An error occurred. #537

Open
Morride opened this issue Sep 1, 2020 · 9 comments

Comments

@Morride
Copy link

Morride commented Sep 1, 2020

/kind bug

What steps did you take and what happened:
[A clear and concise description of what the bug is.]
I executed the following code

import uuid
from kubeflow import fairing   
from kubeflow.fairing.kubernetes.utils import mounting_pvc

tfjob_name = f'mnist-training-{uuid.uuid4().hex[:4]}'

output_map =  {
    "Dockerfile": "Dockerfile",
    "mnist.py": "mnist.py"
}

command=["python"
         "/opt/mnist.py",
         "--tf-model-dir=" + model_dir,
         "--tf-export-dir=" + export_path,
         "--tf-train-steps=" + train_steps,
         "--tf-batch-size=" + batch_size,
         "--tf-learning-rate=" + learning_rate]

fairing.config.set_preprocessor('python', command=command, path_prefix="/app", output_map=output_map)
fairing.config.set_builder(name='docker', registry=DOCKER_REGISTRY, base_image="",
                           image_name="mnist", dockerfile_path="Dockerfile")
fairing.config.set_deployer(name='tfjob', namespace=my_namespace, stream_log=False, job_name=tfjob_name,
                            chief_count=num_chief, worker_count=num_workers, ps_count=num_ps, 
                            pod_spec_mutators=[mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir)])
fairing.config.run()

see the error

[W 200901 07:34:19 utils:51] The function mounting_pvc has been deprecated,                     please use `volume_mounts`
[I 200901 07:34:19 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.base.BasePreProcessor object at 0x7f3b19749828>
[I 200901 07:34:19 config:136] Using builder: <kubeflow.fairing.builders.docker.docker.DockerBuilder object at 0x7f3b400359b0>
[I 200901 07:34:19 config:138] Using deployer: <kubeflow.fairing.deployers.tfjob.tfjob.TfJob object at 0x7f3b19749b70>
[I 200901 07:34:19 docker:32] Building image using docker
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
~/.local/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

~/.local/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    353         else:
--> 354             conn.request(method, url, **httplib_request_kw)
    355 

/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1253         """Send a complete request to the server."""
-> 1254         self._send_request(method, url, body, headers, encode_chunked)
   1255 

/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1299             body = _encode(body, 'body')
-> 1300         self.endheaders(body, encode_chunked=encode_chunked)
   1301 

/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1248             raise CannotSendHeader()
-> 1249         self._send_output(message_body, encode_chunked=encode_chunked)
   1250 

/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1035         del self._buffer[:]
-> 1036         self.send(msg)
   1037 

/usr/lib/python3.6/http/client.py in send(self, data)
    973             if self.auto_open:
--> 974                 self.connect()
    975             else:

/usr/local/lib/python3.6/dist-packages/docker/transport/unixconn.py in connect(self)
     42         sock.settimeout(self.timeout)
---> 43         sock.connect(self.unix_socket)
     44         self.sock = sock

FileNotFoundError: [Errno 2] No such file or directory

During handling of the above exception, another exception occurred:

ProtocolError                             Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    448                     retries=self.max_retries,
--> 449                     timeout=timeout
    450                 )

~/.local/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    637             retries = retries.increment(method, url, error=e, _pool=self,
--> 638                                         _stacktrace=sys.exc_info()[2])
    639             retries.sleep()

~/.local/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    367             if read is False or not self._is_method_retryable(method):
--> 368                 raise six.reraise(type(error), error, _stacktrace)
    369             elif read is not None:

~/.local/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
    684         if value.__traceback__ is not tb:
--> 685             raise value.with_traceback(tb)
    686         raise value

~/.local/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

~/.local/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    353         else:
--> 354             conn.request(method, url, **httplib_request_kw)
    355 

/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1253         """Send a complete request to the server."""
-> 1254         self._send_request(method, url, body, headers, encode_chunked)
   1255 

/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1299             body = _encode(body, 'body')
-> 1300         self.endheaders(body, encode_chunked=encode_chunked)
   1301 

/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1248             raise CannotSendHeader()
-> 1249         self._send_output(message_body, encode_chunked=encode_chunked)
   1250 

/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1035         del self._buffer[:]
-> 1036         self.send(msg)
   1037 

/usr/lib/python3.6/http/client.py in send(self, data)
    973             if self.auto_open:
--> 974                 self.connect()
    975             else:

/usr/local/lib/python3.6/dist-packages/docker/transport/unixconn.py in connect(self)
     42         sock.settimeout(self.timeout)
---> 43         sock.connect(self.unix_socket)
     44         self.sock = sock

ProtocolError: ('Connection aborted.', FileNotFoundError(2, 'No such file or directory'))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/docker/api/client.py in _retrieve_server_version(self)
    206         try:
--> 207             return self.version(api_version=False)["ApiVersion"]
    208         except KeyError:

/usr/local/lib/python3.6/dist-packages/docker/api/daemon.py in version(self, api_version)
    180         url = self._url("/version", versioned_api=api_version)
--> 181         return self._result(self._get(url), json=True)

/usr/local/lib/python3.6/dist-packages/docker/utils/decorators.py in inner(self, *args, **kwargs)
     45                 kwargs['headers'].update(self._general_configs['HttpHeaders'])
---> 46         return f(self, *args, **kwargs)
     47     return inner

/usr/local/lib/python3.6/dist-packages/docker/api/client.py in _get(self, url, **kwargs)
    229     def _get(self, url, **kwargs):
--> 230         return self.get(url, **self._set_request_timeout(kwargs))
    231 

/usr/local/lib/python3.6/dist-packages/requests/sessions.py in get(self, url, **kwargs)
    545         kwargs.setdefault('allow_redirects', True)
--> 546         return self.request('GET', url, **kwargs)
    547 

/usr/local/lib/python3.6/dist-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    532         send_kwargs.update(settings)
--> 533         resp = self.send(prep, **send_kwargs)
    534 

/usr/local/lib/python3.6/dist-packages/requests/sessions.py in send(self, request, **kwargs)
    645         # Send the request
--> 646         r = adapter.send(request, **kwargs)
    647 

/usr/local/lib/python3.6/dist-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    497         except (ProtocolError, socket.error) as err:
--> 498             raise ConnectionError(err, request=request)
    499 

ConnectionError: ('Connection aborted.', FileNotFoundError(2, 'No such file or directory'))

During handling of the above exception, another exception occurred:

DockerException                           Traceback (most recent call last)
<ipython-input-21-7639ed9d8ad5> in <module>
     24                             chief_count=num_chief, worker_count=num_workers, ps_count=num_ps,
     25                             pod_spec_mutators=[mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir)])
---> 26 fairing.config.run()

~/.local/lib/python3.6/site-packages/kubeflow/fairing/config.py in run(self)
    138         logging.info("Using deployer: %s", deployer)
    139 
--> 140         builder.build()
    141         pod_spec = builder.generate_pod_spec()
    142         deployer.deploy(pod_spec)

~/.local/lib/python3.6/site-packages/kubeflow/fairing/builders/docker/docker.py in build(self)
     31     def build(self):
     32         logging.info("Building image using docker")
---> 33         self.docker_client = APIClient(version='auto')
     34         self._build()
     35         if self.push:

/usr/local/lib/python3.6/dist-packages/docker/api/client.py in __init__(self, base_url, version, timeout, tls, user_agent, num_pools, credstore_env)
    188         elif isinstance(version, six.string_types):
    189             if version.lower() == 'auto':
--> 190                 self._version = self._retrieve_server_version()
    191             else:
    192                 self._version = version

/usr/local/lib/python3.6/dist-packages/docker/api/client.py in _retrieve_server_version(self)
    213         except Exception as e:
    214             raise DockerException(
--> 215                 'Error while fetching server API version: {0}'.format(e)
    216             )
    217 

DockerException: Error while fetching server API version: ('Connection aborted.', FileNotFoundError(2, 'No such file or directory'))

What did you expect to happen:
This error looks like a directory or file is missing, but I cannot tell from the error which directory or file is missing, so I cannot locate and solve the problem. Maybe it’s a problem with my previous settings?

Anything else you would like to add:
[Miscellaneous information that will assist in solving the issue.]
This is the configuration used by the code above; I manually created the PV and PVC.

# Set docker registry to store image.
# Ensure you have permission for pushing docker image requests. 
DOCKER_REGISTRY = 'harbor.data.com/library'

# Set namespace. Note that the created PVC should be in the namespace.
my_namespace = 'anonymous'
# You can also get the default target namespace using the API below.
#namespace = fairing_utils.get_default_target_namespace()

Environment:

  • Fairing version: (use python -c "import kubeflow.fairing; print(kubeflow.fairing.__version__)"):
  • Kubeflow version: (version number can be found at the bottom left corner of the Kubeflow dashboard):
  • Minikube version:
  • Kubernetes version: (use kubectl version):
  • OS (e.g. from /etc/os-release):

NOTE: If you are using fairing from master, please provide us the git commit hash.

@issue-label-bot
Copy link

Issue Label Bot is not confident enough to auto-label this issue.
See dashboard for more details.

@jinchihe
Copy link
Member

jinchihe commented Sep 1, 2020

FileNotFoundError: [Errno 2] No such file or directory

which file?

Are you running in a container? Or are you not logged in to Docker?

@Morride
Copy link
Author

Morride commented Sep 1, 2020

@jinchihe I can't see what file is missing.
My jupyter notebook is running in a container.

@issue-label-bot
Copy link

Issue-Label Bot is automatically applying the labels:

Label Probability
area/jupyter 0.58

Please mark this comment with 👍 or 👎 to give our bot feedback!
Links: app homepage, dashboard and code for this bot.

@Morride
Copy link
Author

Morride commented Sep 1, 2020

@jinchihe Do you mean that jupyter notebook cannot run fairing when running in a container?

@jinchihe
Copy link
Member

jinchihe commented Sep 1, 2020

@Morride If you are running in a container, you should use the Cluster Builder, because the Docker Builder needs a Docker daemon. To use the Docker Builder inside a container, you would first have to install Docker in the container or map the host's Docker into it. I suggest switching to the Cluster Builder directly.

@xinghalo
Copy link

xinghalo commented Sep 14, 2020

同样遇到这个问题,如 @jinchihe 所说,这是因为容器中的jupyter不能直接使用config+pvc的方式构建,需要使用cluster模式。
详情参考
我这边使用cluster + minio_context后没有问题。

代码参考:

preprocessor = base_preprocessor.BasePreProcessor(command=["python"], # The base class will set this.
                                                  input_files=[],
                                                  path_prefix="/app", # irrelevant since we aren't preprocessing any files
                                                  output_map={
                                                      "Dockerfile": "Dockerfile",
                                                      "mnist.py": "mnist.py"
                                                  })
# 这个ip是从k8s中手动查询到的
minio_context_source = MinioContextSource(endpoint_url="http://10.108.13.182:9000", 
                                          minio_secret="minio", 
                                          minio_secret_key="minio123", 
                                          region_name="test")

cluster_builder = cluster.cluster.ClusterBuilder(registry="${替换成自己的本地docker registry}", # 本地docker registry
                                                 base_image="", # 基础镜像,在Dockerfile中指定,${替换成自己的本地docker registry}/tensorflow:xxx
                                                 preprocessor=preprocessor, 
                                                 image_name="mnist", # 制作的镜像名称
                                                 dockerfile_path="Dockerfile", # 指定的dockerfile
                                                 context_source=minio_context_source) # 创建上下文

@jinchihe
Copy link
Member

@xinghalo That's great! Would you please update the E2E example to add the ClusterBuilder, as described in your comment?

I think we can add comments in the example, such as

  • If in the container, suggest using the ClusterBuilder ....
  • Otherwise, use the DockerBuilder ...

@caseware66
Copy link

minio_context_source = MinioContextSource(endpoint_url="http://10.108.13.182:9000",
minio_secret="minio",
minio_secret_key="minio123",
region_name="test")

您好,endpoint_url,minio_secret,minio_secret_key,region_name 这些值如何获得?

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Projects
None yet
Development

No branches or pull requests

5 participants