Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

file.GetContentFile: stream to disk, add callback #30

Merged
merged 15 commits into from
May 5, 2020
63 changes: 53 additions & 10 deletions pydrive2/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from googleapiclient import errors
from googleapiclient.http import MediaIoBaseUpload
from googleapiclient.http import MediaIoBaseDownload
from functools import wraps

from .apiattr import ApiAttribute
Expand Down Expand Up @@ -34,6 +35,10 @@ def __init__(self, http_error):
# Initialize args for backward compatibility
super().__init__(http_error)

def GetField(self, field):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can simplify DVC code after release probably

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, on my post-merge list :)

"""Returns the `field` from the first error"""
return self.error.get("errors", [{}])[0].get(field, "")
shcheklein marked this conversation as resolved.
Show resolved Hide resolved


class FileNotDownloadableError(RuntimeError):
"""Error trying to download file that is not downloadable."""
Expand Down Expand Up @@ -220,7 +225,9 @@ def GetContentString(
self.FetchContent(mimetype, remove_bom)
return self.content.getvalue().decode(encoding)

def GetContentFile(self, filename, mimetype=None, remove_bom=False):
def GetContentFile(
self, filename, mimetype=None, remove_bom=False, callback=None
):
"""Save content of this file as a local file.

:param filename: name of the file to write to.
Expand All @@ -229,17 +236,53 @@ def GetContentFile(self, filename, mimetype=None, remove_bom=False):
:type mimetype: str
:param remove_bom: Whether to remove the byte order marking.
:type remove_bom: bool
:param callback: passed two arguments: (total transferred, file size).
:type callback: callable
:raises: ApiRequestError, FileNotUploadedError, FileNotDownloadableError
"""
if (
self.content is None
or type(self.content) is not io.BytesIO
or self.has_bom == remove_bom
):
self.FetchContent(mimetype, remove_bom)
f = open(filename, "wb")
f.write(self.content.getvalue())
f.close()
files = self.auth.service.files()
file_id = self.metadata.get("id") or self.get("id")

def download(fd, request):
downloader = MediaIoBaseDownload(fd, request)
done = False
while done is False:
status, done = downloader.next_chunk()
if callback:
callback(status.resumable_progress, status.total_size)

with open(filename, mode="w+b") as fd:
# Ideally would use files.export_media instead if
# metadata.get("mimeType").startswith("application/vnd.google-apps.")
# but that would first require a slow call to FetchMetadata()
try:
download(fd, files.get_media(fileId=file_id))
shcheklein marked this conversation as resolved.
Show resolved Hide resolved
except errors.HttpError as error:
exc = ApiRequestError(error)
if (
exc.error["code"] != 403
or exc.GetField("reason") != "fileNotDownloadable"
):
raise exc
mimetype = mimetype or "text/plain"
fd.seek(0) # just in case `download()` modified `fd`
try:
download(
fd,
files.export_media(fileId=file_id, mimeType=mimetype),
)
except errors.HttpError as error:
raise ApiRequestError(error)

if mimetype == "text/plain" and remove_bom:
fd.seek(0)
boms = [
bom[mimetype]
for bom in MIME_TYPE_TO_BOM.values()
if mimetype in bom
]
if boms:
self._RemovePrefix(fd, boms[0].encode("utf8"))

@LoadAuth
def FetchMetadata(self, fields=None, fetch_all=False):
Expand Down
6 changes: 1 addition & 5 deletions pydrive2/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,7 @@ def pydrive_retry(call):
try:
result = call()
except ApiRequestError as exception:
retry_codes = ["403", "500", "502", "503", "504"]
if any(
"HttpError {}".format(code) in str(exception)
for code in retry_codes
):
if exception.error["code"] in [403, 500, 502, 503, 504]:
raise PyDriveRetriableError("Google API request failed")
raise
return result
Expand Down