From d9b5609322af3f88d80d13a8359263ddd8f62d11 Mon Sep 17 00:00:00 2001
From: Michel Hua
Date: Thu, 10 Nov 2022 18:33:19 +0100
Subject: [PATCH 1/3] typos in comments and docs

---
 awswrangler/s3/_read_parquet.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py
index 69918a1d8..bac6c90ba 100644
--- a/awswrangler/s3/_read_parquet.py
+++ b/awswrangler/s3/_read_parquet.py
@@ -708,14 +708,14 @@ def read_parquet(
     Reading in chunks (Chunk by file)
 
     >>> import awswrangler as wr
-    >>> dfs = wr.s3.read_parquet(path=['s3://bucket/filename0.csv', 's3://bucket/filename1.csv'], chunked=True)
+    >>> dfs = wr.s3.read_parquet(path=['s3://bucket/filename0.parquet', 's3://bucket/filename1.parquet'], chunked=True)
     >>> for df in dfs:
     >>>     print(df)  # Smaller Pandas DataFrame
 
     Reading in chunks (Chunk by 1MM rows)
 
     >>> import awswrangler as wr
-    >>> dfs = wr.s3.read_parquet(path=['s3://bucket/filename0.csv', 's3://bucket/filename1.csv'], chunked=1_000_000)
+    >>> dfs = wr.s3.read_parquet(path=['s3://bucket/filename0.parquet', 's3://bucket/filename1.parquet'], chunked=1_000_000)
     >>> for df in dfs:
     >>>     print(df)  # 1MM Pandas DataFrame
@@ -849,7 +849,7 @@ def read_parquet_table(
         Suffix or List of suffixes to be read (e.g. [".gz.parquet", ".snappy.parquet"]).
         If None, will try to read all files. (default)
     filename_ignore_suffix: Union[str, List[str], None]
-        Suffix or List of suffixes for S3 keys to be ignored.(e.g. [".csv", "_SUCCESS"]).
+        Suffix or List of suffixes for S3 keys to be ignored.(e.g. [".parquet", "_SUCCESS"]).
         If None, will try to read all files. (default)
     catalog_id : str, optional
         The ID of the Data Catalog from which to retrieve Databases.

From dc68bdf303bcd848338e29107287f10076c24594 Mon Sep 17 00:00:00 2001
From: Michel Hua
Date: Thu, 10 Nov 2022 20:06:03 +0100
Subject: [PATCH 2/3] noqa: E501 for docstring

---
 awswrangler/s3/_read_parquet.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py
index bac6c90ba..d41f8fb32 100644
--- a/awswrangler/s3/_read_parquet.py
+++ b/awswrangler/s3/_read_parquet.py
@@ -715,7 +715,7 @@ def read_parquet(
     Reading in chunks (Chunk by 1MM rows)
 
     >>> import awswrangler as wr
-    >>> dfs = wr.s3.read_parquet(path=['s3://bucket/filename0.parquet', 's3://bucket/filename1.parquet'], chunked=1_000_000)
+    >>> dfs = wr.s3.read_parquet(path=['s3://bucket/filename0.parquet', 's3://bucket/filename1.parquet'], chunked=1_000_000)  # pylint: disable=line-too-long
     >>> for df in dfs:
     >>>     print(df)  # 1MM Pandas DataFrame
 
@@ -725,7 +725,7 @@ def read_parquet(
     >>> my_filter = lambda x: True if x["city"].startswith("new") else False
     >>> df = wr.s3.read_parquet(path, dataset=True, partition_filter=my_filter)
 
-    """
+    """  # noqa: E501
     session: boto3.Session = _utils.ensure_session(session=boto3_session)
     paths: List[str] = _path2list(
         path=path,
@@ -849,7 +849,7 @@ def read_parquet_table(
         Suffix or List of suffixes to be read (e.g. [".gz.parquet", ".snappy.parquet"]).
         If None, will try to read all files. (default)
     filename_ignore_suffix: Union[str, List[str], None]
-        Suffix or List of suffixes for S3 keys to be ignored.(e.g. [".parquet", "_SUCCESS"]).
+        Suffix or List of suffixes for S3 keys to be ignored.(e.g. [".csv", "_SUCCESS"]).
         If None, will try to read all files. (default)
     catalog_id : str, optional
         The ID of the Data Catalog from which to retrieve Databases.
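For context on the docstring lines patched above, here is a minimal runnable sketch of the chunked-read pattern those examples describe. The bucket and object names are placeholders taken from the docstring itself, not real paths:

import awswrangler as wr

# Chunk by file: the generator yields one DataFrame per Parquet object.
dfs = wr.s3.read_parquet(
    path=["s3://bucket/filename0.parquet", "s3://bucket/filename1.parquet"],
    chunked=True,
)
for df in dfs:
    print(df.shape)  # one smaller DataFrame per file

# Chunk by row count: the generator yields DataFrames of up to 1,000,000 rows.
dfs = wr.s3.read_parquet(
    path=["s3://bucket/filename0.parquet", "s3://bucket/filename1.parquet"],
    chunked=1_000_000,
)
for df in dfs:
    print(len(df))  # at most 1_000_000 rows per chunk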
From 6b10c153a5a7d9d34fccfa5732c75ddb8e1dd0be Mon Sep 17 00:00:00 2001
From: Michel Hua
Date: Thu, 10 Nov 2022 21:06:21 +0100
Subject: [PATCH 3/3] use python REPL style multiline

---
 awswrangler/s3/_read_parquet.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py
index d41f8fb32..3cb918216 100644
--- a/awswrangler/s3/_read_parquet.py
+++ b/awswrangler/s3/_read_parquet.py
@@ -715,7 +715,10 @@ def read_parquet(
     Reading in chunks (Chunk by 1MM rows)
 
     >>> import awswrangler as wr
-    >>> dfs = wr.s3.read_parquet(path=['s3://bucket/filename0.parquet', 's3://bucket/filename1.parquet'], chunked=1_000_000)  # pylint: disable=line-too-long
+    >>> dfs = wr.s3.read_parquet(
+    ...     path=['s3://bucket/filename0.parquet', 's3://bucket/filename1.parquet'],
+    ...     chunked=1_000_000
+    ... )
     >>> for df in dfs:
     >>>     print(df)  # 1MM Pandas DataFrame
 
@@ -725,7 +728,7 @@ def read_parquet(
     >>> my_filter = lambda x: True if x["city"].startswith("new") else False
     >>> df = wr.s3.read_parquet(path, dataset=True, partition_filter=my_filter)
 
-    """  # noqa: E501
+    """
     session: boto3.Session = _utils.ensure_session(session=boto3_session)
     paths: List[str] = _path2list(
         path=path,
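The docstring context kept intact by this last patch also shows a partition filter. For completeness, a runnable sketch of that pattern, assuming a hypothetical dataset path; the partition_filter callback receives each partition's key/value pairs as strings and returns whether that partition should be read:

import awswrangler as wr

# Keep only partitions whose "city" value starts with "new" (e.g. city=new_york).
# The dataset path below is a placeholder, not a path from this repository.
def my_filter(partition: dict) -> bool:
    return partition["city"].startswith("new")

df = wr.s3.read_parquet(
    path="s3://bucket/dataset/",
    dataset=True,
    partition_filter=my_filter,
)
print(df.head())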