Skip to content

Commit

Permalink
Add --humanize and --summarize options for s3 ls
Browse files Browse the repository at this point in the history
  • Loading branch information
chadhinton committed Jan 21, 2015
1 parent 4d91f36 commit efbecb7
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 8 deletions.
30 changes: 25 additions & 5 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from awscli.customizations.s3.filters import create_filter
from awscli.customizations.s3.s3handler import S3Handler, S3StreamHandler
from awscli.customizations.s3.utils import find_bucket_key, uni_print, \
AppendFilter, find_dest_path_comp_key
AppendFilter, find_dest_path_comp_key, humanize
from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \
SizeAndLastModifiedSync, NeverSync

Expand All @@ -38,6 +38,12 @@
"Command is performed on all files or objects "
"under the specified directory or prefix.")}

# Argument-table entries for the optional ``s3 ls`` display flags.
HUMANIZE = {
    'name': 'humanize',
    'action': 'store_true',
    'help_text': ("Displays file sizes in human readable format."),
}

SUMMARIZE = {
    'name': 'summarize',
    'action': 'store_true',
    'help_text': ("Displays summary information (number of objects, total size)."),
}

DRYRUN = {'name': 'dryrun', 'action': 'store_true',
'help_text': (
"Displays the operations that would be performed using the "
Expand Down Expand Up @@ -242,13 +248,16 @@ class ListCommand(S3Command):
USAGE = "<S3Path> or NONE"
ARG_TABLE = [{'name': 'paths', 'nargs': '?', 'default': 's3://',
'positional_arg': True, 'synopsis': USAGE}, RECURSIVE,
PAGE_SIZE]
PAGE_SIZE, HUMANIZE, SUMMARIZE]
EXAMPLES = BasicCommand.FROM_FILE('s3/ls.rst')

def _run_main(self, parsed_args, parsed_globals):
super(ListCommand, self)._run_main(parsed_args, parsed_globals)
self._empty_result = False
self._at_first_page = True
self._size_accumulator = 0
self._total_objects = 0
self._humanize = parsed_args.humanize
path = parsed_args.paths
if path.startswith('s3://'):
path = path[5:]
Expand All @@ -261,6 +270,8 @@ def _run_main(self, parsed_args, parsed_globals):
parsed_args.page_size)
else:
self._list_all_objects(bucket, key, parsed_args.page_size)
if parsed_args.summarize:
self._print_summary()
if key:
# User specified a key to look for. We should return an rc of one
# if there are no matching keys and/or prefixes or return an rc
Expand All @@ -276,7 +287,6 @@ def _run_main(self, parsed_args, parsed_globals):
return 0

def _list_all_objects(self, bucket, key, page_size=None):

operation = self.service.get_operation('ListObjects')
iterator = operation.paginate(self.endpoint, bucket=bucket,
prefix=key, delimiter='/',
Expand All @@ -298,6 +308,8 @@ def _display_page(self, response_data, use_basename=True):
uni_print(print_str)
for content in contents:
last_mod_str = self._make_last_mod_str(content['LastModified'])
self._size_accumulator += int(content['Size'])
self._total_objects += 1
size_str = self._make_size_str(content['Size'])
if use_basename:
filename_components = content['Key'].split('/')
Expand Down Expand Up @@ -343,17 +355,25 @@ def _make_last_mod_str(self, last_mod):
str(last_mod.day).zfill(2),
str(last_mod.hour).zfill(2),
str(last_mod.minute).zfill(2),
str(last_mod.second).zfill(2))
str(last_mod.second).zfill(2))
last_mod_str = "%s-%s-%s %s:%s:%s" % last_mod_tup
return last_mod_str.ljust(19, ' ')

def _make_size_str(self, size):
"""
This function creates the size string when objects are being listed.
"""
size_str = str(size)
size_str = humanize(size) if self._humanize else str(size)
return size_str.rjust(10, ' ')

def _print_summary(self):
    """Print the ``--summarize`` footer: object count and cumulative size.

    Uses the counters accumulated while pages were displayed; the total
    size honors ``--humanize`` just like the per-object size column.
    """
    object_count = str(self._total_objects)
    uni_print("\nTotal Objects: ".rjust(15, ' ') + object_count + "\n")
    if self._humanize:
        total_size = humanize(self._size_accumulator)
    else:
        total_size = str(self._size_accumulator)
    uni_print("Total Size: ".rjust(15, ' ') + total_size + "\n")

class WebsiteCommand(S3Command):
NAME = 'website'
Expand Down
16 changes: 16 additions & 0 deletions awscli/customizations/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,19 @@ def __new__(cls, message, error=False, total_parts=None, warning=None):
class IOCloseRequest(_IOCloseRequest):
def __new__(cls, filename, desired_mtime=None):
return super(IOCloseRequest, cls).__new__(cls, filename, desired_mtime)



humanize_suffixes = ('kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')

def humanize(value):
format='%.1f'
base = 1000
bytes = float(value)

if bytes == 1: return '1 Byte'
elif bytes < base: return '%d Bytes' % bytes

for i,sfx in enumerate(humanize_suffixes):
unit = base ** (i+2)
if bytes < unit: return (format + ' %s') % ((base * bytes / unit), sfx)
21 changes: 21 additions & 0 deletions awscli/examples/s3/ls.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,24 @@ Output::
2013-09-02 21:32:57 189 foo/bar/.baz/hooks/foo
2013-09-02 21:32:57 398 z.txt

The following ``ls`` command demonstrates the same command using the --humanize and --summarize options. --humanize
displays file sizes in Bytes/kB/MB/GB/TB/PB/EB/ZB/YB. --summarize displays the total number of objects and total size
at the end of the result listing::

aws s3 ls s3://mybucket --recursive --humanize --summarize

Output::

2013-09-02 21:37:53 10 Bytes a.txt
2013-09-02 21:37:53 2.9 MB foo.zip
2013-09-02 21:32:57 23 Bytes foo/bar/.baz/a
2013-09-02 21:32:58 41 Bytes foo/bar/.baz/b
2013-09-02 21:32:57 281 Bytes foo/bar/.baz/c
2013-09-02 21:32:57 73 Bytes foo/bar/.baz/d
2013-09-02 21:32:57 452 Bytes foo/bar/.baz/e
2013-09-02 21:32:57 896 Bytes foo/bar/.baz/hooks/bar
2013-09-02 21:32:57 189 Bytes foo/bar/.baz/hooks/foo
2013-09-02 21:32:57 398 Bytes z.txt

Total Objects: 10
Total Size: 2.9 MB
57 changes: 57 additions & 0 deletions tests/unit/customizations/s3/test_ls_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,63 @@ def test_fail_rc_no_objects_nor_prefixes(self):
self.parsed_responses = [{}]
self.run_cmd('s3 ls s3://bucket/foo', expected_rc=1)

def test_humanize_file_size(self):
    """``s3 ls --humanize`` renders each object's size in human form."""
    time_utc = "2014-01-09T20:45:49.000Z"
    self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
        {"Key": "onebyte.txt", "Size": 1, "LastModified": time_utc},
        {"Key": "onekilobyte.txt", "Size": 1000, "LastModified": time_utc},
        {"Key": "onemegabyte.txt", "Size": 1000**2, "LastModified": time_utc},
        {"Key": "onegigabyte.txt", "Size": 1000**3, "LastModified": time_utc},
        {"Key": "oneterabyte.txt", "Size": 1000**4, "LastModified": time_utc},
        {"Key": "onepetabyte.txt", "Size": 1000**5, "LastModified": time_utc}]}]
    stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --humanize', expected_rc=0)
    # Removed the unused ``call_args`` local from the original.
    # Time is stored in UTC timezone, but the actual time displayed
    # is specific to your tzinfo, so shift the timezone to your local's.
    time_local = parser.parse(time_utc).astimezone(tz.tzlocal())
    time_fmt = time_local.strftime('%Y-%m-%d %H:%M:%S')
    self.assertIn('%s 1 Byte onebyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 kB onekilobyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 MB onemegabyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 GB onegigabyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 TB oneterabyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 PB onepetabyte.txt\n' % time_fmt, stdout)

def test_summarize(self):
    """``s3 ls --summarize`` appends object-count and total-size lines."""
    time_utc = "2014-01-09T20:45:49.000Z"
    self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
        {"Key": "onebyte.txt", "Size": 1, "LastModified": time_utc},
        {"Key": "onekilobyte.txt", "Size": 1000, "LastModified": time_utc},
        {"Key": "onemegabyte.txt", "Size": 1000**2, "LastModified": time_utc},
        {"Key": "onegigabyte.txt", "Size": 1000**3, "LastModified": time_utc},
        {"Key": "oneterabyte.txt", "Size": 1000**4, "LastModified": time_utc},
        {"Key": "onepetabyte.txt", "Size": 1000**5, "LastModified": time_utc}]}]
    stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --summarize', expected_rc=0)
    # The summary lines do not include timestamps, so (unlike the
    # humanize test) no local-timezone conversion is needed here;
    # the unused call_args/time_local/time_fmt locals were removed.
    self.assertIn('Total Objects: 6\n', stdout)
    self.assertIn('Total Size: 1001001001001001\n', stdout)

def test_summarize_with_humanize(self):
    """Combining --humanize and --summarize humanizes the total size."""
    time_utc = "2014-01-09T20:45:49.000Z"
    self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
        {"Key": "onebyte.txt", "Size": 1, "LastModified": time_utc},
        {"Key": "onekilobyte.txt", "Size": 1000, "LastModified": time_utc},
        {"Key": "onemegabyte.txt", "Size": 1000**2, "LastModified": time_utc},
        {"Key": "onegigabyte.txt", "Size": 1000**3, "LastModified": time_utc},
        {"Key": "oneterabyte.txt", "Size": 1000**4, "LastModified": time_utc},
        {"Key": "onepetabyte.txt", "Size": 1000**5, "LastModified": time_utc}]}]
    stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --humanize --summarize',
                                expected_rc=0)
    # The summary lines do not include timestamps, so no local-timezone
    # conversion is needed here; the unused call_args/time_local/time_fmt
    # locals were removed.
    self.assertIn('Total Objects: 6\n', stdout)
    self.assertIn('Total Size: 1.0 PB\n', stdout)

# Allow running this test module directly (e.g. ``python test_ls_command.py``)
# in addition to running it through the test runner.
if __name__ == "__main__":
    unittest.main()
9 changes: 6 additions & 3 deletions tests/unit/customizations/s3/test_subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def setUp(self):

def test_ls_command_for_bucket(self):
ls_command = ListCommand(self.session)
parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, page_size='5')
parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, page_size='5',
humanize=False, summarize=False)
parsed_globals = mock.Mock()
ls_command._run_main(parsed_args, parsed_globals)
call = self.session.get_service.return_value.get_operation\
Expand All @@ -78,7 +79,8 @@ def test_ls_command_for_bucket(self):
def test_ls_command_with_no_args(self):
ls_command = ListCommand(self.session)
parsed_global = FakeArgs(region=None, endpoint_url=None, verify_ssl=None)
parsed_args = FakeArgs(dir_op=False, paths='s3://')
parsed_args = FakeArgs(dir_op=False, paths='s3://', humanize=False,
summarize=False)
ls_command._run_main(parsed_args, parsed_global)
# We should only be a single call.
self.session.get_service.return_value.get_operation.assert_called_with(
Expand All @@ -98,7 +100,8 @@ def test_ls_with_verify_argument(self):
ls_command = ListCommand(self.session)
parsed_global = FakeArgs(region='us-west-2', endpoint_url=None,
verify_ssl=False)
parsed_args = FakeArgs(paths='s3://', dir_op=False)
parsed_args = FakeArgs(paths='s3://', dir_op=False, humanize=False,
summarize=False)
ls_command._run_main(parsed_args, parsed_global)
# Verify get_endpoint
get_endpoint = self.session.get_service.return_value.get_endpoint
Expand Down

0 comments on commit efbecb7

Please sign in to comment.