Skip to content

Commit

Permalink
Merge pull request #146 from SethMMorton/over-zealous-extension-split…
Browse files Browse the repository at this point in the history
…ting

Over zealous extension splitting
  • Loading branch information
SethMMorton authored Jan 30, 2022
2 parents 4f0b3a8 + 4832c15 commit 24d7a4c
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 18 deletions.
8 changes: 6 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
Unreleased
---

### Changed
- When using `ns.PATH`, only split off a maximum of two suffixes from
a file name (issues #145, #146).

[8.0.2] - 2021-12-14
---

### Fixed
- Bug where sorting paths fail if one of the paths is '.'.
- Bug where sorting paths fail if one of the paths is '.' (issues #142, #143)

[8.0.1] - 2021-12-10
---

### Fixed
- Compose unicode characters when using locale to ensure sorting is correct
across all locales.
across all locales (issues #140, #141)

[8.0.0] - 2021-11-03
---
Expand Down
29 changes: 17 additions & 12 deletions natsort/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,16 +893,21 @@ def path_splitter(
path_parts = []
base = str(s)

# Now, split off the file extensions until we reach a decimal number at
# the beginning of the suffix or there are no more extensions.
suffixes = PurePath(base).suffixes
try:
digit_index = next(i for i, x in enumerate(reversed(suffixes)) if _d_match(x))
except StopIteration:
pass
else:
digit_index = len(suffixes) - digit_index
suffixes = suffixes[digit_index:]

# Now, split off the file extensions until
# - we reach a decimal number at the beginning of the suffix
# - more than two suffixes have been seen
# - a suffix is more than five characters (including leading ".")
# - there are no more extensions
suffixes = []
for i, suffix in enumerate(reversed(PurePath(base).suffixes)):
if _d_match(suffix) or i > 1 or len(suffix) > 5:
break
suffixes.append(suffix)
suffixes.reverse()

# Remove the suffixes from the base component
base = base.replace("".join(suffixes), "")
return filter(None, ichain(path_parts, [base], suffixes))
base_component = [base] if base else []

# Join all path components in an iterator
return filter(None, ichain(path_parts, base_component, suffixes))
15 changes: 15 additions & 0 deletions tests/test_natsorted.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,21 @@ def test_natsorted_handles_numbers_and_filesystem_paths_simultaneously() -> None
assert natsorted(given, alg=ns.PATH) == expected


def test_natsorted_path_extensions_heuristic() -> None:
    """Check that ``ns.PATH`` sorts dot-heavy file names into the expected order.

    Regression test for https://github.com/SethMMorton/natsort/issues/145.
    Every name contains several "."-separated words ahead of its ".mkv"
    extension; the expected result is ordered by the 07 / 08 / 09 field.
    """
    # The order natsorted() must produce.
    in_order = [
        "Try.Me.Bug - 07 - One.Two.5.[text].mkv",
        "Try.Me.Bug - 08 - One.Two.Three[text].mkv",
        "Try.Me.Bug - 09 - One.Two.Three.[text].mkv",
    ]
    # The same names, deliberately out of order.
    shuffled = [
        "Try.Me.Bug - 09 - One.Two.Three.[text].mkv",
        "Try.Me.Bug - 07 - One.Two.5.[text].mkv",
        "Try.Me.Bug - 08 - One.Two.Three[text].mkv",
    ]
    assert natsorted(shuffled, alg=ns.PATH) == in_order


@pytest.mark.parametrize(
"alg, expected",
[
Expand Down
25 changes: 21 additions & 4 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import string
from itertools import chain
from operator import neg as op_neg
from typing import List, Pattern, Union
from typing import List, Pattern, Tuple, Union

import pytest
from hypothesis import given
Expand Down Expand Up @@ -155,9 +155,26 @@ def test_path_splitter_splits_path_string_by_sep(x: List[str]) -> None:
assert tuple(utils.path_splitter(z)) == tuple(pathlib.Path(z).parts)


def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example() -> None:
given = "/this/is/a/path/file.x1.10.tar.gz"
expected = (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz")
@pytest.mark.parametrize(
    "given, expected",
    [
        # Two trailing suffixes are split off; ".10" stays with the base name.
        (
            "/this/is/a/path/file.x1.10.tar.gz",
            (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz"),
        ),
        # A single trailing suffix is split off; ".10" stays with the base name.
        (
            "/this/is/a/path/file.x1.10.tar",
            (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar"),
        ),
        # ".threethousand" is longer than five characters, so only ".tar"
        # is treated as an extension.
        (
            "/this/is/a/path/file.x1.threethousand.tar",
            (os.sep, "this", "is", "a", "path", "file.x1.threethousand", ".tar"),
        ),
    ],
)
def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example(
    given: str, expected: Tuple[str, ...]
) -> None:
    """Check the components ``utils.path_splitter`` yields for example paths.

    Each case pairs an input path string with the exact sequence of
    components expected: the directory parts, then the base file name,
    then each split-off extension.
    """
    components = tuple(utils.path_splitter(given))
    assert components == tuple(expected)


Expand Down

0 comments on commit 24d7a4c

Please sign in to comment.