Skip to content

Commit

Permalink
PEP 639: Implement License-Expression and License-File (#828)
Browse files Browse the repository at this point in the history
Co-authored-by: Brett Cannon <[email protected]>
  • Loading branch information
ewdurbin and brettcannon authored Oct 24, 2024
1 parent 6c338a8 commit 029f415
Show file tree
Hide file tree
Showing 11 changed files with 1,270 additions and 7 deletions.
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ The ``packaging`` library uses calendar-based versioning (``YY.N``).
version
specifiers
markers
licenses
requirements
metadata
tags
Expand Down
52 changes: 52 additions & 0 deletions docs/licenses.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Licenses
=========

.. currentmodule:: packaging.licenses


Helper for canonicalizing SPDX
`License-Expression metadata <https://peps.python.org/pep-0639/#term-license-expression>`__
as `defined in PEP 639 <https://peps.python.org/pep-0639/#spdx>`__.


Reference
---------

.. class:: NormalizedLicenseExpression

A :class:`typing.NewType` of :class:`str`, representing a normalized
License-Expression.


.. exception:: InvalidLicenseExpression

Raised when a License-Expression is invalid.


.. function:: canonicalize_license_expression(raw_license_expression)

This function takes a valid License-Expression, and returns the normalized form of it.

The return type is typed as :class:`NormalizedLicenseExpression`. This allows type
checkers to help require that a string has passed through this function
before use.

:param str raw_license_expression: The License-Expression to canonicalize.
:raises InvalidLicenseExpression: If the License-Expression is invalid due to an
invalid/unknown license identifier or invalid syntax.

.. doctest::

>>> from packaging.licenses import canonicalize_license_expression
>>> canonicalize_license_expression("mit")
'MIT'
>>> canonicalize_license_expression("mit and (apache-2.0 or bsd-2-clause)")
'MIT AND (Apache-2.0 OR BSD-2-Clause)'
>>> canonicalize_license_expression("(mit")
Traceback (most recent call last):
...
InvalidLicenseExpression: Invalid license expression: '(mit'
>>> canonicalize_license_expression("Use-it-after-midnight")
Traceback (most recent call last):
...
InvalidLicenseExpression: Unknown license: 'Use-it-after-midnight'
6 changes: 6 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ def release(session):
webbrowser.open("https://github.com/pypa/packaging/releases")


@nox.session
def update_licenses(session: nox.Session) -> None:
session.install("httpx")
session.run("python", "tasks/licenses.py")


# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ warn_unused_ignores = true
module = ["_manylinux"]
ignore_missing_imports = true


[tool.ruff]
src = ["src"]
extend-exclude = [
"src/packaging/licenses/_spdx.py"
]

[tool.ruff.lint]
extend-select = [
Expand Down
145 changes: 145 additions & 0 deletions src/packaging/licenses/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#######################################################################################
#
# Adapted from:
# https://github.com/pypa/hatch/blob/5352e44/backend/src/hatchling/licenses/parse.py
#
# MIT License
#
# Copyright (c) 2017-present Ofek Lev <[email protected]>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be included in all copies
# or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# With additional allowance of arbitrary `LicenseRef-` identifiers, not just
# `LicenseRef-Public-Domain` and `LicenseRef-Proprietary`.
#
#######################################################################################
from __future__ import annotations

import re
from typing import NewType, cast

from packaging.licenses._spdx import EXCEPTIONS, LICENSES

__all__ = [
"NormalizedLicenseExpression",
"InvalidLicenseExpression",
"canonicalize_license_expression",
]

license_ref_allowed = re.compile("^[A-Za-z0-9.-]*$")

NormalizedLicenseExpression = NewType("NormalizedLicenseExpression", str)


class InvalidLicenseExpression(ValueError):
"""Raised when a license-expression string is invalid
>>> canonicalize_license_expression("invalid")
Traceback (most recent call last):
...
packaging.licenses.InvalidLicenseExpression: Invalid license expression: 'invalid'
"""


def canonicalize_license_expression(
raw_license_expression: str,
) -> NormalizedLicenseExpression:
if not raw_license_expression:
message = f"Invalid license expression: {raw_license_expression!r}"
raise InvalidLicenseExpression(message)

# Pad any parentheses so tokenization can be achieved by merely splitting on
# whitespace.
license_expression = raw_license_expression.replace("(", " ( ").replace(")", " ) ")
licenseref_prefix = "LicenseRef-"
license_refs = {
ref.lower(): "LicenseRef-" + ref[len(licenseref_prefix) :]
for ref in license_expression.split()
if ref.lower().startswith(licenseref_prefix.lower())
}

# Normalize to lower case so we can look up licenses/exceptions
# and so boolean operators are Python-compatible.
license_expression = license_expression.lower()

tokens = license_expression.split()

# Rather than implementing boolean logic, we create an expression that Python can
# parse. Everything that is not involved with the grammar itself is treated as
# `False` and the expression should evaluate as such.
python_tokens = []
for token in tokens:
if token not in {"or", "and", "with", "(", ")"}:
python_tokens.append("False")
elif token == "with":
python_tokens.append("or")
elif token == "(" and python_tokens and python_tokens[-1] not in {"or", "and"}:
message = f"Invalid license expression: {raw_license_expression!r}"
raise InvalidLicenseExpression(message)
else:
python_tokens.append(token)

python_expression = " ".join(python_tokens)
try:
invalid = eval(python_expression, globals(), locals())
except Exception:
invalid = True

if invalid is not False:
message = f"Invalid license expression: {raw_license_expression!r}"
raise InvalidLicenseExpression(message) from None

# Take a final pass to check for unknown licenses/exceptions.
normalized_tokens = []
for token in tokens:
if token in {"or", "and", "with", "(", ")"}:
normalized_tokens.append(token.upper())
continue

if normalized_tokens and normalized_tokens[-1] == "WITH":
if token not in EXCEPTIONS:
message = f"Unknown license exception: {token!r}"
raise InvalidLicenseExpression(message)

normalized_tokens.append(EXCEPTIONS[token]["id"])
else:
if token.endswith("+"):
final_token = token[:-1]
suffix = "+"
else:
final_token = token
suffix = ""

if final_token.startswith("licenseref-"):
if not license_ref_allowed.match(final_token):
message = f"Invalid licenseref: {final_token!r}"
raise InvalidLicenseExpression(message)
normalized_tokens.append(license_refs[final_token] + suffix)
else:
if final_token not in LICENSES:
message = f"Unknown license: {final_token!r}"
raise InvalidLicenseExpression(message)
normalized_tokens.append(LICENSES[final_token]["id"] + suffix)

normalized_expression = " ".join(normalized_tokens)

return cast(
NormalizedLicenseExpression,
normalized_expression.replace("( ", "(").replace(" )", ")"),
)
Loading

0 comments on commit 029f415

Please sign in to comment.