From 4e77427e1613077e2fd3162c10a6386520fdba4a Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Sat, 14 Sep 2024 02:04:42 +0300 Subject: [PATCH 1/9] feat: Source specifying for relative links is allowed --- README.rst | 6 ++++++ src/cc2olx/cli.py | 6 ++++++ src/cc2olx/constants.py | 2 ++ src/cc2olx/main.py | 23 +++++++++++++++++++---- src/cc2olx/models.py | 5 +++-- src/cc2olx/olx.py | 18 +++++++++++++++++- src/cc2olx/settings.py | 1 + 7 files changed, 54 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index def057c0..de8201c3 100644 --- a/README.rst +++ b/README.rst @@ -58,6 +58,12 @@ The link map file can be supplied using `-f` or `--link_file`:: cc2olx -r zip -i -f +If the original course content contains relative links and the resources +(images, documents etc) the links point to are not included into the exported +course dump, you can specify their source using `-s` flag: + + cc2olx -i -s + Dockerization ------------- diff --git a/src/cc2olx/cli.py b/src/cc2olx/cli.py index fb804f28..34d21fcd 100644 --- a/src/cc2olx/cli.py +++ b/src/cc2olx/cli.py @@ -69,4 +69,10 @@ def parse_args(args=None): "should contain consumer_id, consumer_key and consumer_secret." ), ) + parser.add_argument( + "-s", + "--relative_links_source", + nargs="?", + help="The relative links source in the format '://', e.g. 'https://example.com'", + ) return parser.parse_args(args) diff --git a/src/cc2olx/constants.py b/src/cc2olx/constants.py index 1b956935..c96e8937 100644 --- a/src/cc2olx/constants.py +++ b/src/cc2olx/constants.py @@ -1 +1,3 @@ CDATA_PATTERN = r".*?)\]\]>" +OLX_STATIC_DIR = "static" +OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}" diff --git a/src/cc2olx/main.py b/src/cc2olx/main.py index 524a7ab3..8eeda3bb 100644 --- a/src/cc2olx/main.py +++ b/src/cc2olx/main.py @@ -8,18 +8,25 @@ from cc2olx import filesystem from cc2olx import olx from cc2olx.cli import parse_args, RESULT_TYPE_FOLDER, RESULT_TYPE_ZIP -from cc2olx.models import Cartridge, OLX_STATIC_DIR +from cc2olx.constants import OLX_STATIC_DIR +from cc2olx.models import Cartridge from cc2olx.settings import collect_settings -def convert_one_file(input_file, workspace, link_file=None, passport_file=None): +def convert_one_file( + input_file, + workspace, + link_file=None, + passport_file=None, + relative_links_source=None, +): filesystem.create_directory(workspace) cartridge = Cartridge(input_file, workspace) cartridge.load_manifest_extracted() cartridge.normalize() - olx_export = olx.OlxExport(cartridge, link_file, passport_file) + olx_export = olx.OlxExport(cartridge, link_file, passport_file, relative_links_source) olx_filename = cartridge.directory.parent / (cartridge.directory.name + "-course.xml") policy_filename = cartridge.directory.parent / "policy.json" @@ -53,6 +60,7 @@ def main(): workspace = settings["workspace"] link_file = settings["link_file"] passport_file = settings["passport_file"] + relative_links_source = settings["relative_links_source"] # setup logger logging_config = settings["logging_config"] @@ -64,7 +72,14 @@ def main(): for input_file in settings["input_files"]: try: - convert_one_file(input_file, temp_workspace, link_file, passport_file) + convert_one_file( + input_file, + temp_workspace, + link_file, + passport_file, + relative_links_source, + ) + except Exception: logger.exception("Error while converting %s file", input_file) diff --git a/src/cc2olx/models.py b/src/cc2olx/models.py index c8510d0a..c89a6bbf 100644 --- a/src/cc2olx/models.py +++ b/src/cc2olx/models.py @@ -6,6 +6,7 @@ import zipfile from cc2olx import filesystem +from cc2olx.constants import OLX_STATIC_PATH_TEMPLATE from cc2olx.external.canvas import ModuleMeta from cc2olx.qti import QtiParser from cc2olx.utils import clean_file_name @@ -343,7 +344,7 @@ def get_resource_content(self, identifier): return "html", {"html": html} elif "web_resources" in str(res_filename) and imghdr.what(str(res_filename)): static_filename = str(res_filename).split("web_resources/")[1] - olx_static_path = "/{}/{}".format(OLX_STATIC_DIR, static_filename) + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename) html = ( '' '

{}

'.format(olx_static_path, static_filename) @@ -353,7 +354,7 @@ def get_resource_content(self, identifier): # This webcontent is outside of ``web_resources`` directory # So we need to manually copy it to OLX_STATIC_DIR self.extra_static_files.append(res_relative_path) - olx_static_path = "/{}/{}".format(OLX_STATIC_DIR, res_relative_path) + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path) html = ( '' '

{}

'.format( diff --git a/src/cc2olx/olx.py b/src/cc2olx/olx.py index f447a0f1..585bec3f 100644 --- a/src/cc2olx/olx.py +++ b/src/cc2olx/olx.py @@ -7,6 +7,7 @@ from lxml import html from cc2olx.iframe_link_parser import KalturaIframeLinkParser +from cc2olx.constants import OLX_STATIC_PATH_TEMPLATE from cc2olx.qti import QtiExport from cc2olx.utils import clean_from_cdata, element_builder, passport_file_parser @@ -36,11 +37,12 @@ class OlxExport: QTI = "qti" DISCUSSION = "discussion" - def __init__(self, cartridge, link_file=None, passport_file=None): + def __init__(self, cartridge, link_file=None, passport_file=None, relative_links_source=None): self.cartridge = cartridge self.doc = None self.link_file = link_file self.passport_file = passport_file + self.relative_links_source = relative_links_source self.iframe_link_parser = None if link_file: self.iframe_link_parser = KalturaIframeLinkParser(self.link_file) @@ -250,6 +252,18 @@ def process_external_tools_link(item, html): html = html.replace(item, external_tool_url) return html + def process_extra_static_files(item, html): + if self.relative_links_source is None: + return html + + for static_file in self.cartridge.extra_static_files: + if item == OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_file): + return html + + url = urllib.parse.urljoin(self.relative_links_source, item) + html = html.replace(item, url) + return html + for _, item in items: if "IMS-CC-FILEBASE" in item: html = process_ims_cc_filebase(item, html) @@ -259,6 +273,8 @@ def process_external_tools_link(item, html): html = process_external_tools_link(item, html) elif "CANVAS_OBJECT_REFERENCE" in item: html = process_canvas_reference(item, html) + else: + html = process_extra_static_files(item, html) return html diff --git a/src/cc2olx/settings.py b/src/cc2olx/settings.py index 6435581b..5055a01c 100644 --- a/src/cc2olx/settings.py +++ b/src/cc2olx/settings.py @@ -47,5 +47,6 @@ def collect_settings(parsed_args): "workspace": Path.cwd() / parsed_args.output, "link_file": parsed_args.link_file, "passport_file": parsed_args.passport_file, + "relative_links_source": parsed_args.relative_links_source, } return settings From 03fde256da57240590a17043cff98b0f8d14db60 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Wed, 4 Dec 2024 21:53:22 +0200 Subject: [PATCH 2/9] feat: `relative_links_source` CLI argument is validated --- src/cc2olx/cli.py | 3 ++ src/cc2olx/models.py | 16 -------- src/cc2olx/olx.py | 9 +++++ src/cc2olx/utils.py | 16 +++++++- src/cc2olx/validators/__init__.py | 0 src/cc2olx/validators/cli.py | 64 +++++++++++++++++++++++++++++++ 6 files changed, 90 insertions(+), 18 deletions(-) create mode 100644 src/cc2olx/validators/__init__.py create mode 100644 src/cc2olx/validators/cli.py diff --git a/src/cc2olx/cli.py b/src/cc2olx/cli.py index 34d21fcd..2e18a4e5 100644 --- a/src/cc2olx/cli.py +++ b/src/cc2olx/cli.py @@ -2,6 +2,8 @@ from pathlib import Path +from cc2olx.validators.cli import LinkSourceValidator + RESULT_TYPE_FOLDER = "folder" RESULT_TYPE_ZIP = "zip" @@ -73,6 +75,7 @@ def parse_args(args=None): "-s", "--relative_links_source", nargs="?", + type=LinkSourceValidator(), help="The relative links source in the format '://', e.g. 'https://example.com'", ) return parser.parse_args(args) diff --git a/src/cc2olx/models.py b/src/cc2olx/models.py index c89a6bbf..d134642f 100644 --- a/src/cc2olx/models.py +++ b/src/cc2olx/models.py @@ -25,22 +25,6 @@ DIFFUSE_SHALLOW_SECTIONS = False DIFFUSE_SHALLOW_SUBSECTIONS = True -OLX_STATIC_DIR = "static" - -OLX_DIRECTORIES = [ - "about", - "assets", - "chapter", - "course", - "html", - "info", - "policies", - "problem", - "sequential", - OLX_STATIC_DIR, - "vertical", -] - def is_leaf(container): return "identifierref" in container diff --git a/src/cc2olx/olx.py b/src/cc2olx/olx.py index 585bec3f..12c4a6ab 100644 --- a/src/cc2olx/olx.py +++ b/src/cc2olx/olx.py @@ -253,6 +253,15 @@ def process_external_tools_link(item, html): return html def process_extra_static_files(item, html): + """ + Turn static file URLs outside OLX_STATIC_DIR into absolute URLs. + + Allow to avoid a situation when the original course page links have + relative URLs, such URLs weren't included into the exported Common + Cartridge course file that causes broken URLs in the imported OeX + course. The function adds the origin source to URLs to make them + absolute ones. + """ if self.relative_links_source is None: return html diff --git a/src/cc2olx/utils.py b/src/cc2olx/utils.py index 40cf8c58..d3b54e73 100644 --- a/src/cc2olx/utils.py +++ b/src/cc2olx/utils.py @@ -1,9 +1,10 @@ """ Utility functions for cc2olx""" -import logging -import string import csv +import ipaddress +import logging import re +import string from cc2olx.constants import CDATA_PATTERN @@ -123,3 +124,14 @@ def clean_from_cdata(xml_string: str) -> str: str: cleaned XML string. """ return re.sub(CDATA_PATTERN, r"\g", xml_string, flags=re.DOTALL) + + +def is_valid_ipv6_address(value: str) -> bool: + """ + Return whether the value is a valid IPv6 address. + """ + try: + ipaddress.IPv6Address(value) + except ValueError: + return False + return True diff --git a/src/cc2olx/validators/__init__.py b/src/cc2olx/validators/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cc2olx/validators/cli.py b/src/cc2olx/validators/cli.py new file mode 100644 index 00000000..26d079c2 --- /dev/null +++ b/src/cc2olx/validators/cli.py @@ -0,0 +1,64 @@ +import argparse +import re + +from cc2olx.utils import is_valid_ipv6_address + + +class LinkSourceValidator: + """ + Validate a link source. + """ + + UL = "\u00a1-\uffff" # Unicode letters range (must not be a raw string). + + # IP patterns + IPV4_REGEX = ( + r"(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)" + r"(?:\.(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)){3}" + ) + IPV6_REGEX = r"\[[0-9a-f:.]+\]" # (simple regex, validated later) + + # Host patterns + HOSTNAME_REGEX = ( + rf"[a-z{UL}0-9](?:[a-z{UL}0-9-]{{0,61}}[a-z{UL}0-9])?" + ) + # Max length for domain name labels is 63 characters per RFC 1034 sec. 3.1 + DOMAIN_REGEX = rf"(?:\.(?!-)[a-z{UL}0-9-]{{1,63}}(?{IPV4_REGEX}|{IPV6_REGEX}|{HOST_REGEX})" + r"(?::[0-9]{1,5})?" # port + r"/?" # trailing slash + r"\Z" + ) + + message = "Enter a valid URL." + + def __call__(self, value: str) -> str: + if not (link_source_match := re.match(self.LINK_SOURCE_REGEX, value, re.IGNORECASE)): + raise argparse.ArgumentTypeError(self.message) + + self._validate_ipv6_address(link_source_match.group("netloc")) + + return value + + def _validate_ipv6_address(self, netloc: str) -> None: + """ + Check netloc correctness if it's an IPv6 address. + """ + potential_ipv6_regex = r"^\[(.+)\](?::[0-9]{1,5})?$" + if netloc_match := re.search(potential_ipv6_regex, netloc): + potential_ip = netloc_match[1] + if is_valid_ipv6_address(potential_ip): + raise argparse.ArgumentTypeError(self.message) From 1e0ff005595128a284931bb8480f9424707eadfb Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Wed, 4 Dec 2024 21:54:34 +0200 Subject: [PATCH 3/9] test: Existed tests are fixed --- tests/test_cli.py | 10 +++++++++- tests/test_settings.py | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index c555e27f..3dd35c5d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,7 +16,13 @@ def test_parse_args(imscc_file): ) assert parsed_args == Namespace( - inputs=[imscc_file], loglevel="INFO", result="folder", link_file=None, passport_file=None, output="output" + inputs=[imscc_file], + loglevel="INFO", + result="folder", + link_file=None, + passport_file=None, + output="output", + relative_links_source=None, ) @@ -34,6 +40,7 @@ def test_parse_args_csv_file(imscc_file, link_map_csv): link_file=link_map_csv, passport_file=None, output="output", + relative_links_source=None, ) @@ -49,4 +56,5 @@ def test_parse_args_passport_file(imscc_file, passports_csv): link_file=None, passport_file=passports_csv, output="output", + relative_links_source=None, ) diff --git a/tests/test_settings.py b/tests/test_settings.py index c1d6058f..468a7b1d 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -19,4 +19,5 @@ def test_collect_settings(imscc_file): "level": parsed_args.loglevel, "format": "{%(filename)s:%(lineno)d} - %(message)s", }, + "relative_links_source": None, } From 290c6510839d7530bd11390f2c1f152798b87ad1 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Wed, 4 Dec 2024 22:17:12 +0200 Subject: [PATCH 4/9] fix: IPv6 validation is fixed --- src/cc2olx/validators/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cc2olx/validators/cli.py b/src/cc2olx/validators/cli.py index 26d079c2..ee8482f6 100644 --- a/src/cc2olx/validators/cli.py +++ b/src/cc2olx/validators/cli.py @@ -46,7 +46,7 @@ class LinkSourceValidator: message = "Enter a valid URL." def __call__(self, value: str) -> str: - if not (link_source_match := re.match(self.LINK_SOURCE_REGEX, value, re.IGNORECASE)): + if not (link_source_match := re.fullmatch(self.LINK_SOURCE_REGEX, value, re.IGNORECASE)): raise argparse.ArgumentTypeError(self.message) self._validate_ipv6_address(link_source_match.group("netloc")) @@ -60,5 +60,5 @@ def _validate_ipv6_address(self, netloc: str) -> None: potential_ipv6_regex = r"^\[(.+)\](?::[0-9]{1,5})?$" if netloc_match := re.search(potential_ipv6_regex, netloc): potential_ip = netloc_match[1] - if is_valid_ipv6_address(potential_ip): + if not is_valid_ipv6_address(potential_ip): raise argparse.ArgumentTypeError(self.message) From 26468d0dc3354e8541b81cb7d4c85dc7b006c2f9 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Wed, 4 Dec 2024 22:47:04 +0200 Subject: [PATCH 5/9] test: Relative links source validation is tested --- tests/test_cli.py | 32 ++++++++++++++++++++ tests/test_validators/__init__.py | 0 tests/test_validators/test_cli.py | 49 +++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 tests/test_validators/__init__.py create mode 100644 tests/test_validators/test_cli.py diff --git a/tests/test_cli.py b/tests/test_cli.py index 3dd35c5d..5cf6a2a4 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,7 @@ from argparse import Namespace +from pathlib import Path + +import pytest from cc2olx.cli import parse_args @@ -58,3 +61,32 @@ def test_parse_args_passport_file(imscc_file, passports_csv): output="output", relative_links_source=None, ) + + +def test_parse_args_with_correct_relative_links_source(imscc_file: Path) -> None: + """ + Positive input test for relative links source argument. + """ + relative_links_source = "https://example.com" + + parsed_args = parse_args(["-i", str(imscc_file), "-s", relative_links_source]) + + assert parsed_args == Namespace( + inputs=[imscc_file], + loglevel="INFO", + result="folder", + link_file=None, + passport_file=None, + output="output", + relative_links_source=relative_links_source, + ) + + +def test_parse_args_with_incorrect_relative_links_source(imscc_file: Path) -> None: + """ + Test arguments parser detects incorrect relative links sources. + """ + relative_links_source = "ws://example.com", + + with pytest.raises(SystemExit): + parse_args(["-i", str(imscc_file), "-s", relative_links_source]) diff --git a/tests/test_validators/__init__.py b/tests/test_validators/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_validators/test_cli.py b/tests/test_validators/test_cli.py new file mode 100644 index 00000000..b75a0f52 --- /dev/null +++ b/tests/test_validators/test_cli.py @@ -0,0 +1,49 @@ +import argparse + +import pytest + +from cc2olx.validators.cli import LinkSourceValidator + + +class TestLinkSourceValidator: + """ + Test link source validator. + """ + + @pytest.mark.parametrize( + "links_source", + ( + "http://example.com", + "http://example.com/", + "https://example.com", + "http://192.168.0.1", + "http://192.168.0.1:8000/", + "https://[2001:0db8:85a3::8a2e:0370:7334]/", + ) + ) + def test_original_value_is_returned_if_it_is_valid(self, links_source: str) -> None: + """ + Test whether the validator returns original value it is valid. + """ + assert LinkSourceValidator()(links_source) == links_source + + @pytest.mark.parametrize( + "links_source", + ( + "ftp://example.com", + "ws://example.com", + "just_string", + "http://192.168.0.1.9", + "http://192.168.0.1:-56", + "http://192.999.0.1", + "https://m192.168.0.1", + "https://2001:0db8:85a3::8a2e:0370:7334/", + "https://[2001:db8:85a3::8a2e:0370:7334::]/", + ) + ) + def test_wrong_values_are_detected(self, links_source) -> None: + """ + Test whether the validator raises an error if the value is invalid. + """ + with pytest.raises(argparse.ArgumentTypeError, match="Enter a valid URL."): + LinkSourceValidator()(links_source) From f98977e581bd4967aa1b6c209792cfe8211e7ec1 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Thu, 5 Dec 2024 18:36:34 +0200 Subject: [PATCH 6/9] style: Code style is improved --- src/cc2olx/validators/cli.py | 4 +--- tests/test_cli.py | 2 +- tests/test_validators/test_cli.py | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/cc2olx/validators/cli.py b/src/cc2olx/validators/cli.py index ee8482f6..bfa1e232 100644 --- a/src/cc2olx/validators/cli.py +++ b/src/cc2olx/validators/cli.py @@ -19,9 +19,7 @@ class LinkSourceValidator: IPV6_REGEX = r"\[[0-9a-f:.]+\]" # (simple regex, validated later) # Host patterns - HOSTNAME_REGEX = ( - rf"[a-z{UL}0-9](?:[a-z{UL}0-9-]{{0,61}}[a-z{UL}0-9])?" - ) + HOSTNAME_REGEX = rf"[a-z{UL}0-9](?:[a-z{UL}0-9-]{{0,61}}[a-z{UL}0-9])?" # Max length for domain name labels is 63 characters per RFC 1034 sec. 3.1 DOMAIN_REGEX = rf"(?:\.(?!-)[a-z{UL}0-9-]{{1,63}}(? No """ Test arguments parser detects incorrect relative links sources. """ - relative_links_source = "ws://example.com", + relative_links_source = "ws://example.com" with pytest.raises(SystemExit): parse_args(["-i", str(imscc_file), "-s", relative_links_source]) diff --git a/tests/test_validators/test_cli.py b/tests/test_validators/test_cli.py index b75a0f52..5d11159d 100644 --- a/tests/test_validators/test_cli.py +++ b/tests/test_validators/test_cli.py @@ -19,7 +19,7 @@ class TestLinkSourceValidator: "http://192.168.0.1", "http://192.168.0.1:8000/", "https://[2001:0db8:85a3::8a2e:0370:7334]/", - ) + ), ) def test_original_value_is_returned_if_it_is_valid(self, links_source: str) -> None: """ @@ -39,7 +39,7 @@ def test_original_value_is_returned_if_it_is_valid(self, links_source: str) -> N "https://m192.168.0.1", "https://2001:0db8:85a3::8a2e:0370:7334/", "https://[2001:db8:85a3::8a2e:0370:7334::]/", - ) + ), ) def test_wrong_values_are_detected(self, links_source) -> None: """ From dbf34b3cbb0ee78a60b6feb4ab93e61be39cf8da Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Thu, 19 Dec 2024 07:58:39 +0200 Subject: [PATCH 7/9] fix: All static files are considered during relative external links processing --- src/cc2olx/dataclasses.py | 18 ++++++++++++++++++ src/cc2olx/main.py | 4 ++-- src/cc2olx/models.py | 9 +++++---- src/cc2olx/olx.py | 11 +++-------- 4 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 src/cc2olx/dataclasses.py diff --git a/src/cc2olx/dataclasses.py b/src/cc2olx/dataclasses.py new file mode 100644 index 00000000..c53870f7 --- /dev/null +++ b/src/cc2olx/dataclasses.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass, field +from collections import ChainMap +from typing import Dict + + +@dataclass +class OlxToOriginalStaticFilePaths: + """ + Provide OLX static file to Common cartridge static file mappings. + """ + + # Static files from `web_resources` directory + web_resources: Dict[str, str] = field(default_factory=dict) + # Static files that are outside of `web_resources` directory, but still required + extra: Dict[str, str] = field(default_factory=dict) + + def __post_init__(self) -> None: + self.all = ChainMap(self.extra, self.web_resources) diff --git a/src/cc2olx/main.py b/src/cc2olx/main.py index 8eeda3bb..cfe02104 100644 --- a/src/cc2olx/main.py +++ b/src/cc2olx/main.py @@ -46,8 +46,8 @@ def convert_one_file( # Add static files that are outside of web_resources directory file_list += [ - (str(cartridge.directory / filepath), "/static/{}".format(filepath)) - for filepath in cartridge.extra_static_files + (str(cartridge.directory / original_filepath), olx_static_path) + for olx_static_path, original_filepath in cartridge.olx_to_original_static_file_paths.extra.items() ] filesystem.add_in_tar_gz(str(tgz_filename), file_list) diff --git a/src/cc2olx/models.py b/src/cc2olx/models.py index d134642f..02dbed9b 100644 --- a/src/cc2olx/models.py +++ b/src/cc2olx/models.py @@ -7,6 +7,7 @@ from cc2olx import filesystem from cc2olx.constants import OLX_STATIC_PATH_TEMPLATE +from cc2olx.dataclasses import OlxToOriginalStaticFilePaths from cc2olx.external.canvas import ModuleMeta from cc2olx.qti import QtiParser from cc2olx.utils import clean_file_name @@ -70,8 +71,7 @@ def __init__(self, cartridge_file, workspace): self.is_canvas_flavor = False self.module_meta = {} - # List of static files that are outside of `web_resources` directory, but still required - self.extra_static_files = [] + self.olx_to_original_static_file_paths = OlxToOriginalStaticFilePaths() self.workspace = workspace @@ -329,16 +329,17 @@ def get_resource_content(self, identifier): elif "web_resources" in str(res_filename) and imghdr.what(str(res_filename)): static_filename = str(res_filename).split("web_resources/")[1] olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename) + self.olx_to_original_static_file_paths.web_resources[olx_static_path] = static_filename html = ( '' '

{}

'.format(olx_static_path, static_filename) ) return "html", {"html": html} elif "web_resources" not in str(res_filename): + olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path) # This webcontent is outside of ``web_resources`` directory # So we need to manually copy it to OLX_STATIC_DIR - self.extra_static_files.append(res_relative_path) - olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path) + self.olx_to_original_static_file_paths.extra[olx_static_path] = res_relative_path html = ( '' '

{}

'.format( diff --git a/src/cc2olx/olx.py b/src/cc2olx/olx.py index 12c4a6ab..8cb285ee 100644 --- a/src/cc2olx/olx.py +++ b/src/cc2olx/olx.py @@ -7,7 +7,6 @@ from lxml import html from cc2olx.iframe_link_parser import KalturaIframeLinkParser -from cc2olx.constants import OLX_STATIC_PATH_TEMPLATE from cc2olx.qti import QtiExport from cc2olx.utils import clean_from_cdata, element_builder, passport_file_parser @@ -252,7 +251,7 @@ def process_external_tools_link(item, html): html = html.replace(item, external_tool_url) return html - def process_extra_static_files(item, html): + def process_relative_external_links(item, html): """ Turn static file URLs outside OLX_STATIC_DIR into absolute URLs. @@ -262,13 +261,9 @@ def process_extra_static_files(item, html): course. The function adds the origin source to URLs to make them absolute ones. """ - if self.relative_links_source is None: + if self.relative_links_source is None or item in self.cartridge.olx_to_original_static_file_paths.all: return html - for static_file in self.cartridge.extra_static_files: - if item == OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_file): - return html - url = urllib.parse.urljoin(self.relative_links_source, item) html = html.replace(item, url) return html @@ -283,7 +278,7 @@ def process_extra_static_files(item, html): elif "CANVAS_OBJECT_REFERENCE" in item: html = process_canvas_reference(item, html) else: - html = process_extra_static_files(item, html) + html = process_relative_external_links(item, html) return html From bb55b48a41999ff7dbd26e0f24edfb4b05e340e9 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Mon, 13 Jan 2025 23:01:35 +0200 Subject: [PATCH 8/9] feat: Django library is set up --- pytest.ini | 1 + requirements/base.in | 1 + requirements/base.txt | 22 ++- requirements/ci.txt | 120 +++++++------ requirements/common_constraints.txt | 17 +- requirements/dev.txt | 255 +++++++++++++++------------- requirements/pip-tools.txt | 17 +- requirements/pip.txt | 6 +- requirements/quality.txt | 91 ++++++---- requirements/test.txt | 57 ++++--- src/cc2olx/django_settings.py | 0 src/cc2olx/main.py | 14 +- 12 files changed, 348 insertions(+), 253 deletions(-) create mode 100644 src/cc2olx/django_settings.py diff --git a/pytest.ini b/pytest.ini index 9c702950..11c3a49d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,3 @@ [pytest] usefixtures = chdir_to_workspace +DJANGO_SETTINGS_MODULE = cc2olx.django_settings diff --git a/requirements/base.in b/requirements/base.in index 40babb63..775e0bfe 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,5 +1,6 @@ # Core requirements for this package +Django lxml requests youtube-dl diff --git a/requirements/base.txt b/requirements/base.txt index 03b675a6..b8ebc4a2 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,17 +4,27 @@ # # make upgrade # -certifi==2024.2.2 +asgiref==3.8.1 + # via django +backports-zoneinfo==0.2.1 + # via django +certifi==2024.12.14 # via requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via requests -idna==3.6 +django==4.2.17 + # via -r requirements/base.in +idna==3.10 # via requests -lxml==5.1.0 +lxml==5.3.0 # via -r requirements/base.in -requests==2.31.0 +requests==2.32.3 # via -r requirements/base.in -urllib3==2.2.1 +sqlparse==0.5.3 + # via django +typing-extensions==4.12.2 + # via asgiref +urllib3==2.2.3 # via requests youtube-dl==2021.12.17 # via -r requirements/base.in diff --git a/requirements/ci.txt b/requirements/ci.txt index d5ac826a..1d623dbb 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -4,125 +4,141 @@ # # make upgrade # -black==24.3.0 - # via -r requirements/quality.txt -cachetools==5.3.3 +asgiref==3.8.1 + # via + # -r /home/misha/work/cc2olx/requirements/quality.txt + # django +backports-zoneinfo==0.2.1 + # via + # -r /home/misha/work/cc2olx/requirements/quality.txt + # django +black==24.8.0 + # via -r /home/misha/work/cc2olx/requirements/quality.txt +cachetools==5.5.0 # via tox -certifi==2024.2.2 +certifi==2024.12.14 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests chardet==5.2.0 # via tox -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests -click==8.1.7 +click==8.1.8 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black colorama==0.4.6 # via tox -coverage[toml]==7.4.4 +coverage[toml]==7.6.1 # via + # -r /home/misha/work/cc2olx/requirements/quality.txt # -r requirements/ci.in - # -r requirements/quality.txt # pytest-cov -distlib==0.3.8 +distlib==0.3.9 # via virtualenv -exceptiongroup==1.2.0 +django==4.2.17 + # via -r /home/misha/work/cc2olx/requirements/quality.txt +exceptiongroup==1.2.2 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest -filelock==3.13.3 +filelock==3.16.1 # via # tox # virtualenv -flake8==7.0.0 - # via -r requirements/quality.txt -idna==3.6 +flake8==7.1.1 + # via -r /home/misha/work/cc2olx/requirements/quality.txt +idna==3.10 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests iniconfig==2.0.0 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest -lxml==5.1.0 - # via -r requirements/quality.txt +lxml==5.3.0 + # via -r /home/misha/work/cc2olx/requirements/quality.txt mccabe==0.7.0 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # flake8 mypy-extensions==1.0.0 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black -packaging==24.0 +packaging==24.2 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black # pyproject-api # pytest # tox pathspec==0.12.1 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black -platformdirs==4.2.0 +platformdirs==4.3.6 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black # tox # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest # tox -pycodestyle==2.11.1 +pycodestyle==2.12.1 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # flake8 pyflakes==3.2.0 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # flake8 -pyproject-api==1.6.1 +pyproject-api==1.8.0 # via tox -pytest==8.1.1 +pytest==8.3.4 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest-cov # pytest-mock pytest-cov==5.0.0 - # via -r requirements/quality.txt + # via -r /home/misha/work/cc2olx/requirements/quality.txt pytest-mock==3.14.0 - # via -r requirements/quality.txt -requests==2.31.0 - # via -r requirements/quality.txt -tomli==2.0.1 + # via -r /home/misha/work/cc2olx/requirements/quality.txt +requests==2.32.3 + # via -r /home/misha/work/cc2olx/requirements/quality.txt +sqlparse==0.5.3 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt + # django +tomli==2.2.1 + # via + # -r /home/misha/work/cc2olx/requirements/quality.txt # black # coverage # pyproject-api # pytest # tox -tox==4.14.2 +tox==4.23.2 # via -r requirements/ci.in -typing-extensions==4.10.0 +typing-extensions==4.12.2 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt + # asgiref # black -urllib3==2.2.1 + # tox +urllib3==2.2.3 # via - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests -virtualenv==20.25.1 +virtualenv==20.28.1 # via tox -xmlformatter==0.2.6 - # via -r requirements/quality.txt +xmlformatter==0.2.8 + # via -r /home/misha/work/cc2olx/requirements/quality.txt youtube-dl==2021.12.17 - # via -r requirements/quality.txt + # via -r /home/misha/work/cc2olx/requirements/quality.txt diff --git a/requirements/common_constraints.txt b/requirements/common_constraints.txt index e3bf8eae..1511019d 100644 --- a/requirements/common_constraints.txt +++ b/requirements/common_constraints.txt @@ -11,22 +11,21 @@ # Note: Changes to this file will automatically be used by other repos, referencing # this file from Github directly. It does not require packaging in edx-lint. - # using LTS django version Django<5.0 # elasticsearch>=7.14.0 includes breaking changes in it which caused issues in discovery upgrade process. # elastic search changelog: https://www.elastic.co/guide/en/enterprise-search/master/release-notes-7.14.0.html +# See https://github.com/openedx/edx-platform/issues/35126 for more info elasticsearch<7.14.0 # django-simple-history>3.0.0 adds indexing and causes a lot of migrations to be affected django-simple-history==3.0.0 -# opentelemetry requires version 6.x at the moment: -# https://github.com/open-telemetry/opentelemetry-python/issues/3570 -# Normally this could be added as a constraint in edx-django-utils, where we're -# adding the opentelemetry dependency. However, when we compile pip-tools.txt, -# that uses version 7.x, and then there's no undoing that when compiling base.txt. -# So we need to pin it globally, for now. -# Ticket for unpinning: https://github.com/openedx/edx-lint/issues/407 -importlib-metadata<7 +# Cause: https://github.com/openedx/edx-lint/issues/458 +# This can be unpinned once https://github.com/openedx/edx-lint/issues/459 has been resolved. +pip<24.3 + +# Cause: https://github.com/openedx/edx-lint/issues/475 +# This can be unpinned once https://github.com/openedx/edx-lint/issues/476 has been resolved. +urllib3<2.3.0 diff --git a/requirements/dev.txt b/requirements/dev.txt index 7bb7049b..c5ed0049 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,264 +4,287 @@ # # make upgrade # -black==24.3.0 +asgiref==3.8.1 # via - # -r requirements/ci.txt - # -r requirements/quality.txt -build==1.1.1 + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt + # django +backports-tarfile==1.2.0 + # via jaraco-context +backports-zoneinfo==0.2.1 # via - # -r requirements/pip-tools.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt + # django +black==24.8.0 + # via + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt +build==1.2.2.post1 + # via + # -r /home/misha/work/cc2olx/requirements/pip-tools.txt # pip-tools bump2version==1.0.1 # via -r requirements/dev.in -cachetools==5.3.3 +cachetools==5.5.0 # via - # -r requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt # tox -certifi==2024.2.2 +certifi==2024.12.14 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests -cffi==1.16.0 +cffi==1.17.1 # via cryptography chardet==5.2.0 # via - # -r requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt # tox -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests -click==8.1.7 +click==8.1.8 # via - # -r requirements/ci.txt - # -r requirements/pip-tools.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/pip-tools.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black # pip-tools colorama==0.4.6 # via - # -r requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt # tox -coverage[toml]==7.4.4 +coverage[toml]==7.6.1 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest-cov -cryptography==42.0.5 +cryptography==44.0.0 # via secretstorage -distlib==0.3.8 +distlib==0.3.9 # via - # -r requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt # virtualenv +django==4.2.17 + # via + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt docutils==0.20.1 # via readme-renderer -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest -filelock==3.13.3 +filelock==3.16.1 # via - # -r requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt # tox # virtualenv -flake8==7.0.0 +flake8==7.1.1 # via - # -r requirements/ci.txt - # -r requirements/quality.txt -idna==3.6 + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt +idna==3.10 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests -importlib-metadata==7.1.0 +importlib-metadata==8.5.0 # via - # -r requirements/pip-tools.txt + # -r /home/misha/work/cc2olx/requirements/pip-tools.txt # build # keyring # twine -importlib-resources==6.4.0 +importlib-resources==6.4.5 # via keyring iniconfig==2.0.0 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest -jaraco-classes==3.3.1 +jaraco-classes==3.4.0 # via keyring -jaraco-context==4.3.0 +jaraco-context==6.0.1 # via keyring -jaraco-functools==4.0.0 +jaraco-functools==4.1.0 # via keyring jeepney==0.8.0 # via # keyring # secretstorage -keyring==25.0.0 +keyring==25.5.0 # via twine -lxml==5.1.0 +lxml==5.3.0 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt markdown-it-py==3.0.0 # via rich mccabe==0.7.0 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # flake8 mdurl==0.1.2 # via markdown-it-py -more-itertools==10.2.0 +more-itertools==10.5.0 # via # jaraco-classes # jaraco-functools mypy-extensions==1.0.0 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black -nh3==0.2.17 +nh3==0.2.20 # via readme-renderer -packaging==24.0 +packaging==24.2 # via - # -r requirements/ci.txt - # -r requirements/pip-tools.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/pip-tools.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black # build # pyproject-api # pytest # tox + # twine pathspec==0.12.1 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black pip-tools==7.4.1 - # via -r requirements/pip-tools.txt -pkginfo==1.10.0 + # via -r /home/misha/work/cc2olx/requirements/pip-tools.txt +pkginfo==1.12.0 # via twine -platformdirs==4.2.0 +platformdirs==4.3.6 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black # tox # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest # tox -pycodestyle==2.11.1 +pycodestyle==2.12.1 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # flake8 -pycparser==2.21 +pycparser==2.22 # via cffi pyflakes==3.2.0 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # flake8 -pygments==2.17.2 +pygments==2.19.1 # via # readme-renderer # rich -pyproject-api==1.6.1 +pyproject-api==1.8.0 # via - # -r requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt # tox -pyproject-hooks==1.0.0 +pyproject-hooks==1.2.0 # via - # -r requirements/pip-tools.txt + # -r /home/misha/work/cc2olx/requirements/pip-tools.txt # build # pip-tools -pytest==8.1.1 +pytest==8.3.4 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest-cov # pytest-mock pytest-cov==5.0.0 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt pytest-mock==3.14.0 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt readme-renderer==43.0 # via twine -requests==2.31.0 +requests==2.32.3 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests-toolbelt # twine requests-toolbelt==1.0.0 # via twine rfc3986==2.0.0 # via twine -rich==13.7.1 +rich==13.9.4 # via twine secretstorage==3.3.3 # via keyring -tomli==2.0.1 +sqlparse==0.5.3 + # via + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt + # django +tomli==2.2.1 # via - # -r requirements/ci.txt - # -r requirements/pip-tools.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/pip-tools.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # black # build # coverage # pip-tools # pyproject-api - # pyproject-hooks # pytest # tox -tox==4.14.2 - # via -r requirements/ci.txt -twine==5.0.0 +tox==4.23.2 + # via -r /home/misha/work/cc2olx/requirements/ci.txt +twine==6.0.1 # via -r requirements/dev.in -typing-extensions==4.10.0 +typing-extensions==4.12.2 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt + # asgiref # black # rich -urllib3==2.2.1 + # tox +urllib3==2.2.3 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt # requests # twine -virtualenv==20.25.1 +virtualenv==20.28.1 # via - # -r requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt # tox -wheel==0.43.0 +wheel==0.45.1 # via + # -r /home/misha/work/cc2olx/requirements/pip-tools.txt # -r requirements/dev.in - # -r requirements/pip-tools.txt # pip-tools -xmlformatter==0.2.6 +xmlformatter==0.2.8 # via - # -r requirements/ci.txt - # -r requirements/quality.txt + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt youtube-dl==2021.12.17 # via - # -r requirements/ci.txt - # -r requirements/quality.txt -zipp==3.18.1 + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt +zipp==3.20.2 # via - # -r requirements/pip-tools.txt + # -r /home/misha/work/cc2olx/requirements/pip-tools.txt # importlib-metadata # importlib-resources diff --git a/requirements/pip-tools.txt b/requirements/pip-tools.txt index 8931dc2e..5fb5b3fa 100644 --- a/requirements/pip-tools.txt +++ b/requirements/pip-tools.txt @@ -4,28 +4,27 @@ # # make upgrade # -build==1.1.1 +build==1.2.2.post1 # via pip-tools -click==8.1.7 +click==8.1.8 # via pip-tools -importlib-metadata==7.1.0 +importlib-metadata==8.5.0 # via build -packaging==24.0 +packaging==24.2 # via build pip-tools==7.4.1 # via -r requirements/pip-tools.in -pyproject-hooks==1.0.0 +pyproject-hooks==1.2.0 # via # build # pip-tools -tomli==2.0.1 +tomli==2.2.1 # via # build # pip-tools - # pyproject-hooks -wheel==0.43.0 +wheel==0.45.1 # via pip-tools -zipp==3.18.1 +zipp==3.20.2 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements/pip.txt b/requirements/pip.txt index cf449024..e7868ed4 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -4,11 +4,11 @@ # # make upgrade # -wheel==0.43.0 +wheel==0.45.1 # via -r requirements/pip.in # The following packages are considered to be unsafe in a requirements file: -pip==24.0 +pip==24.3.1 # via -r requirements/pip.in -setuptools==69.2.0 +setuptools==75.3.0 # via -r requirements/pip.in diff --git a/requirements/quality.txt b/requirements/quality.txt index 81f2a7fa..6801274c 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -4,83 +4,100 @@ # # make upgrade # -black==24.3.0 +asgiref==3.8.1 + # via + # -r /home/misha/work/cc2olx/requirements/test.txt + # django +backports-zoneinfo==0.2.1 + # via + # -r /home/misha/work/cc2olx/requirements/test.txt + # django +black==24.8.0 # via -r requirements/quality.in -certifi==2024.2.2 +certifi==2024.12.14 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # requests -click==8.1.7 +click==8.1.8 # via black -coverage[toml]==7.4.4 +coverage[toml]==7.6.1 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # pytest-cov -exceptiongroup==1.2.0 +django==4.2.17 + # via -r /home/misha/work/cc2olx/requirements/test.txt +exceptiongroup==1.2.2 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # pytest -flake8==7.0.0 +flake8==7.1.1 # via -r requirements/quality.in -idna==3.6 +idna==3.10 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # requests iniconfig==2.0.0 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # pytest -lxml==5.1.0 - # via -r requirements/test.txt +lxml==5.3.0 + # via -r /home/misha/work/cc2olx/requirements/test.txt mccabe==0.7.0 # via flake8 mypy-extensions==1.0.0 # via black -packaging==24.0 +packaging==24.2 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # black # pytest pathspec==0.12.1 # via black -platformdirs==4.2.0 +platformdirs==4.3.6 # via black -pluggy==1.4.0 +pluggy==1.5.0 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # pytest -pycodestyle==2.11.1 +pycodestyle==2.12.1 # via flake8 pyflakes==3.2.0 # via flake8 -pytest==8.1.1 +pytest==8.3.4 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # pytest-cov # pytest-mock pytest-cov==5.0.0 - # via -r requirements/test.txt + # via -r /home/misha/work/cc2olx/requirements/test.txt pytest-mock==3.14.0 - # via -r requirements/test.txt -requests==2.31.0 - # via -r requirements/test.txt -tomli==2.0.1 + # via -r /home/misha/work/cc2olx/requirements/test.txt +requests==2.32.3 + # via -r /home/misha/work/cc2olx/requirements/test.txt +sqlparse==0.5.3 + # via + # -r /home/misha/work/cc2olx/requirements/test.txt + # django +tomli==2.2.1 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # black # coverage # pytest -typing-extensions==4.10.0 - # via black -urllib3==2.2.1 +typing-extensions==4.12.2 + # via + # -r /home/misha/work/cc2olx/requirements/test.txt + # asgiref + # black +urllib3==2.2.3 # via - # -r requirements/test.txt + # -r /home/misha/work/cc2olx/requirements/test.txt # requests -xmlformatter==0.2.6 - # via -r requirements/test.txt +xmlformatter==0.2.8 + # via -r /home/misha/work/cc2olx/requirements/test.txt youtube-dl==2021.12.17 - # via -r requirements/test.txt + # via -r /home/misha/work/cc2olx/requirements/test.txt diff --git a/requirements/test.txt b/requirements/test.txt index f3e10d65..b025e7b1 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,33 +4,42 @@ # # make upgrade # -certifi==2024.2.2 +asgiref==3.8.1 # via - # -r requirements/base.txt + # -r /home/misha/work/cc2olx/requirements/base.txt + # django +backports-zoneinfo==0.2.1 + # via + # -r /home/misha/work/cc2olx/requirements/base.txt + # django +certifi==2024.12.14 + # via + # -r /home/misha/work/cc2olx/requirements/base.txt # requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 # via - # -r requirements/base.txt + # -r /home/misha/work/cc2olx/requirements/base.txt # requests -coverage[toml]==7.4.4 +coverage[toml]==7.6.1 # via # -r requirements/test.in # pytest-cov -exceptiongroup==1.2.0 + # via -r /home/misha/work/cc2olx/requirements/base.txt +exceptiongroup==1.2.2 # via pytest -idna==3.6 +idna==3.10 # via - # -r requirements/base.txt + # -r /home/misha/work/cc2olx/requirements/base.txt # requests iniconfig==2.0.0 # via pytest -lxml==5.1.0 - # via -r requirements/base.txt -packaging==24.0 +lxml==5.3.0 + # via -r /home/misha/work/cc2olx/requirements/base.txt +packaging==24.2 # via pytest -pluggy==1.4.0 +pluggy==1.5.0 # via pytest -pytest==8.1.1 +pytest==8.3.4 # via # -r requirements/test.in # pytest-cov @@ -39,17 +48,25 @@ pytest-cov==5.0.0 # via -r requirements/test.in pytest-mock==3.14.0 # via -r requirements/test.in -requests==2.31.0 - # via -r requirements/base.txt -tomli==2.0.1 +requests==2.32.3 + # via -r /home/misha/work/cc2olx/requirements/base.txt +sqlparse==0.5.3 + # via + # -r /home/misha/work/cc2olx/requirements/base.txt + # django +tomli==2.2.1 # via # coverage # pytest -urllib3==2.2.1 +typing-extensions==4.12.2 + # via + # -r /home/misha/work/cc2olx/requirements/base.txt + # asgiref +urllib3==2.2.3 # via - # -r requirements/base.txt + # -r /home/misha/work/cc2olx/requirements/base.txt # requests -xmlformatter==0.2.6 +xmlformatter==0.2.8 # via -r requirements/test.in youtube-dl==2021.12.17 - # via -r requirements/base.txt + # via -r /home/misha/work/cc2olx/requirements/base.txt diff --git a/src/cc2olx/django_settings.py b/src/cc2olx/django_settings.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cc2olx/main.py b/src/cc2olx/main.py index cfe02104..981955d7 100644 --- a/src/cc2olx/main.py +++ b/src/cc2olx/main.py @@ -1,10 +1,12 @@ import logging +import os import shutil import sys import tempfile - from pathlib import Path +import django + from cc2olx import filesystem from cc2olx import olx from cc2olx.cli import parse_args, RESULT_TYPE_FOLDER, RESULT_TYPE_ZIP @@ -54,6 +56,8 @@ def convert_one_file( def main(): + initialize_django() + parsed_args = parse_args() settings = collect_settings(parsed_args) @@ -95,5 +99,13 @@ def main(): return 0 +def initialize_django(): + """ + Initialize the Django package. + """ + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cc2olx.django_settings") + django.setup() + + if __name__ == "__main__": sys.exit(main()) From 22884d8efbb7d7ecaf932def79499d254be9af46 Mon Sep 17 00:00:00 2001 From: Myhailo Chernyshov Date: Mon, 13 Jan 2025 23:06:01 +0200 Subject: [PATCH 9/9] refactor: Django validator is used for link source validation --- requirements/ci.txt | 3 ++ requirements/dev.txt | 5 ++ requirements/quality.txt | 3 ++ requirements/test.in | 1 + requirements/test.txt | 3 ++ src/cc2olx/cli.py | 4 +- src/cc2olx/django_settings.py | 2 + src/cc2olx/utils.py | 12 ----- src/cc2olx/validators/cli.py | 82 +++++++++++-------------------- tests/test_validators/test_cli.py | 27 ++++++++-- 10 files changed, 73 insertions(+), 69 deletions(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index 1d623dbb..01968445 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -105,9 +105,12 @@ pytest==8.3.4 # via # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest-cov + # pytest-django # pytest-mock pytest-cov==5.0.0 # via -r /home/misha/work/cc2olx/requirements/quality.txt +pytest-django==4.9.0 + # via -r /home/misha/work/cc2olx/requirements/quality.txt pytest-mock==3.14.0 # via -r /home/misha/work/cc2olx/requirements/quality.txt requests==2.32.3 diff --git a/requirements/dev.txt b/requirements/dev.txt index c5ed0049..e212fcf2 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -205,11 +205,16 @@ pytest==8.3.4 # -r /home/misha/work/cc2olx/requirements/ci.txt # -r /home/misha/work/cc2olx/requirements/quality.txt # pytest-cov + # pytest-django # pytest-mock pytest-cov==5.0.0 # via # -r /home/misha/work/cc2olx/requirements/ci.txt # -r /home/misha/work/cc2olx/requirements/quality.txt +pytest-django==4.9.0 + # via + # -r /home/misha/work/cc2olx/requirements/ci.txt + # -r /home/misha/work/cc2olx/requirements/quality.txt pytest-mock==3.14.0 # via # -r /home/misha/work/cc2olx/requirements/ci.txt diff --git a/requirements/quality.txt b/requirements/quality.txt index 6801274c..d493f5a5 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -71,9 +71,12 @@ pytest==8.3.4 # via # -r /home/misha/work/cc2olx/requirements/test.txt # pytest-cov + # pytest-django # pytest-mock pytest-cov==5.0.0 # via -r /home/misha/work/cc2olx/requirements/test.txt +pytest-django==4.9.0 + # via -r /home/misha/work/cc2olx/requirements/test.txt pytest-mock==3.14.0 # via -r /home/misha/work/cc2olx/requirements/test.txt requests==2.32.3 diff --git a/requirements/test.in b/requirements/test.in index b5e4094e..b1692615 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -6,5 +6,6 @@ coverage pytest pytest-cov +pytest-django pytest-mock xmlformatter diff --git a/requirements/test.txt b/requirements/test.txt index b025e7b1..1c6cc62f 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -43,9 +43,12 @@ pytest==8.3.4 # via # -r requirements/test.in # pytest-cov + # pytest-django # pytest-mock pytest-cov==5.0.0 # via -r requirements/test.in +pytest-django==4.9.0 + # via -r requirements/test.in pytest-mock==3.14.0 # via -r requirements/test.in requests==2.32.3 diff --git a/src/cc2olx/cli.py b/src/cc2olx/cli.py index 2e18a4e5..49afe5c6 100644 --- a/src/cc2olx/cli.py +++ b/src/cc2olx/cli.py @@ -2,7 +2,7 @@ from pathlib import Path -from cc2olx.validators.cli import LinkSourceValidator +from cc2olx.validators.cli import link_source_validator RESULT_TYPE_FOLDER = "folder" RESULT_TYPE_ZIP = "zip" @@ -75,7 +75,7 @@ def parse_args(args=None): "-s", "--relative_links_source", nargs="?", - type=LinkSourceValidator(), + type=link_source_validator, help="The relative links source in the format '://', e.g. 'https://example.com'", ) return parser.parse_args(args) diff --git a/src/cc2olx/django_settings.py b/src/cc2olx/django_settings.py index e69de29b..c13f3ec2 100644 --- a/src/cc2olx/django_settings.py +++ b/src/cc2olx/django_settings.py @@ -0,0 +1,2 @@ +USE_I18N = False +USE_TZ = False diff --git a/src/cc2olx/utils.py b/src/cc2olx/utils.py index d3b54e73..520c66a7 100644 --- a/src/cc2olx/utils.py +++ b/src/cc2olx/utils.py @@ -1,7 +1,6 @@ """ Utility functions for cc2olx""" import csv -import ipaddress import logging import re import string @@ -124,14 +123,3 @@ def clean_from_cdata(xml_string: str) -> str: str: cleaned XML string. """ return re.sub(CDATA_PATTERN, r"\g", xml_string, flags=re.DOTALL) - - -def is_valid_ipv6_address(value: str) -> bool: - """ - Return whether the value is a valid IPv6 address. - """ - try: - ipaddress.IPv6Address(value) - except ValueError: - return False - return True diff --git a/src/cc2olx/validators/cli.py b/src/cc2olx/validators/cli.py index bfa1e232..47424121 100644 --- a/src/cc2olx/validators/cli.py +++ b/src/cc2olx/validators/cli.py @@ -1,62 +1,40 @@ import argparse import re +from typing import Callable -from cc2olx.utils import is_valid_ipv6_address +from django.core.exceptions import ValidationError +from django.core.validators import URLValidator -class LinkSourceValidator: +def convert_to_argparse_validator(django_validator: Callable) -> Callable: """ - Validate a link source. - """ - - UL = "\u00a1-\uffff" # Unicode letters range (must not be a raw string). - - # IP patterns - IPV4_REGEX = ( - r"(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)" - r"(?:\.(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)){3}" - ) - IPV6_REGEX = r"\[[0-9a-f:.]+\]" # (simple regex, validated later) - - # Host patterns - HOSTNAME_REGEX = rf"[a-z{UL}0-9](?:[a-z{UL}0-9-]{{0,61}}[a-z{UL}0-9])?" - # Max length for domain name labels is 63 characters per RFC 1034 sec. 3.1 - DOMAIN_REGEX = rf"(?:\.(?!-)[a-z{UL}0-9-]{{1,63}}(?{IPV4_REGEX}|{IPV6_REGEX}|{HOST_REGEX})" - r"(?::[0-9]{1,5})?" # port - r"/?" # trailing slash - r"\Z" - ) - - message = "Enter a valid URL." - - def __call__(self, value: str) -> str: - if not (link_source_match := re.fullmatch(self.LINK_SOURCE_REGEX, value, re.IGNORECASE)): - raise argparse.ArgumentTypeError(self.message) + Convert a Django validator to an argparse validator. - self._validate_ipv6_address(link_source_match.group("netloc")) + If a Django ValidationError is raised during the validator call, it is + intercepted and an ArgumentTypeError is raised with the same message. + """ + def argparse_validator(value): + try: + django_validator(value) + except ValidationError as exc: + raise argparse.ArgumentTypeError(exc.message) from exc return value - def _validate_ipv6_address(self, netloc: str) -> None: - """ - Check netloc correctness if it's an IPv6 address. - """ - potential_ipv6_regex = r"^\[(.+)\](?::[0-9]{1,5})?$" - if netloc_match := re.search(potential_ipv6_regex, netloc): - potential_ip = netloc_match[1] - if not is_valid_ipv6_address(potential_ip): - raise argparse.ArgumentTypeError(self.message) + return argparse_validator + + +link_source_validator = convert_to_argparse_validator( + URLValidator( + schemes=["http", "https"], + regex=( + r"^(?:[a-z0-9.+-]*)://" # scheme is validated separately + r"(?:[^\s:@/]+(?::[^\s:@/]*)?@)?" # user:pass authentication + r"(?:" + URLValidator.ipv4_re + "|" + URLValidator.ipv6_re + "|" + URLValidator.host_re + ")" + r"(?::[0-9]{1,5})?" # port + r"/?" # trailing slash + r"\Z" + ), + flags=re.IGNORECASE, + ) +) diff --git a/tests/test_validators/test_cli.py b/tests/test_validators/test_cli.py index 5d11159d..630587bc 100644 --- a/tests/test_validators/test_cli.py +++ b/tests/test_validators/test_cli.py @@ -1,8 +1,29 @@ import argparse +from unittest.mock import Mock import pytest +from django.core.exceptions import ValidationError -from cc2olx.validators.cli import LinkSourceValidator +from cc2olx.validators.cli import convert_to_argparse_validator, link_source_validator + + +class TestConvertToArgparseValidator: + def test_original_value_is_return_after_successful_call(self): + django_validator = Mock() + argparse_validator = convert_to_argparse_validator(django_validator) + value_mock = Mock() + + assert argparse_validator(value_mock) == value_mock + + def test_argument_type_error_is_raised_instead_of_intercepted_django_validation_error(self): + error_message_mock = Mock() + django_validator = Mock(side_effect=ValidationError(error_message_mock)) + argparse_validator = convert_to_argparse_validator(django_validator) + + with pytest.raises(argparse.ArgumentTypeError) as exc_info: + argparse_validator(Mock()) + + assert exc_info.value.args[0] == error_message_mock class TestLinkSourceValidator: @@ -25,7 +46,7 @@ def test_original_value_is_returned_if_it_is_valid(self, links_source: str) -> N """ Test whether the validator returns original value it is valid. """ - assert LinkSourceValidator()(links_source) == links_source + assert link_source_validator(links_source) == links_source @pytest.mark.parametrize( "links_source", @@ -46,4 +67,4 @@ def test_wrong_values_are_detected(self, links_source) -> None: Test whether the validator raises an error if the value is invalid. """ with pytest.raises(argparse.ArgumentTypeError, match="Enter a valid URL."): - LinkSourceValidator()(links_source) + link_source_validator(links_source)