Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proxito: remove redirect for README.html files #11443

Merged
merged 5 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions readthedocs/projects/tasks/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,7 @@ def _create_imported_files_and_search_index(

# Create the imported file only if it's a top-level 404 file,
# or if it's an index file. We don't need to keep track of all files.
# TODO: delete README.html from this list after deprecation.
tryfiles = ["index.html", "README.html"]
tryfiles = ["index.html"]
if relpath == "404.html" or filename in tryfiles:
html_files_to_save.append(html_file)

Expand Down
91 changes: 0 additions & 91 deletions readthedocs/proxito/tests/test_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,32 +994,6 @@ def test_versioned_no_slash(self):
"/en/latest/",
)

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_directory_indexes_readme_serving(self, storage_open):
self.project.versions.update(active=True, built=True)

get(
HTMLFile,
project=self.project,
version=self.version,
path="readme-exists/README.html",
name="README.html",
)

# Confirm we've serving from storage for the `index-exists/index.html` file
response = self.client.get(
reverse(
"proxito_404_handler",
kwargs={"proxito_path": "/en/latest/readme-exists"},
),
headers={"host": "project.readthedocs.io"},
)
self.assertEqual(response.status_code, 302)
self.assertEqual(
response["location"],
"/en/latest/readme-exists/README.html",
)

def test_directory_indexes_get_args(self):
self.project.versions.update(active=True, built=True)
get(
Expand Down Expand Up @@ -1075,71 +1049,6 @@ def test_404_storage_serves_custom_404_sphinx(self, storage_open):
self.assertEqual(response.status_code, 404)
storage_open.assert_called_once_with("html/project/fancy-version/404.html")

def test_redirects_to_correct_index_ending_with_slash(self):
"""When the path ends with a slash, we try README.html as index."""
self.project.versions.update(active=True, built=True)
version = fixture.get(
Version,
slug="fancy-version",
privacy_level=constants.PUBLIC,
active=True,
built=True,
project=self.project,
documentation_type=SPHINX,
)

get(
HTMLFile,
project=self.project,
version=version,
path="not-found/README.html",
name="README.html",
)
response = self.client.get(
reverse(
"proxito_404_handler",
kwargs={"proxito_path": "/en/fancy-version/not-found/"},
),
headers={"host": "project.readthedocs.io"},
)
self.assertEqual(response.status_code, 302)
self.assertEqual(
response["location"], "/en/fancy-version/not-found/README.html"
)

def test_redirects_to_correct_index_ending_without_slash(self):
"""When the path doesn't end with a slash, we try both, index.html and README.html."""
self.project.versions.update(active=True, built=True)
version = fixture.get(
Version,
slug="fancy-version",
privacy_level=constants.PUBLIC,
active=True,
built=True,
project=self.project,
documentation_type=SPHINX,
)

get(
HTMLFile,
project=self.project,
version=version,
path="not-found/README.html",
name="README.html",
)

response = self.client.get(
reverse(
"proxito_404_handler",
kwargs={"proxito_path": "/en/fancy-version/not-found"},
),
headers={"host": "project.readthedocs.io"},
)
self.assertEqual(response.status_code, 302)
self.assertEqual(
response["location"], "/en/fancy-version/not-found/README.html"
)

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_404_index_redirect_skips_not_built_versions(self, storage_open):
self.version.built = False
Expand Down
70 changes: 23 additions & 47 deletions readthedocs/proxito/views/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
ServeRedirectMixin,
StorageFileNotFound,
)
from readthedocs.proxito.views.utils import allow_readme_html_as_index
from readthedocs.redirects.exceptions import InfiniteRedirectException
from readthedocs.storage import build_media_storage

Expand Down Expand Up @@ -389,7 +388,6 @@ def get(self, request, proxito_path):
This does a couple of things:

* Handles directory indexing for URLs that don't end in a slash
* Handles directory indexing for README.html (for now)
* Check for user redirects
* Record the broken link for analytics
* Handles custom 404 serving
Expand Down Expand Up @@ -491,7 +489,7 @@ def get(self, request, proxito_path):

# Check and perform redirects on 404 handler for non-external domains only.
# NOTE: This redirect check must be done after trying files like
# ``index.html`` and ``README.html`` to emulate the behavior we had when
# ``index.html`` to emulate the behavior we had when
# serving directly from NGINX without passing through Python.
if not unresolved_domain.is_from_external_domain:
try:
Expand Down Expand Up @@ -642,53 +640,31 @@ def _get_index_file_redirect(self, request, project, version, filename, full_pat
For example:
humitos marked this conversation as resolved.
Show resolved Hide resolved

- /en/latest/foo -> /en/latest/foo/index.html
- /en/latest/foo -> /en/latest/foo/README.html
- /en/latest/foo/ -> /en/latest/foo/README.html
"""
# If the path ends with `/`, we already tried to serve
# the `/index.html` file.
if full_path.endswith("/"):
return None

if allow_readme_html_as_index():
tryfiles = ["index.html", "README.html"]
# If the path ends with `/`, we already tried to serve
# the `/index.html` file, so we only need to test for
# the `/README.html` file.
if full_path.endswith("/"):
tryfiles = ["README.html"]
else:
tryfiles = ["index.html"]

tryfiles = [
(filename.rstrip("/") + f"/{tryfile}").lstrip("/") for tryfile in tryfiles
]
available_index_files = list(
HTMLFile.objects.filter(version=version, path__in=tryfiles).values_list(
"path", flat=True
)
)

for tryfile in tryfiles:
if tryfile not in available_index_files:
continue

log.info("Redirecting to index file.", tryfile=tryfile)
# Use urlparse so that we maintain GET args in our redirect
parts = urlparse(full_path)
if allow_readme_html_as_index() and tryfile.endswith("README.html"):
new_path = parts.path.rstrip("/") + "/README.html"
else:
new_path = parts.path.rstrip("/") + "/"

# `full_path` doesn't include query params.`
query = urlparse(request.get_full_path()).query
redirect_url = parts._replace(
path=new_path,
query=query,
).geturl()

# TODO: decide if we need to check for infinite redirect here
# (from URL == to URL)
return HttpResponseRedirect(redirect_url)
tryfile = (filename.rstrip("/") + "/index.html").lstrip("/")
humitos marked this conversation as resolved.
Show resolved Hide resolved
if not HTMLFile.objects.filter(version=version, path=tryfile).exists():
return None

return None
log.info("Redirecting to index file.", tryfile=tryfile)
# Use urlparse so that we maintain GET args in our redirect
parts = urlparse(full_path)
new_path = parts.path.rstrip("/") + "/"

# `full_path` doesn't include query params.`
query = urlparse(request.get_full_path()).query
redirect_url = parts._replace(
path=new_path,
query=query,
).geturl()

# TODO: decide if we need to check for infinite redirect here
# (from URL == to URL)
return HttpResponseRedirect(redirect_url)


class ServeError404(SettingsOverrideObject):
Expand Down
26 changes: 0 additions & 26 deletions readthedocs/proxito/views/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import datetime

import pytz
import structlog
from django.conf import settings
from django.http import HttpResponse
from django.shortcuts import render

Expand Down Expand Up @@ -60,25 +56,3 @@ def proxito_404_page_handler(
)
r.status_code = http_status
return r


def allow_readme_html_as_index():
if not settings.RTD_ENFORCE_BROWNOUTS_FOR_DEPRECATIONS:
return True

tzinfo = pytz.timezone("America/Los_Angeles")
now = datetime.datetime.now(tz=tzinfo)

# Brownout dates as published in https://about.readthedocs.com/blog/2024/05/readme-html-deprecated/
# fmt: off
return not any([
# 12 hours browndate
datetime.datetime(2024, 6, 10, 0, 0, 0, tzinfo=tzinfo) < now < datetime.datetime(2024, 6, 10, 12, 0, 0, tzinfo=tzinfo),
# 24 hours browndate
datetime.datetime(2024, 6, 17, 0, 0, 0, tzinfo=tzinfo) < now < datetime.datetime(2024, 6, 18, 0, 0, 0, tzinfo=tzinfo),
# 48 hours browndate
datetime.datetime(2024, 6, 24, 0, 0, 0, tzinfo=tzinfo) < now < datetime.datetime(2024, 6, 26, 0, 0, 0, tzinfo=tzinfo),
# Deprecated after July 1st
datetime.datetime(2024, 7, 1, 0, 0, 0, tzinfo=tzinfo) < now,
])
# fmt: on