Draft script to find diverging links #1966

Merged: 9 commits, Oct 7, 2024
Changes from 2 commits
docs/community/topics/dependencies-js.md: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ There are two kinds of dependency definitions in this theme:
To update or add a JS dependency, follow these steps:

1. **Edit `package.json`** by adding or modifying a dependency.
-2. **Re-generate `package-lock.json`** in order to create a new set of frozen dependencies for the theme. To do this, run the following command from [the Sphinx Theme Builder](https://github.com/pradyunsg/sphinx-theme-builder).
+2. **Re-generate `package-lock.json`** in order to create a new set of frozen dependencies for the theme. To do this, run the following command from [the Sphinx Theme Builder](https://sphinx-theme-builder.readthedocs.io/en/latest/).

```
stb npm install --include=dev
```
docs/community/topics/manual-dev.md: 2 additions & 2 deletions
@@ -18,7 +18,7 @@ To do so, use a tool like [conda](https://docs.conda.io/en/latest/), [mamba](htt
Before you start, ensure that you have the following installed:

- Python >= 3.9
-- [Pandoc](https://pandoc.org/installing.html): we use `nbsphinx` to support notebook (.ipynb) files in the documentation, which requires [installing Pandoc](https://pandoc.org/installing.html) at a system level (or within a Conda environment).
+- [Pandoc](https://pandoc.org/): we use `nbsphinx` to support notebook (`.ipynb`) files in the documentation, which requires [installing Pandoc](https://pandoc.org/installing.html) at a system level (or within a Conda environment).

## Clone the repository locally

@@ -66,7 +66,7 @@ To manually open a server to watch your documentation for changes, build them, a
```
$ stb serve docs --open-browser
```

-## Run the tests
+## Manually Run the tests

To manually run the tests for this theme, first set up your environment locally, and then run:

docs/user_guide/accessibility.md: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ Site maps, usually served from a file called `sitemap.xml` are a broadly-employed
approach to telling programs like search engines and assistive technologies where
different content appears on a website.

-If using a service like [ReadTheDocs](https://readthedocs.com), these files
+If using a service like [ReadTheDocs](https://about.readthedocs.com/), these files
will be created for you _automatically_, but for some other approaches below,
it's handy to generate a `sitemap.xml` locally or in CI with a tool like
[sphinx-sitemap](https://pypi.org/project/sphinx-sitemap/).
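
If you go that route, a minimal `conf.py` setup for sphinx-sitemap might look like the sketch below; the base URL is a placeholder to replace with your site's own:

```python
# conf.py: minimal sphinx-sitemap setup (sketch; the base URL is a placeholder)
extensions = [
    # ...your other extensions...
    "sphinx_sitemap",
]

# sphinx-sitemap resolves each page's entry in sitemap.xml against this base URL
html_baseurl = "https://example.org/docs/"
```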
docs/user_guide/indices.rst: 1 addition & 1 deletion
@@ -19,4 +19,4 @@ By design the indices pages are not linked in a documentation generated with this

.. note::

-   Don't forget to add back the ``"sidebar-ethical-ads.html"`` template if you are serving your documentation using `ReadTheDocs <https://readthedocs.org>`__.
+   Don't forget to add back the ``"sidebar-ethical-ads.html"`` template if you are serving your documentation using `ReadTheDocs <https://about.readthedocs.com/>`__.
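
For reference, re-adding that template to an overridden `html_sidebars` might look like the sketch below; the other template name shown is an assumption for illustration and should match your own configuration:

```python
# conf.py: sketch only; "sidebar-nav-bs.html" is an assumed entry for
# illustration, while "sidebar-ethical-ads.html" is the template to keep
html_sidebars = {
    "**": [
        "sidebar-nav-bs.html",       # example navigation sidebar template
        "sidebar-ethical-ads.html",  # keeps ReadTheDocs ethical ads visible
    ]
}
```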
tools/divergent_links.py: new file, 106 additions
@@ -0,0 +1,106 @@
"""This script help checking divergent links.

That is to say, links to the same page,
that have different titles.
gabalafou (Collaborator):

My terminology was terrible (divergent versus convergent) but you got my definition backwards. A divergent link is same name, different URLs.

So this got me a bit confused when I reviewed the PR.

I think the code in this PR actually detects divergent links (same name, different URLs), in which case the comment is wrong.

gabalafou (Collaborator, Sep 27, 2024):

PS. Please let's ditch my terminology. It's terrible. Nobody is going to be able to remember which side of divergent or convergent they are on. We should probably stick with names like same-name-different-URL links and same-URL-different-name links, unless we can think of something creative and memorable.

gabalafou (Collaborator):

Or maybe, less of a mouthful... name-consistent links (same name, different URLs) versus name-inconsistent links (different names, same URL)

gabalafou (Collaborator):

But I don't like that either because it makes it sound like one is bad (inconsistent) and the other good (consistent) whereas neither of them are good. So maybe, URL-inconsistent links versus name-inconsistent links.

Carreau (Collaborator, Author):

I've reworded.
"""
Carreau marked this conversation as resolved.
Show resolved Hide resolved

import os
import sys
from collections import defaultdict
from urllib.parse import urljoin

from bs4 import BeautifulSoup

# Link texts that commonly repeat across pages and are safe to skip.
ignores = [
"#",
"next",
"previous",
"[source]",
"edit on github",
"[docs]",
"read more ...",
"show source",
"module",
]


def find_html_files(folder_path):
"""Find all html files in given folder."""
html_files = []
for root, dirs, files in os.walk(folder_path):
for file in files:
if file.endswith(".html"):
html_files.append(os.path.join(root, file))
return html_files


class Checker:
"""Link checker."""

links: dict[str, list]

def __init__(self):
self.links = defaultdict(list)

def scan(self, html_content, identifier):
"""Scan given file for html links."""
# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")

# Extract all anchor tags
for a_tag in soup.find_all("a", href=True):
url = a_tag["href"]
if url.startswith("#"):
continue
content = a_tag.text.strip().lower()
if content in ignores:
continue
if content.split("\n")[0] in ignores:
continue

fullurl = urljoin(identifier, url)
self.links[content].append((fullurl, identifier))

def duplicates(self):
"""Print potential duplicates."""
for content, url_pages in self.links.items():
uniq_url = {u for u, _ in url_pages}
if len(uniq_url) >= 2:
                print(
                    f"{content!r} appears {len(url_pages)} times"
                    f" and points to {len(uniq_url)} different URLs:"
                )
dct = defaultdict(list)
for u, p in url_pages:
dct[u].append(p)
for u, ps in dct.items():
print(" ", u, "in")
for p in ps:
print(" ", p)


if __name__ == "__main__":
    # Usage: python tools/divergent_links.py <path-to-built-html-folder>
    checker = Checker()
    for file in find_html_files(sys.argv[1]):
        with open(file) as f:
            checker.scan(f.read(), file)
    checker.duplicates()
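
A quick, hypothetical smoke test (not part of the PR) can exercise `Checker` with two pages whose link text is identical but whose targets differ, which is exactly the case the script reports:

```python
# Hypothetical smoke test; assumes divergent_links.py is importable.
from divergent_links import Checker

page_one = '<a href="https://example.com/install.html">Install</a>'
page_two = '<a href="https://example.com/setup.html">Install</a>'

checker = Checker()
checker.scan(page_one, "page_one.html")
checker.scan(page_two, "page_two.html")
# "install" now maps to two different URLs, so a report is printed.
checker.duplicates()
```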