openstax · TylerZeroMaster · Apr 1, 2025 · Mar 25, 2025 · Mar 25, 2025 · Mar 31, 2025
diff --git a/bakery-src/scripts/link_rex.py b/bakery-src/scripts/link_rex.py
@@ -4,17 +4,14 @@
 
 from lxml import etree
 
-from .utils import unformatted_rex_links
+from .utils import unformatted_rex_links, build_rex_url
 from .profiler import timed
 
 
 @timed
 def update_doc_links(doc, book_slugs_by_uuid=None):
     """Modify links in doc"""
 
-    def _rex_url_builder(book, page):
-        return f"http://openstax.org/books/{book}/pages/{page}"
-
     external_link_elems = unformatted_rex_links(doc)
 
     for node in external_link_elems:
@@ -27,7 +24,7 @@ def _rex_url_builder(book, page):
             external_book_slug = book_slugs_by_uuid[
                 external_book_uuid] if book_slugs_by_uuid else node.attrib["data-book-slug"]
             external_page_slug = node.attrib["data-page-slug"]
-            node.attrib["href"] = _rex_url_builder(
+            node.attrib["href"] = build_rex_url(
                 external_book_slug, external_page_slug
             )
             print('AFTER!!:')

diff --git a/bakery-src/scripts/link_single.py b/bakery-src/scripts/link_single.py
@@ -13,6 +13,7 @@
 from .cnx_models import flatten_to_documents
 from lxml import etree
 from .profiler import timed
+from .utils import build_rex_url
 
 
 @timed
@@ -144,6 +145,7 @@ def transform_links(
     book_metadata = parse_book_metadata(binders, baked_meta_dir)
 
     uuid_by_slug = {entry["slug"]: entry["id"] for entry in book_metadata}
+    slug_by_uuid = {uuid: slug for slug, uuid in uuid_by_slug.items()}
     book_tree_by_uuid = {
         entry["id"]: entry["tree"] for entry in book_metadata
     }
@@ -152,6 +154,26 @@ def transform_links(
         book_tree_by_uuid
     )
 
+    for node in doc.xpath(
+        '//x:a[@data-needs-rex-link="true"]',
+        namespaces={"x": "http://www.w3.org/1999/xhtml"},
+    ):
+        target_module_uuid = node.xpath(
+            'ancestor::*[@data-type="page"]/@id',
+            namespaces={"x": "http://www.w3.org/1999/xhtml"},
+        )[0]
+        if target_module_uuid.startswith("page_"):
+            target_module_uuid = target_module_uuid[5:]
+        canonical_book_uuid = canonical_map.get(target_module_uuid)
+        assert canonical_book_uuid, \
+            f'Could not find book for page: {target_module_uuid}'
+        book_slug = slug_by_uuid.get(canonical_book_uuid)
+        assert book_slug, f'Could not find slug for book: {canonical_book_uuid}'
+        page_slug = page_slug_resolver(canonical_book_uuid, target_module_uuid)
+        assert page_slug, f'Could not find slug for page: {target_module_uuid}'
+        node.attrib['href'] = build_rex_url(book_slug, page_slug)
+        del node.attrib['data-needs-rex-link']
+
     # look up uuids for external module links
     for node in doc.xpath(
             '//x:a[@href and starts-with(@href, "/contents/")]',
@@ -173,9 +195,7 @@ def transform_links(
             raise Exception(
                 f"Could not find canonical book for {target_module_uuid}"
             )
-        canonical_book_slug = next(
-            (slug for slug, uuid in uuid_by_slug.items()
-             if uuid == canonical_book_uuid))
+        canonical_book_slug = slug_by_uuid[canonical_book_uuid]
 
         page_slug = page_slug_resolver(canonical_book_uuid, target_module_uuid)
         if page_slug is None:

diff --git a/bakery-src/scripts/utils.py b/bakery-src/scripts/utils.py
@@ -382,3 +382,13 @@ def patch_math_for_pandoc(doc, math_el_namespace):
     ):
         print("Found \\u0338 in math: converting to mtext", file=sys.stderr)
         node.tag = f"{{{math_el_namespace}}}mtext"
+
+
+def build_rex_url(book, page):
+    """Build the REX link for a page of a book.
+
+    `book` and `page` are interpolated as-is into
+    ``http://openstax.org/books/{book}/pages/{page}``; callers pass the
+    book slug and the page slug. Returns the resulting URL string.
+    """
+    return f"http://openstax.org/books/{book}/pages/{page}"
diff --git a/bakery-src/tests/test_bakery_scripts.py b/bakery-src/tests/test_bakery_scripts.py
@@ -2857,6 +2857,7 @@ def test_link_single(tmp_path, mocker):
         <div data-type="metadata" style="display: none;">
         <h1 data-type="document-title" itemprop="name">Page1</h1>
         <span data-type="canonical-book-uuid" data-value="1ba7e813-2d8a-4b73-87a1-876cfb5e7b58"/>
+        <a data-check-rex-link="true" data-needs-rex-link="true">LINK 1</a>
         </div>
         <p><a id="l1"
             href="/contents/4aa9351c-019f-4c06-bb40-d58262ea7ec7"
@@ -2869,6 +2870,7 @@ def test_link_single(tmp_path, mocker):
         <div data-type="metadata" style="display: none;">
         <h1 data-type="document-title" itemprop="name">Page2</h1>
         <span data-type="canonical-book-uuid" data-value="1ba7e813-2d8a-4b73-87a1-876cfb5e7b58"/>
+        <a data-check-rex-link="true" data-needs-rex-link="true">LINK 2</a>
         </div>
         <p><a id="l3"
             href="/contents/9f049b16-15e9-4725-8c8b-4908a3e2be5e"
@@ -3009,6 +3011,30 @@ def test_link_single(tmp_path, mocker):
     check_links = [link.items() for link in parsed_links]
 
     assert check_links == expected_links
+    # GIVEN: Anchors marked to be updated with rex links
+    rex_links = tree.xpath(
+        '//x:a[@data-check-rex-link = "true"]',
+        namespaces={"x": "http://www.w3.org/1999/xhtml"},
+    )
+    # WHEN: The links are updated
+    # THEN: 1. Their href changes
+    #       2. The marker attribute is removed
+    #       3. Their text is unchanged (this text may be localized)
+    #       4. Other attributes are unchanged
+    check_links = [list(link.items()) + [('text', link.text)] for link in rex_links]
+    expected_links = [
+        [
+            ('data-check-rex-link', 'true'),
+            ('href', 'http://openstax.org/books/book1/pages/book1-page1'),
+            ('text', 'LINK 1'),
+        ],
+        [
+            ('data-check-rex-link', 'true'),
+            ('href', 'http://openstax.org/books/book1/pages/book1-page2'),
+            ('text', 'LINK 2'),
+        ],
+    ]
+    assert check_links == expected_links
 
 
 def test_link_single_with_flag(tmp_path, mocker):