|
37 | 37 | from sphinxcontrib.confluencebuilder.storage.translator import ConfluenceStorageFormatTranslator |
38 | 38 | from sphinxcontrib.confluencebuilder.transmute import doctree_transmute |
39 | 39 | from sphinxcontrib.confluencebuilder.util import ConfluenceUtil |
| 40 | +from sphinxcontrib.confluencebuilder.util import ascii_quote |
40 | 41 | from sphinxcontrib.confluencebuilder.util import detect_cloud |
41 | 42 | from sphinxcontrib.confluencebuilder.util import extract_strings_from_file |
42 | 43 | from sphinxcontrib.confluencebuilder.util import first |
43 | 44 | from sphinxcontrib.confluencebuilder.util import handle_cli_file_subset |
44 | 45 | from sphinxcontrib.confluencebuilder.writer import ConfluenceWriter |
45 | | -from urllib.parse import quote |
46 | 46 | import os |
47 | 47 | import tempfile |
48 | 48 | import time |
@@ -1318,6 +1318,30 @@ def _register_doctree_targets(self, docname, doctree, title_track=None): |
1318 | 1318 | if last_title_postfix > 0: |
1319 | 1319 | title_target = f'{title_target}.{last_title_postfix}' |
1320 | 1320 |
|
| 1321 | + # The value of 'title_target' should now be what the name of the |
| 1322 | + # Confluence-header should be. However, there are a couple of cases |
| 1323 | + # where this (anchor) target cannot be used. In Confluence's v2 |
| 1324 | + # editor, if a title includes non-basic characters, anchor links |
| 1325 | + # to the title can require encoding. Otherwise, Confluence will |
| 1326 | + # ignore/drop the provided anchors in the page. On top of |
| 1327 | + # this, if a section has some non-basic characters, Confluence (on |
| 1328 | + # v2) may also inject one or more `[inlineExtension]` string |
| 1329 | + # prefixes into a heading's identifier. |
| 1330 | + # |
| 1331 | + # Instead of dealing with any of this, if we detect a section is |
| 1332 | + # using any characters which may cause issues in building anchors |
| 1333 | + # or in building links to these sections, we will not |
| 1334 | + # use the Confluence-generated identifier. Instead, we will pull |
| 1335 | + # the first section identifier value and use that as the target |
| 1336 | + # (as we now inject anchors for the detected ids into headers). |
| 1337 | + # |
| 1338 | + # See: https://jira.atlassian.com/browse/CONFCLOUD-74698 |
| 1339 | + if title_target != ascii_quote(title_target): |
| 1340 | + if 'ids' in section_node: |
| 1341 | + old_target = title_target |
| 1342 | + title_target = first(node.parent['ids']) |
| 1343 | + self.verbose(f'target replace {title_target}: {old_target}') |
| 1344 | + |
1321 | 1345 | # If this section is the (first) root section, register a target |
1322 | 1346 | # for a "root" anchor point. This is important for references that |
1323 | 1347 | # link to documents (e.g. `:doc:<>`). For example, if "page-a" |
@@ -1359,43 +1383,6 @@ def _register_doctree_targets(self, docname, doctree, title_track=None): |
1359 | 1383 | self._register_target(editor, full_id, node_refid) |
1360 | 1384 |
|
1361 | 1385 | def _register_target(self, editor, refid, target): |
1362 | | - # v2 editor does not link anchors with select characters; |
1363 | | - # provide a workaround that url encodes targets |
1364 | | - # |
1365 | | - # See: https://jira.atlassian.com/browse/CONFCLOUD-74698 |
1366 | | - if not self.config.confluence_adv_disable_confcloud_74698: |
1367 | | - if editor == 'v2': |
1368 | | - # We originally encoded specific characters to prevent |
1369 | | - # Confluence from suppressing anchors for select characters, |
1370 | | - # but it is unknown the extensive list of characters Confluence |
1371 | | - # was not happy with. We then switch to `quote` which worked |
1372 | | - # for the most part, but when users used Emoji's, these |
1373 | | - # characters would become encoded and generate anchor targets |
1374 | | - # with incorrect values. Now, we do a partial quote in an |
1375 | | - # attempt to be flexible -- we quote the standard ASCII range |
1376 | | - # using Python default safe sets and anything beyond it, we |
1377 | | - # will just leave as is. |
1378 | | - def partial_quote(s): |
1379 | | - chars = [quote(x) if ord(x) < 128 else x for x in s] |
1380 | | - return ''.join(chars) |
1381 | | - |
1382 | | - new_target = partial_quote(target) |
1383 | | - |
1384 | | - # So... related to CONFCLOUD-74698, something about anchors |
1385 | | - # with special characters will cause some pain for links. |
1386 | | - # This has been observed in the past, was removed after |
1387 | | - # thinking it was not an issue but is now being added again. |
1388 | | - # It appears that when a header is generated an identifier in |
1389 | | - # Confluence Cloud that has special characters, we can observe |
1390 | | - # Confluence prefixing these identifiers with two copies of |
1391 | | - # `[inlineExtension]`. Cannot explain why, so if this situation |
1392 | | - # occurs, just add the prefix data to help ensure links work. |
1393 | | - if not self.config.confluence_adv_disable_confcloud_ieaj: |
1394 | | - if new_target != target: |
1395 | | - new_target = 2 * '[inlineExtension]' + new_target |
1396 | | - |
1397 | | - target = new_target |
1398 | | - |
1399 | 1386 | self.state.register_target(refid, target) |
1400 | 1387 |
|
1401 | 1388 | # For singleconfluence, register global fallbacks for targets |
|
0 commit comments