Skip to content

Commit 6608086

Browse files
committed
replace special anchor handling
This extension has tried to utilize Confluence-generated anchor targets for sections over the years. However, there have been a lot of inconsistent target values with Confluence Cloud/v2 when sections have some characters (e.g. parentheses). Confluence may inject `[inlineExtension]` prefixes in header identifiers or completely ignore anchor values that have these characters. While we attempted to account for these unique situations, issues keep arising as both Confluence and this extension evolve. To try to avoid any more issues, we will no longer attempt to map directly to pre-made section identifiers when these characters are detected. Since a recent change [1] now force-adds all docutils anchor entries for sections, when a problematic section target is detected, we will use the section identifier value instead. [1]: ca031fa Signed-off-by: James Knight <git@jdknight.me>
1 parent eb44821 commit 6608086

File tree

3 files changed

+50
-42
lines changed

3 files changed

+50
-42
lines changed

sphinxcontrib/confluencebuilder/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,10 +295,6 @@ def setup(app):
295295
cm.add_conf_bool('confluence_adv_cloud')
296296
# Disable any delays when publishing property updates on Cloud
297297
cm.add_conf_bool('confluence_adv_disable_cloud_prop_delay')
298-
# Disable workaround for: https://jira.atlassian.com/browse/CONFCLOUD-74698
299-
cm.add_conf_bool('confluence_adv_disable_confcloud_74698')
300-
# Disable workaround for inline-extension anchor injection
301-
cm.add_conf_bool('confluence_adv_disable_confcloud_ieaj')
302298
# Disable any attempts to initialize this extension's custom entities.
303299
cm.add_conf_bool('confluence_adv_disable_init')
304300
# Flag to permit the use of embedded certificates from requests.
@@ -342,6 +338,8 @@ def setup(app):
342338
# replaced by confluence_space_key
343339
cm.add_conf('confluence_space_name')
344340
# dropped
341+
cm.add_conf_bool('confluence_adv_disable_confcloud_74698')
342+
cm.add_conf_bool('confluence_adv_disable_confcloud_ieaj')
345343
cm.add_conf_int('confluence_max_doc_depth')
346344

347345
# ##########################################################################

sphinxcontrib/confluencebuilder/builder.py

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@
3737
from sphinxcontrib.confluencebuilder.storage.translator import ConfluenceStorageFormatTranslator
3838
from sphinxcontrib.confluencebuilder.transmute import doctree_transmute
3939
from sphinxcontrib.confluencebuilder.util import ConfluenceUtil
40+
from sphinxcontrib.confluencebuilder.util import ascii_quote
4041
from sphinxcontrib.confluencebuilder.util import detect_cloud
4142
from sphinxcontrib.confluencebuilder.util import extract_strings_from_file
4243
from sphinxcontrib.confluencebuilder.util import first
4344
from sphinxcontrib.confluencebuilder.util import handle_cli_file_subset
4445
from sphinxcontrib.confluencebuilder.writer import ConfluenceWriter
45-
from urllib.parse import quote
4646
import os
4747
import tempfile
4848
import time
@@ -1318,6 +1318,30 @@ def _register_doctree_targets(self, docname, doctree, title_track=None):
13181318
if last_title_postfix > 0:
13191319
title_target = f'{title_target}.{last_title_postfix}'
13201320

1321+
# The value of 'title_target' should now be what the name of the
1322+
# Confluence-header should be. However, there are a couple of cases
1323+
# where this (anchor) target cannot be used. In Confluence's v2
1324+
# editor, if a title includes non-basic characters, anchor links
1325+
# to the title can require encoding. Otherwise, Confluence will
1326+
# ignore/drop the provided anchors in the page. On top of
1327+
# this, if a section has some non-basic characters, Confluence (on
1328+
# v2) may also inject one or more `[inlineExtension]` string
1329+
# prefixes into a heading's identifier.
1330+
#
1331+
# Instead of dealing with any of this, if we detect a section is
1332+
# using any characters which may cause issues in building anchors
1333+
# or cause issues building links to these sections, we will not
1334+
# use the Confluence-generated identifier. Instead, we will pull
1335+
# the first section identifier value and use that as a target
1336+
# (as we now inject the detected ids as anchor values in headers).
1337+
#
1338+
# See: https://jira.atlassian.com/browse/CONFCLOUD-74698
1339+
if title_target != ascii_quote(title_target):
1340+
if 'ids' in section_node:
1341+
old_target = title_target
1342+
title_target = first(node.parent['ids'])
1343+
self.verbose(f'target replace {title_target}: {old_target}')
1344+
13211345
# If this section is the (first) root section, register a target
13221346
# for a "root" anchor point. This is important for references that
13231347
# link to documents (e.g. `:doc:<>`). For example, if "page-a"
@@ -1359,43 +1383,6 @@ def _register_doctree_targets(self, docname, doctree, title_track=None):
13591383
self._register_target(editor, full_id, node_refid)
13601384

13611385
def _register_target(self, editor, refid, target):
1362-
# v2 editor does not link anchors with select characters;
1363-
# provide a workaround that url encodes targets
1364-
#
1365-
# See: https://jira.atlassian.com/browse/CONFCLOUD-74698
1366-
if not self.config.confluence_adv_disable_confcloud_74698:
1367-
if editor == 'v2':
1368-
# We originally encoded specific characters to prevent
1369-
# Confluence from suppressing anchors for select characters,
1370-
# but it is unknown the extensive list of characters Confluence
1371-
# was not happy with. We then switch to `quote` which worked
1372-
# for the most part, but when users used Emoji's, these
1373-
# characters would become encoded and generate anchor targets
1374-
# with incorrect values. Now, we do a partial quote in an
1375-
# attempt to be flexible -- we quote the standard ASCII range
1376-
# using Python default safe sets and anything beyond it, we
1377-
# will just leave as is.
1378-
def partial_quote(s):
1379-
chars = [quote(x) if ord(x) < 128 else x for x in s]
1380-
return ''.join(chars)
1381-
1382-
new_target = partial_quote(target)
1383-
1384-
# So... related to CONFCLOUD-74698, something about anchors
1385-
# with special characters will cause some pain for links.
1386-
# This has been observed in the past, was removed after
1387-
# thinking it was not an issue but is now being added again.
1388-
# It appears that when a header is generated an identifier in
1389-
# Confluence Cloud that has special characters, we can observe
1390-
# Confluence prefixing these identifiers with two copies of
1391-
# `[inlineExtension]`. Cannot explain why, so if this situation
1392-
# occurs, just add the prefix data to help ensure links work.
1393-
if not self.config.confluence_adv_disable_confcloud_ieaj:
1394-
if new_target != target:
1395-
new_target = 2 * '[inlineExtension]' + new_target
1396-
1397-
target = new_target
1398-
13991386
self.state.register_target(refid, target)
14001387

14011388
# For singleconfluence, register global fallbacks for targets

sphinxcontrib/confluencebuilder/util.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from sphinxcontrib.confluencebuilder.std.confluence import FONT_X_HEIGHT
1212
from subprocess import check_call
1313
from hashlib import sha256
14+
from urllib.parse import quote
1415
from urllib.parse import urlparse
1516
import getpass
1617
import os
@@ -92,6 +93,28 @@ def normalize_base_url(url):
9293
return url
9394

9495

96+
def ascii_quote(text):
97+
"""
98+
quote the ascii character range of a string
99+
100+
This utility call will return a URL-quoted value of a string for all
101+
detected ASCII characters.
102+
103+
This is primarily used to help detect prospect anchor targets in Confluence
104+
where Confluence may silently remove anchors with unsupported characters
105+
(varies per editor).
106+
107+
Args:
108+
text: the text to quote
109+
110+
Returns:
111+
the quoted text
112+
"""
113+
114+
chars = [quote(x) if ord(x) < 128 else x for x in text]
115+
return ''.join(chars)
116+
117+
95118
def convert_length(value, unit, pct=True):
96119
"""
97120
convert a length value to a confluence-supported integer-equivalent value

0 commit comments

Comments
 (0)