@@ -40,7 +40,7 @@ function update_canonical_links_for_version(
4040 canonical_href = Gumbo. getattr (e, " href" , nothing )
4141 if canonical_href != new_canonical_href
4242 Gumbo. setattr! (e, " href" , new_canonical_href)
43- @warn " canonical_href updated" canonical_href new_canonical_href fileinfo. relpath
43+ @debug " update_canonical_links_for_version: canonical_href updated" canonical_href new_canonical_href fileinfo. relpath
4444 dom_updated = true
4545 end
4646 end
@@ -55,7 +55,7 @@ function update_canonical_links_for_version(
5555 )
5656 )
5757 push! (e. children, canonical_href_element)
58- @warn " Added new canonical_href" new_canonical_href fileinfo. relpath
58+ @debug " update_canonical_links_for_version: added new canonical_href" new_canonical_href fileinfo. relpath
5959 dom_updated = true
6060 break
6161 end
# Returns `true` only when `e` is a Gumbo HTML element with tag `link` whose `rel`
# attribute equals "canonical"; any other node yields `false`.
function is_canonical_element(e)
    e isa Gumbo.HTMLElement || return false
    Gumbo.tag(e) == :link || return false
    return Gumbo.getattr(e, "rel", nothing) == "canonical"
end
# Concatenates the given URL fragments, placing a single '/' between consecutive parts.
# No normalization is done: slashes already present in the parts are kept as they are.
function joinurl(ps::AbstractString...)
    return join(ps, '/')
end
7474
75+ """
76+ Takes the multi-versioned Documenter site in `docs_directory` and updates the HTML canonical URLs
77+ to point to `canonical`.
78+ """
7579function update_canonical_links (
7680 docs_directory:: AbstractString ;
7781 canonical:: AbstractString
@@ -85,67 +89,7 @@ function update_canonical_links(
8589 redirect_url = get_meta_redirect_url (redirect_index_html_path)
8690 splitpath (normpath (redirect_url))
8791 else
88- # Try to extract the list of versions from versions.js
89- versions_js = joinpath (docs_directory, " versions.js" )
90- isfile (versions_js) || throw (ArgumentError (" versions.js is missing in $(docs_directory) " ))
91- versions = map (extract_versions_list (versions_js)) do version_str
92- isversion, version_number = if occursin (Base. VERSION_REGEX, version_str)
93- true , VersionNumber (version_str)
94- else
95- false , nothing
96- end
97- fullpath = joinpath (docs_directory, version_str)
98- return (;
99- path = version_str,
100- path_exists = isdir (fullpath) || islink (fullpath),
101- symlink = islink (fullpath),
102- isversion,
103- version_number,
104- fullpath,
105- )
106- end
107- # We'll filter out a couple of potential bad cases and issue warnings
108- filter (versions) do vi
109- if ! vi. path_exists
110- @warn " update_canonical_links: path does not exists or is not a directory" docs_directory vi
111- return false
112- end
113- return true
114- end
115- # We need to determine the canonical path. This would usually be something like the stable/
116- # directory, but it can have a different name, including being a version number. So first we
117- # try to find a non-version directory _that is a symlink_ (so that it wouldn't get confused)
118- # previews/ or dev builds. If that fails, we try to find the directory matching `v[0-9]+`,
119- # with the highest version number. This does not cover all possible cases, but should be good
120- # enough for now.
121- if isempty (versions)
122- error (" Unable to determine the canonical path. Found no version directories" )
123- end
124-
125- non_version_symlinks = filter (vi -> ! vi. isversion && vi. symlink, versions)
126- canonical_version = if isempty (non_version_symlinks)
127- # We didn't find any non-version symlinks, so we'll try to find the vN directory now
128- # as a fallback.
129- version_symlinks = map (versions) do vi
130- if ! (vi. symlink && vi. isversion)
131- return nothing
132- end
133- m = match (r" ^([0-9]+)$" , vi. path)
134- isnothing (m) && return nothing
135- parse (Int, m[1 ]) => vi
136- end
137- filter! (! isnothing, version_symlinks)
138- if isempty (version_symlinks)
139- error (" Unable to determine the canonical path. Found no version directories" )
140- end
141- _, idx = findmax (first, version_symlinks)
142- version_symlinks[idx][2 ]
143- elseif length (non_version_symlinks) > 1
144- error (" Unable to determine the canonical path. Found multiple non-version symlinks.\n $(non_version_symlinks) " )
145- else
146- only (non_version_symlinks)
147- end
148- (canonical_version. path,)
92+ canonical_version_from_versions_js (docs_directory)
14993 end
15094 canonical_full_root = joinurl (canonical, canonical_path... )
15195 # If we have determined which version should be the canonical version, we can actually
@@ -170,31 +114,22 @@ function update_canonical_links(
170114 continue
171115 end
172116 # Finally, we can run update_canonical_links_for_version on the directory.
173- @info " Updating canonical URLs for" docs_directory filename canonical_full_root
117+ @debug " Updating canonical URLs for version " docs_directory filename canonical_full_root
174118 update_canonical_links_for_version (path; canonical = canonical_full_root)
175119 end
176120end
177121
178- function extract_versions_list (versions_js:: AbstractString )
179- versions_js = abspath (versions_js)
180- isfile (versions_js) || throw (ArgumentError (" No such file: $(versions_js) " ))
181- versions_js_content = read (versions_js, String)
182- m = match (r" var\s +DOC_VERSIONS\s *=\s *\[ ([0-9A-Za-z\"\s .,+-]+)\] " , versions_js_content)
183- if isnothing (m)
184- throw (ArgumentError ("""
185- Could not find DOC_VERSIONS in $(versions_js) :
186- $(versions_js_content) """ ))
187- end
188- versions = strip .(c -> isspace (c) || (c == ' "' ), split (m[1 ], " ," ))
189- filter! (! isempty, versions)
190- if isempty (versions)
191- throw (ArgumentError ("""
192- DOC_VERSIONS empty in $(versions_js) :
193- $(versions_js_content) """ ))
194- end
195- return versions
"""
Tries to determine the canonical directory of the site in `docs_directory` from the redirect
in its top-level `index.html`.

Returns the redirect URL split into its path components (`splitpath(normpath(url))`), or
`nothing` when `index.html` does not exist or no redirect URL could be extracted from it.
"""
function canonical_directory_from_redirect_index_html(docs_directory::AbstractString)
    redirect_index_html_path = joinpath(docs_directory, "index.html")
    isfile(redirect_index_html_path) || return nothing
    redirect_url = get_meta_redirect_url(redirect_index_html_path)
    # get_meta_redirect_url returns `nothing` when it finds no redirect; passing that on to
    # normpath would fail with a MethodError, so report "not found" instead.
    isnothing(redirect_url) && return nothing
    return splitpath(normpath(redirect_url))
end
197128
129+ """
130+ Parses the HTML file at `indexhtml_path` and tries to extract the `url=...` value
131+ of the redirect `<meta http-equiv="refresh" ...>` tag.
132+ """
198133function get_meta_redirect_url (indexhtml_path:: AbstractString )
199134 html = Gumbo. parsehtml (read (indexhtml_path, String))
200135 for e in AbstractTrees. PreOrderDFS (html. root)
@@ -215,3 +150,86 @@ function get_meta_redirect_url(indexhtml_path::AbstractString)
215150 end
216151 return nothing
217152end
153+
"""
Determines which version directory of the multi-versioned site in `docs_directory` should be
treated as the canonical one, based on the version list found in `versions.js`.

Entries from `versions.js` whose path does not exist in `docs_directory` are dropped (with a
warning). Among the remaining entries:

1. if exactly one is a symlink whose name is not a version number, that entry is chosen;
2. if there is no such symlink, the entry named `vN` (`N` an integer) with the largest `N`
   is chosen.

Returns the chosen entry's directory name as a single string.

Throws an `ArgumentError` if `docs_directory` is not a directory or has no `versions.js`, and
raises an error if no candidate is found or if there are several non-version symlinks.

NOTE(review): the visible call site in `update_canonical_links` appears to splat this return
value into `joinurl`; a bare string would splat into individual characters. Confirm that the
caller wraps the result (e.g. in a tuple or vector) before splatting.
"""
function canonical_version_from_versions_js(docs_directory)
    isdir(docs_directory) || throw(ArgumentError("Not a directory: $(docs_directory)"))
    # Try to extract the list of versions from versions.js
    versions_js = joinpath(docs_directory, "versions.js")
    isfile(versions_js) || throw(ArgumentError("versions.js is missing in $(docs_directory)"))
    versions = map(extract_versions_list(versions_js)) do version_str
        isversion, version_number = if occursin(Base.VERSION_REGEX, version_str)
            true, VersionNumber(version_str)
        else
            false, nothing
        end
        fullpath = joinpath(docs_directory, version_str)
        return (;
            path = version_str,
            path_exists = isdir(fullpath) || islink(fullpath),
            symlink = islink(fullpath),
            isversion,
            version_number,
            fullpath,
        )
    end
    # We'll filter out a couple of potential bad cases and issue warnings. This must be the
    # in-place `filter!`: a plain `filter` returns a new array and leaves `versions` untouched,
    # which would let entries that are missing on disk be picked as the canonical version.
    filter!(versions) do vi
        if !vi.path_exists
            @warn "update_canonical_links: path does not exists or is not a directory" docs_directory vi
            return false
        end
        return true
    end
    # We need to determine the canonical path. This would usually be something like the stable/
    # directory, but it can have a different name, including being a version number. So first we
    # try to find a non-version directory _that is a symlink_ (so that it wouldn't get confused
    # with previews/ or dev builds). If that fails, we try to find the directory matching
    # `v[0-9]+` with the highest version number. This does not cover all possible cases, but
    # should be good enough for now.
    if isempty(versions)
        error("Unable to determine the canonical path. Found no version directories")
    end

    non_version_symlinks = filter(vi -> !vi.isversion && vi.symlink, versions)
    canonical_version = if isempty(non_version_symlinks)
        # We didn't find any non-version symlinks, so we'll try to find the vN directory now
        # as a fallback. Each candidate is stored as `N => entry`.
        major_version_dirs = map(versions) do vi
            m = match(r"^v([0-9]+)$", vi.path)
            isnothing(m) && return nothing
            parse(Int, m[1]) => vi
        end
        filter!(!isnothing, major_version_dirs)
        if isempty(major_version_dirs)
            error("Unable to determine the canonical path. Found no version directories")
        end
        # Pick the candidate with the largest N.
        _, idx = findmax(first, major_version_dirs)
        major_version_dirs[idx][2]
    elseif length(non_version_symlinks) > 1
        error("Unable to determine the canonical path. Found multiple non-version symlinks.\n$(non_version_symlinks)")
    else
        only(non_version_symlinks)
    end

    return canonical_version.path
end
216+
"""
Reads the file at `versions_js` and returns the entries of its `var DOC_VERSIONS = [...]`
array as a vector of strings, with surrounding whitespace and double quotes removed and empty
entries dropped.

Throws an `ArgumentError` if the file does not exist, if no `DOC_VERSIONS` array can be
matched in it, or if the matched array contains no entries.
"""
function extract_versions_list(versions_js::AbstractString)
    versions_js = abspath(versions_js)
    if !isfile(versions_js)
        throw(ArgumentError("No such file: $(versions_js)"))
    end
    versions_js_content = read(versions_js, String)
    # Capture everything between the brackets of the DOC_VERSIONS array literal.
    array_match = match(r"var\s+DOC_VERSIONS\s*=\s*\[([0-9A-Za-z\"\s.,+-]+)\]", versions_js_content)
    if array_match === nothing
        throw(ArgumentError("""
        Could not find DOC_VERSIONS in $(versions_js):
        $(versions_js_content)"""))
    end
    # Each comma-separated item is trimmed of whitespace and double quotes on both ends.
    is_padding(c) = isspace(c) || (c == '"')
    entries = [strip(is_padding, item) for item in split(array_match[1], ",")]
    # A trailing comma in the array produces an empty item; discard those.
    filter!(!isempty, entries)
    if isempty(entries)
        throw(ArgumentError("""
        DOC_VERSIONS empty in $(versions_js):
        $(versions_js_content)"""))
    end
    return entries
end
0 commit comments