Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4072,8 +4072,8 @@ version = "0.0.0"
dependencies = [
"expect-test",
"memchr",
"unicode-ident",
"unicode-properties",
"unicode-xid",
]

[[package]]
Expand Down Expand Up @@ -5890,24 +5890,24 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"

[[package]]
name = "unicode-ident"
version = "1.0.18"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"

[[package]]
name = "unicode-normalization"
version = "0.1.24"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
dependencies = [
"tinyvec",
]

[[package]]
name = "unicode-properties"
version = "0.1.3"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0"
checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"

[[package]]
name = "unicode-script"
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_lexer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ Rust lexer used by rustc. No stability guarantees are provided.
# Note that this crate purposefully does not depend on other rustc crates
[dependencies]
memchr = "2.7.6"
unicode-properties = { version = "0.1.0", default-features = false, features = ["emoji"] }
unicode-xid = "0.2.0"
unicode-properties = { version = "0.1.4", default-features = false, features = ["emoji"] }
unicode-ident = "1.0.22"

[dev-dependencies]
expect-test = "1.4.0"
6 changes: 3 additions & 3 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ use LiteralKind::*;
use TokenKind::*;
use cursor::EOF_CHAR;
pub use cursor::{Cursor, FrontmatterAllowed};
pub use unicode_ident::UNICODE_VERSION as UNICODE_IDENT_VERSION;
use unicode_properties::UnicodeEmoji;
pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;

/// Parsed token.
/// It doesn't contain information about data that has been parsed,
Expand Down Expand Up @@ -370,14 +370,14 @@ pub fn is_horizontal_whitespace(c: char) -> bool {
/// a formal definition of valid identifier name.
pub fn is_id_start(c: char) -> bool {
// This is XID_Start OR '_' (which formally is not a XID_Start).
c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
c == '_' || unicode_ident::is_xid_start(c)
}

/// True if `c` is valid as a non-first character of an identifier.
/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
/// a formal definition of valid identifier name.
pub fn is_id_continue(c: char) -> bool {
unicode_xid::UnicodeXID::is_xid_continue(c)
unicode_ident::is_xid_continue(c)
}

/// The passed string is lexically an identifier.
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_parse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ rustc_session = { path = "../rustc_session" }
rustc_span = { path = "../rustc_span" }
thin-vec = "0.2.12"
tracing = "0.1"
unicode-normalization = "0.1.11"
unicode-width = "0.2.0"
unicode-normalization = "0.1.25"
unicode-width = "0.2.2"
# tidy-alphabetical-end

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_span/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ scoped-tls = "1.0"
sha1 = "0.10.0"
sha2 = "0.10.1"
tracing = "0.1"
unicode-width = "0.2.0"
unicode-width = "0.2.2"
# tidy-alphabetical-end
1 change: 0 additions & 1 deletion src/tools/tidy/src/deps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,6 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
"unicode-script",
"unicode-security",
"unicode-width",
"unicode-xid",
"utf8parse",
"valuable",
"version_check",
Expand Down
7 changes: 5 additions & 2 deletions tests/ui-fulldeps/lexer/unicode-version.rs
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since all the relevant crates seem to export some flavour of the UNICODE_VERSION constant included in the standard library, perhaps there could be a simple test somewhere that asserts that all these constants are identical? That way manual review of the versions isn't required.

At the moment this test is in place, checking that `rustc_lexer::UNICODE_IDENT_VERSION == rustc_parse::UNICODE_NORMALIZATION_VERSION`.

Would expanding this test to all the Unicode-version dependent crates used in /compiler/* be enough?

Copy link
Contributor

@clarfonthey clarfonthey Dec 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, to clarify, this is exactly what I was proposing in this comment: #148321 (comment)

I was looking into it, and the approach I went with was simply adding an anonymous constant to each of the crates to make a static assertion. For example, this is what I currently have for rustc_lexer in my (unpublished) branch:

// Static assertion: ensure that the Unicode version of `unicode-properties`
// is the same as libstd's. Because this is an anonymous `const`, the
// assertions are evaluated at compile time, so a mismatch fails the build.
const _: () = {
    // libstd's version is the reference point the crate is compared against.
    let internal = std::char::UNICODE_VERSION;
    let properties = unicode_properties::UNICODE_VERSION;
    // Compare each of the three version components individually, using plain
    // `assert!` rather than `assert_eq!`. The casts bring libstd's components
    // to `u64` so both sides of `==` have the same type.
    // NOTE(review): presumably the crate exposes `u64` components — confirm.
    assert!(internal.0 as u64 == properties.0);
    assert!(internal.1 as u64 == properties.1);
    assert!(internal.2 as u64 == properties.2);
};
// Static assertion: ensure that the Unicode version of `unicode-xid` is the
// same as libstd's. Because this is an anonymous `const`, the assertions are
// evaluated at compile time, so a mismatch fails the build.
const _: () = {
    // libstd's version is the reference point the crate is compared against.
    let internal = std::char::UNICODE_VERSION;
    let xid = unicode_xid::UNICODE_VERSION;
    // Compare each of the three version components individually. The casts
    // bring libstd's components to `u64` so both sides of `==` have the same
    // type.
    // NOTE(review): presumably the crate exposes `u64` components — confirm.
    assert!(internal.0 as u64 == xid.0);
    assert!(internal.1 as u64 == xid.1);
    assert!(internal.2 as u64 == xid.2);
};

(Note: I'm using libstd's version as the anchor because it's the most convenient: if all of the crates agree with one another, they should agree with libstd too. Also, while I could use unstable features to make this simpler with assert_eq, I chose to go with something that works on stable at the moment, just for ease of maintenance.)

Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ fn main() {
it should also be updated in the reference at \
https://github.com/rust-lang/reference/blob/HEAD/src/identifiers.md."
);
println!("Unicode XID version is: {:?}", rustc_lexer::UNICODE_XID_VERSION);
println!("Unicode normalization version is: {:?}", rustc_parse::UNICODE_NORMALIZATION_VERSION);
println!("Unicode version of unicode-ident is: {:?}", rustc_lexer::UNICODE_IDENT_VERSION);
println!(
"Unicode version of unicode-normalization is: {:?}",
rustc_parse::UNICODE_NORMALIZATION_VERSION
);
}
4 changes: 2 additions & 2 deletions tests/ui-fulldeps/lexer/unicode-version.run.stdout
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Checking if Unicode version changed.
If the Unicode version changes are intentional, it should also be updated in the reference at https://github.com/rust-lang/reference/blob/HEAD/src/identifiers.md.
Unicode XID version is: (16, 0, 0)
Unicode normalization version is: (16, 0, 0)
Unicode version of unicode-ident is: (17, 0, 0)
Unicode version of unicode-normalization is: (17, 0, 0)
Loading