@@ -10,6 +10,7 @@ use alloc::vec;
1010use alloc:: vec:: Vec ;
1111use core:: char;
1212use core:: str:: CharIndices ;
13+ use icu_locale_core:: subtags:: language;
1314use icu_provider:: prelude:: * ;
1415use utf8_iter:: Utf8CharIndices ;
1516
@@ -183,29 +184,29 @@ pub enum LineBreakWordOption {
183184
184185/// Options to tailor line-breaking behavior.
185186#[ non_exhaustive]
186- #[ derive( Copy , Clone , PartialEq , Eq , Debug ) ]
187+ #[ derive( Clone , PartialEq , Eq , Debug ) ]
187188pub struct LineBreakOptions {
188189 /// Strictness of line-breaking rules. See [`LineBreakStrictness`].
189190 pub strictness : LineBreakStrictness ,
190191
191192 /// Line break opportunities between letters. See [`LineBreakWordOption`].
192193 pub word_option : LineBreakWordOption ,
193194
194- /// Use `true` as a hint to the line segmenter that the writing
195- /// system is Chinese or Japanese. This allows more break opportunities when
196- /// `LineBreakStrictness` is `Normal` or `Loose`. See
197- /// <https://drafts.csswg.org/css-text-3/#line-break-property> for details.
195+ /// Content locale for the line segmenter.
198196 ///
197+ /// A locale whose language is `ja` or `zh` allows more break opportunities
198+ /// when `LineBreakStrictness` is `Normal` or `Loose`. See
199+ /// <https://drafts.csswg.org/css-text-3/#line-break-property> for details.
199200 /// This option has no effect in Latin-1 mode.
200- pub ja_zh : bool ,
201+ pub content_locale : Option < DataLocale > ,
201202}
202203
203204impl Default for LineBreakOptions {
204205 fn default ( ) -> Self {
205206 Self {
206207 strictness : LineBreakStrictness :: Strict ,
207208 word_option : LineBreakWordOption :: Normal ,
208- ja_zh : false ,
209+ content_locale : None ,
209210 }
210211 }
211212}
@@ -303,7 +304,7 @@ pub type LineBreakIteratorUtf16<'l, 's> = LineBreakIterator<'l, 's, LineBreakTyp
303304/// let mut options = LineBreakOptions::default();
304305/// options.strictness = LineBreakStrictness::Strict;
305306/// options.word_option = LineBreakWordOption::BreakAll;
306- /// options.ja_zh = false ;
307+ /// options.content_locale = None ;
307308/// let segmenter = LineSegmenter::new_auto_with_options(options);
308309///
309310/// let breakpoints: Vec<usize> =
@@ -641,6 +642,11 @@ impl LineSegmenter {
641642 ///
642643 /// There are always breakpoints at 0 and the string length, or only at 0 for the empty string.
643644 pub fn segment_str < ' l , ' s > ( & ' l self , input : & ' s str ) -> LineBreakIteratorUtf8 < ' l , ' s > {
645+ let ja_zh = if let Some ( content_locale) = & self . options . content_locale {
646+ content_locale. language == language ! ( "ja" ) || content_locale. language == language ! ( "zh" )
647+ } else {
648+ false
649+ } ;
644650 LineBreakIterator {
645651 iter : input. char_indices ( ) ,
646652 len : input. len ( ) ,
@@ -649,6 +655,7 @@ impl LineSegmenter {
649655 data : self . payload . get ( ) ,
650656 options : & self . options ,
651657 complex : & self . complex ,
658+ ja_zh,
652659 }
653660 }
654661 /// Creates a line break iterator for a potentially ill-formed UTF8 string
@@ -660,6 +667,11 @@ impl LineSegmenter {
660667 & ' l self ,
661668 input : & ' s [ u8 ] ,
662669 ) -> LineBreakIteratorPotentiallyIllFormedUtf8 < ' l , ' s > {
670+ let ja_zh = if let Some ( content_locale) = & self . options . content_locale {
671+ content_locale. language == language ! ( "ja" ) || content_locale. language == language ! ( "zh" )
672+ } else {
673+ false
674+ } ;
663675 LineBreakIterator {
664676 iter : Utf8CharIndices :: new ( input) ,
665677 len : input. len ( ) ,
@@ -668,6 +680,7 @@ impl LineSegmenter {
668680 data : self . payload . get ( ) ,
669681 options : & self . options ,
670682 complex : & self . complex ,
683+ ja_zh,
671684 }
672685 }
673686 /// Creates a line break iterator for a Latin-1 (8-bit) string.
@@ -682,13 +695,19 @@ impl LineSegmenter {
682695 data : self . payload . get ( ) ,
683696 options : & self . options ,
684697 complex : & self . complex ,
698+ ja_zh : false ,
685699 }
686700 }
687701
688702 /// Creates a line break iterator for a UTF-16 string.
689703 ///
690704 /// There are always breakpoints at 0 and the string length, or only at 0 for the empty string.
691705 pub fn segment_utf16 < ' l , ' s > ( & ' l self , input : & ' s [ u16 ] ) -> LineBreakIteratorUtf16 < ' l , ' s > {
706+ let ja_zh = if let Some ( content_locale) = & self . options . content_locale {
707+ content_locale. language == language ! ( "ja" ) || content_locale. language == language ! ( "zh" )
708+ } else {
709+ false
710+ } ;
692711 LineBreakIterator {
693712 iter : Utf16Indices :: new ( input) ,
694713 len : input. len ( ) ,
@@ -697,6 +716,7 @@ impl LineSegmenter {
697716 data : self . payload . get ( ) ,
698717 options : & self . options ,
699718 complex : & self . complex ,
719+ ja_zh,
700720 }
701721 }
702722}
@@ -853,6 +873,7 @@ pub struct LineBreakIterator<'l, 's, Y: LineBreakType<'l, 's> + ?Sized> {
853873 data : & ' l RuleBreakDataV2 < ' l > ,
854874 options : & ' l LineBreakOptions ,
855875 complex : & ' l ComplexPayloads ,
876+ ja_zh : bool ,
856877}
857878
858879impl < ' l , ' s , Y : LineBreakType < ' l , ' s > > Iterator for LineBreakIterator < ' l , ' s , Y > {
@@ -948,7 +969,7 @@ impl<'l, 's, Y: LineBreakType<'l, 's>> Iterator for LineBreakIterator<'l, 's, Y>
948969 right_codepoint. into ( ) ,
949970 left_prop,
950971 right_prop,
951- self . options . ja_zh ,
972+ self . ja_zh ,
952973 ) {
953974 if breakable && !after_zwj {
954975 return self . get_current_position ( ) ;
@@ -1151,7 +1172,7 @@ impl<'l, 's, Y: LineBreakType<'l, 's>> LineBreakIterator<'l, 's, Y> {
11511172
11521173 fn is_break_by_normal ( & self , codepoint : Y :: CharType ) -> bool {
11531174 match codepoint. into ( ) {
1154- 0x301C | 0x30A0 => self . options . ja_zh ,
1175+ 0x301C | 0x30A0 => self . ja_zh ,
11551176 _ => false ,
11561177 }
11571178 }
0 commit comments