@@ -307,12 +307,114 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
307307 format_table_content (f , data , 8 )
308308 f .write ("\n ];\n \n " )
309309
def emit_trie_lookup_range_table(f):
    """Write the Rust `BoolTrie` struct and its lookup functions to `f`.

    `f` is any object with a `write(str)` method. The emitted Rust code is
    shared by every generated property module: each module defines
    `BoolTrie` constants and queries them via `trie_lookup_range_table`.

    Every bitmap leaf is a `u64` covering 64 consecutive code points, so the
    index of the leaf holding code point `c` is always derived from
    `c >> 6` and the bit inside the leaf is `c & 63`.
    """
    # NOTE: `r1` has 32 leaves covering 0..0x800, i.e. 64 code points per
    # leaf, so it must be indexed with `c >> 6` (0..31).  Indexing with
    # `c >> 8` would only ever touch leaves 0..7 and test the wrong bits.
    f.write("""
pub struct BoolTrie {
    // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences)
    r1: [u64; 32],   // leaves

    // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences)
    r2: [u8; 1024],      // first level
    r3: &'static [u64],  // leaves

    // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences)
    r4: [u8; 272],       // first level
    r5: &'static [u8],   // second level
    r6: &'static [u64],  // leaves
}

fn trie_range_leaf(c: usize, bitmap_chunk: u64) -> bool {
    ((bitmap_chunk >> (c & 63)) & 1) != 0
}

fn trie_lookup_range_table(c: char, r: &'static BoolTrie) -> bool {
    let c = c as usize;
    if c < 0x800 {
        trie_range_leaf(c, r.r1[c >> 6])
    } else if c < 0x10000 {
        let child = r.r2[c >> 6];
        trie_range_leaf(c, r.r3[child as usize])
    } else {
        let child = r.r4[c >> 12];
        let leaf = r.r5[((child as usize) << 6) + ((c >> 6) & 0x3f)];
        trie_range_leaf(c, r.r6[leaf as usize])
    }
}\n
""")
344+
def compute_trie(rawdata, chunksize):
    """Split `rawdata` into fixed-size chunks and deduplicate them.

    Returns a `(root, child_data)` tuple:

    * `root[i]` is the index of the distinct chunk equal to the i-th chunk
      of `rawdata`; indices are assigned in order of first appearance.
    * `child_data` is the concatenation of the distinct chunks in that same
      order, so chunk `k` occupies
      `child_data[k * chunksize:(k + 1) * chunksize]`.

    Only whole chunks are processed: if `len(rawdata)` is not a multiple of
    `chunksize`, the trailing partial chunk is ignored.
    """
    root = []
    childmap = {}
    child_data = []
    # Floor division keeps the chunk count an int on both Python 2 and 3;
    # plain `/` yields a float on Python 3, which `range` rejects.
    for i in range(len(rawdata) // chunksize):
        data = rawdata[i * chunksize:(i + 1) * chunksize]
        # A tuple of the chunk's values is a hashable, collision-free key.
        key = tuple(data)
        if key not in childmap:
            childmap[key] = len(childmap)
            child_data.extend(data)
        root.append(childmap[key])
    return (root, child_data)
357+
def _write_bool_trie_field(f, field, entries, is_slice):
    """Write one `field: [...],` initializer of a BoolTrie literal to `f`.

    `entries` is an iterable of already-formatted Rust literals. `is_slice`
    selects the `&[...]` form used for the `&'static [T]` fields instead of
    the plain `[...]` form used for the fixed-size array fields.
    """
    f.write("        %s: %s[\n" % (field, "&" if is_slice else ""))
    format_table_content(f, ','.join(entries), 12)
    f.write("\n        ],\n")


def emit_bool_trie(f, name, t_data, is_pub=True):
    """Write a Rust `BoolTrie` constant called `name` to `f`.

    `t_data` is an iterable of inclusive `(lo, hi)` code point ranges whose
    members are in the set. The set is turned into a bitmap of 64-bit words
    (bit `cp % 64` of word `cp // 64`) and emitted as the six `BoolTrie`
    fields:

    * `r1`: the raw bitmap words for 0..0x800.
    * `r2` / `r3`: one-level trie over 0x800..0x10000 (`r2` maps each
      64-code-point block to a deduplicated leaf word in `r3`).
    * `r4` / `r5` / `r6`: two-level trie over 0x10000..0x110000.

    `r2` and `r4` are indexed on the Rust side with the *absolute* shifted
    code point, so they are front-padded with 255 for the index range that
    the lookup never reaches (32 and 16 entries respectively).

    Raises ValueError if deduplication leaves more distinct children than
    the `u8` index fields of `BoolTrie` can address.
    """
    # Code points per u64 bitmap word; the emitted Rust lookup hard-codes
    # the matching `>> 6` / `& 63`, so this must stay 64.
    CHUNK = 64

    # Build the bitmap words directly from the ranges.
    chunks = [0] * (0x110000 // CHUNK)
    for (lo, hi) in t_data:
        for cp in range(lo, hi + 1):
            chunks[cp // CHUNK] |= 1 << (cp % CHUNK)

    # All divisions use `//` so slice bounds and chunk sizes stay ints on
    # Python 3 as well as Python 2.
    # 0x800..0x10000 trie: each leaf is a single bitmap word.
    (r2, r3) = compute_trie(chunks[0x800 // CHUNK:0x10000 // CHUNK], 64 // CHUNK)
    # 0x10000..0x110000 trie: leaves first, then group 64 leaf indices per
    # second-level block.
    (mid, r6) = compute_trie(chunks[0x10000 // CHUNK:0x110000 // CHUNK], 64 // CHUNK)
    (r4, r5) = compute_trie(mid, 64)

    # r2, r4 and r5 are emitted into `u8` storage; fail here with a clear
    # message rather than generating Rust that does not compile.
    for (field, indices) in (("r2", r2), ("r4", r4), ("r5", r5)):
        if indices and max(indices) > 0xff:
            raise ValueError("%s: %s needs a child index of %d, which does not fit in u8"
                             % (name, field, max(indices)))

    pub_string = "pub " if is_pub else ""
    f.write("    %sconst %s: &'static super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name))
    _write_bool_trie_field(f, "r1", ('0x%016x' % chunk for chunk in chunks[0:0x800 // CHUNK]), False)
    _write_bool_trie_field(f, "r2", (str(node) for node in [255] * 32 + r2), False)
    _write_bool_trie_field(f, "r3", ('0x%016x' % chunk for chunk in r3), True)
    _write_bool_trie_field(f, "r4", (str(node) for node in [255] * 16 + r4), False)
    _write_bool_trie_field(f, "r5", (str(node) for node in r5), True)
    _write_bool_trie_field(f, "r6", ('0x%016x' % chunk for chunk in r6), True)
    f.write("    };\n\n")
411+
def emit_property_module(f, mod, tbl, emit):
    """Write the Rust module `mod` containing one predicate per property.

    For each property name in `emit` (processed in sorted order) this emits
    a `<name>_table` BoolTrie constant built from `tbl[name]`, followed by a
    `pub fn <name>(c: char) -> bool` that looks `c` up in that table via
    `super::trie_lookup_range_table`.
    """
    f.write("pub mod %s {\n" % mod)
    for prop in sorted(emit):
        table_name = "%s_table" % prop
        ranges = tbl[prop]
        emit_bool_trie(f, table_name, ranges)
        # Thin public wrapper around the shared trie lookup.
        f.write("    pub fn %s(c: char) -> bool {\n" % prop)
        f.write("        super::trie_lookup_range_table(c, %s_table)\n" % prop)
        f.write("    }\n\n")
    f.write("}\n\n")
318420
@@ -402,8 +504,9 @@ def emit_norm_module(f, canon, compat, combine, norm_props):
402504 norm_props = load_properties ("DerivedNormalizationProps.txt" ,
403505 ["Full_Composition_Exclusion" ])
404506
405- # bsearch_range_table is used in all the property modules below
406- emit_bsearch_range_table (rf )
507+ # trie_lookup_table is used in all the property modules below
508+ emit_trie_lookup_range_table (rf )
509+ # emit_bsearch_range_table(rf)
407510
408511 # category tables
409512 for (name , cat , pfuns ) in ("general_category" , gencats , ["N" , "Cc" ]), \
0 commit comments