Skip to content

Commit

Permalink
Align EMOJI_PRESENTATION_LEAVES to 128 bytes
Browse files Browse the repository at this point in the history
Ensure rows don't cross cache lines; this makes a small difference in the benchmarks.
  • Loading branch information
Jules-Bertholet committed Mar 5, 2024
1 parent 75be2e9 commit 4aa5fb8
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 11 deletions.
11 changes: 6 additions & 5 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,8 +488,6 @@ def emit_module(
module.write(
"""
pub mod charwidth {
use core::option::Option::{self, None, Some};
/// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c` by
/// consulting a multi-level lookup table.
/// If `is_cjk == true`, ambiguous width characters are treated as double width; otherwise,
Expand Down Expand Up @@ -563,7 +561,7 @@ def emit_module(
// Extract the 3-9th (0-indexed) least significant bits of `cp`,
// and use them to index into `leaf_row`.
let idx_within_leaf = usize::try_from((cp >> 3) & 0x7F).unwrap();
let leaf_byte = EMOJI_PRESENTATION_LEAVES[idx_of_leaf][idx_within_leaf];
let leaf_byte = EMOJI_PRESENTATION_LEAVES.0[idx_of_leaf][idx_within_leaf];
// Use the 3 LSB of `cp` to index into `leaf_byte`.
((leaf_byte >> (cp & 7)) & 1) == 1
Expand Down Expand Up @@ -624,9 +622,12 @@ def emit_module(

module.write(
f"""
#[repr(align(128))]
struct Align128<T>(T);
/// Array of 1024-bit bitmaps. Index into the correct (obtained from `EMOJI_PRESENTATION_INDEX`)
/// bitmap with the 10 LSB of your codepoint to get whether it can start an emoji presentation seq.
static EMOJI_PRESENTATION_LEAVES: [[u8; 128]; {len(variation_leaves)}] = [
static EMOJI_PRESENTATION_LEAVES: Align128<[[u8; 128]; {len(variation_leaves)}]> = Align128([
"""
)
for leaf in variation_leaves:
Expand All @@ -638,7 +639,7 @@ def emit_module(
module.write("\n")
module.write(" ],\n")

module.write(" ];\n")
module.write(" ]);\n")
module.write("}\n")


Expand Down
11 changes: 6 additions & 5 deletions src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
pub const UNICODE_VERSION: (u8, u8, u8) = (15, 1, 0);

pub mod charwidth {
use core::option::Option::{self, None, Some};

/// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c` by
/// consulting a multi-level lookup table.
/// If `is_cjk == true`, ambiguous width characters are treated as double width; otherwise,
Expand Down Expand Up @@ -83,7 +81,7 @@ pub mod charwidth {
// Extract the 3-9th (0-indexed) least significant bits of `cp`,
// and use them to index into `leaf_row`.
let idx_within_leaf = usize::try_from((cp >> 3) & 0x7F).unwrap();
let leaf_byte = EMOJI_PRESENTATION_LEAVES[idx_of_leaf][idx_within_leaf];
let leaf_byte = EMOJI_PRESENTATION_LEAVES.0[idx_of_leaf][idx_within_leaf];

// Use the 3 LSB of `cp` to index into `leaf_byte`.
((leaf_byte >> (cp & 7)) & 1) == 1
Expand Down Expand Up @@ -568,9 +566,12 @@ pub mod charwidth {
0xFF, 0xFF, 0x5F,
];

#[repr(align(128))]
struct Align128<T>(T);

/// Array of 1024-bit bitmaps. Index into the correct (obtained from `EMOJI_PRESENTATION_INDEX`)
/// bitmap with the 10 LSB of your codepoint to get whether it can start an emoji presentation seq.
static EMOJI_PRESENTATION_LEAVES: [[u8; 128]; 6] = [
static EMOJI_PRESENTATION_LEAVES: Align128<[[u8; 128]; 6]> = Align128([
[
0x00, 0x00, 0x00, 0x00, 0x08, 0x04, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
Expand Down Expand Up @@ -643,5 +644,5 @@ pub mod charwidth {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x0F,
0x01, 0x00,
],
];
]);
}
2 changes: 1 addition & 1 deletion src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use std::iter;
use std::prelude::v1::*;

#[cfg(feature = "bench")]
use test::{self, Bencher};
use test::Bencher;

#[cfg(feature = "bench")]
#[bench]
Expand Down

0 comments on commit 4aa5fb8

Please sign in to comment.