Skip to content

Commit

Permalink
AlignedPaddedStr
Browse files Browse the repository at this point in the history
Summary: Extract aligned padded string from `StarlarkStr`.

Reviewed By: ndmitchell

Differential Revision: D37130200

fbshipit-source-id: 3497d62d6ad93bb24d46670c954997c8fdca2846
  • Loading branch information
stepancheg authored and facebook-github-bot committed Jun 20, 2022
1 parent 75a366d commit d71cf20
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 15 deletions.
67 changes: 67 additions & 0 deletions starlark-rust/starlark/src/collections/aligned_padded_str.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Copyright 2019 The Starlark in Rust Authors.
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use std::{marker::PhantomData, mem};

use gazebo::dupe::Dupe;

/// Borrowed view of string data that is `usize`-aligned and zero-padded at the end.
///
/// The view is a raw word pointer plus a byte length; the lifetime `'a` is carried
/// only through `PhantomData`, so the type is `Copy` and cheap to pass by value.
/// NOTE(review): the padding presumably extends to a whole-word boundary, since
/// equality reads the data one `usize` at a time; confirm against the allocator.
#[derive(Copy, Clone, Dupe)]
pub(crate) struct AlignedPaddedStr<'a> {
/// Length of the string in bytes (not in words).
len: usize,
/// Pointer to the first word of the data: `len` bytes of string content
/// followed by zero padding in the end.
data: *const usize,
/// Makes the struct behave, for lifetime purposes, as if it held a `&'a str`,
/// even though only a raw pointer is stored.
_marker: PhantomData<&'a str>,
}

impl<'a> AlignedPaddedStr<'a> {
#[inline]
pub(crate) unsafe fn new(len: usize, data: *const usize) -> AlignedPaddedStr<'a> {
AlignedPaddedStr {
len,
data,
_marker: PhantomData,
}
}

/// Len of string in words.
#[inline]
fn len_words(self) -> usize {
(self.len + mem::size_of::<usize>() - 1) / mem::size_of::<usize>()
}
}

impl<'a> PartialEq for AlignedPaddedStr<'a> {
    /// Two views are equal when their byte lengths match and every data word,
    /// including the padded trailing word, is identical.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // The strings are aligned, zero-padded and short, so comparing them
        // word by word does better than the generic SIMD-optimized `memcmp`:
        // https://rust.godbolt.org/z/cdscb37Yd
        //
        // The length check runs first; when it fails no memory is touched.
        self.len == other.len
            && (0..self.len_words()).all(|word| {
                // SAFETY: relies on the contract of `AlignedPaddedStr::new`: both
                // pointers must be readable for `len_words()` words, and the
                // lengths (hence the word counts) were just checked to be equal.
                unsafe { *self.data.add(word) == *other.data.add(word) }
            })
    }
}
1 change: 1 addition & 0 deletions starlark-rust/starlark/src/collections/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub use crate::collections::{
small_set::SmallSet,
};

pub(crate) mod aligned_padded_str;
pub(crate) mod alloca;
mod hash;
pub(crate) mod hasher;
Expand Down
24 changes: 9 additions & 15 deletions starlark-rust/starlark/src/values/types/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ use gazebo::{any::ProvidesStaticType, prelude::*};
use serde::Serialize;

use crate::{
collections::{Hashed, StarlarkHashValue, StarlarkHasher},
collections::{
aligned_padded_str::AlignedPaddedStr, Hashed, StarlarkHashValue, StarlarkHasher,
},
environment::{Methods, MethodsStatic},
private::Private,
values::{
Expand Down Expand Up @@ -106,20 +108,7 @@ impl Deref for StarlarkStr {

impl PartialEq for StarlarkStr {
fn eq(&self, other: &Self) -> bool {
if self.str.len != other.str.len {
return false;
}
// We know strings are aligned, zero-padded and short,
// so we can do better than generic SIMD-optimized `memcmp`
// https://rust.godbolt.org/z/cdscb37Yd
for i in 0..StarlarkStr::payload_len_for_len(self.len()) {
unsafe {
if self.str.body.get_unchecked(i) != other.str.body.get_unchecked(i) {
return false;
}
}
}
true
self.as_aligned_padded_str() == other.as_aligned_padded_str()
}
}

Expand Down Expand Up @@ -172,6 +161,11 @@ impl StarlarkStr {
}
}

/// Borrow this string as an `AlignedPaddedStr`, built from the string's byte
/// length and a pointer to the start of its body.
#[inline]
pub(crate) fn as_aligned_padded_str(&self) -> AlignedPaddedStr {
    let len_bytes = self.len();
    let first_word = self.str.body.as_ptr();
    // SAFETY: NOTE(review): presumably the body is stored as zero-padded `usize`
    // words covering `len_bytes`, as `AlignedPaddedStr` expects — confirm against
    // the `StarlarkStr` layout. The returned view borrows from `self`.
    unsafe { AlignedPaddedStr::new(len_bytes, first_word) }
}

/// Get cached hash value or compute if it is not cached yet.
pub fn get_hash(&self) -> StarlarkHashValue {
// Note relaxed load and store are practically non-locking memory operations.
Expand Down

0 comments on commit d71cf20

Please sign in to comment.