Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emulate cpuid #1070

Merged
merged 3 commits into from
Aug 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ perf.data.old
/build_sysroot/sysroot
/build_sysroot/sysroot_src
/rust
/rand
/regex
/simple-raytracer
2 changes: 1 addition & 1 deletion clean_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
set -e

rm -rf target/ build_sysroot/{sysroot/,sysroot_src/,target/} perf.data{,.old}
rm -rf regex/ simple-raytracer/
rm -rf rand/ regex/ simple-raytracer/
23 changes: 23 additions & 0 deletions crate_patches/0001-rand-Enable-c2-chacha-simd-feature.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
From 9c5663e36391fa20becf84f3af2e82afa5bb720b Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sat, 15 Aug 2020 19:56:03 +0200
Subject: [PATCH] [rand] Enable c2-chacha simd feature

---
rand_chacha/Cargo.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rand_chacha/Cargo.toml b/rand_chacha/Cargo.toml
index 9190b7f..872cca2 100644
--- a/rand_chacha/Cargo.toml
+++ b/rand_chacha/Cargo.toml
@@ -24,5 +24,5 @@ ppv-lite86 = { version = "0.2.8", default-features = false }

[features]
default = ["std"]
-std = ["ppv-lite86/std"]
+std = ["ppv-lite86/std", "ppv-lite86/simd"]
simd = [] # deprecated
--
2.20.1

33 changes: 33 additions & 0 deletions crate_patches/0002-rand-Disable-failing-test.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
From a8fb97120d71252538b6b026695df40d02696bdb Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Sat, 15 Aug 2020 20:04:38 +0200
Subject: [PATCH] [rand] Disable failing test

---
src/distributions/uniform.rs | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index 480b859..c80bb6f 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -1085,7 +1085,7 @@ mod tests {
_ => panic!("`UniformDurationMode` was not serialized/deserialized correctly")
}
}
-
+
#[test]
#[cfg(feature = "serde1")]
fn test_uniform_serialization() {
@@ -1314,6 +1314,7 @@ mod tests {
not(target_arch = "wasm32"),
not(target_arch = "asmjs")
))]
+ #[ignore] // FIXME
fn test_float_assertions() {
use super::SampleUniform;
use std::panic::catch_unwind;
--
2.20.1

2 changes: 2 additions & 0 deletions example/std_example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ fn panic(_: u128) {

#[target_feature(enable = "sse2")]
unsafe fn test_simd() {
assert!(is_x86_feature_detected!("sse2"));

let x = _mm_setzero_si128();
let y = _mm_set1_epi16(7);
let or = _mm_or_si128(x, y);
Expand Down
27 changes: 0 additions & 27 deletions patches/0016-Disable-cpuid-intrinsic.patch

This file was deleted.

7 changes: 7 additions & 0 deletions prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ rustup component add rust-src rustc-dev llvm-tools-preview
./build_sysroot/prepare_sysroot_src.sh
cargo install hyperfine || echo "Skipping hyperfine install"

git clone https://github.com/rust-random/rand.git || echo "rust-random/rand has already been cloned"
pushd rand
git checkout -- .
git checkout 0f933f9c7176e53b2a3c7952ded484e1783f0bf1
git am ../crate_patches/*-rand-*.patch
popd

git clone https://github.com/rust-lang/regex.git || echo "rust-lang/regex has already been cloned"
pushd regex
git checkout -- .
Expand Down
52 changes: 36 additions & 16 deletions src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -681,37 +681,57 @@ fn trans_stmt<'tcx>(
use rustc_span::symbol::Symbol;
let LlvmInlineAsm {
asm,
outputs: _,
inputs: _,
outputs,
inputs,
} = &**asm;
let rustc_hir::LlvmInlineAsmInner {
asm: asm_code, // Name
outputs, // Vec<Name>
inputs, // Vec<Name>
outputs: output_names, // Vec<LlvmInlineAsmOutput>
inputs: input_names, // Vec<Name>
clobbers, // Vec<Name>
volatile, // bool
alignstack, // bool
dialect: _, // rustc_ast::ast::AsmDialect
dialect: _,
asm_str_style: _,
} = asm;
match &*asm_code.as_str() {
match asm_code.as_str().trim() {
"" => {
// Black box
}
cpuid if cpuid.contains("cpuid") => {
crate::trap::trap_unimplemented(
fx,
"__cpuid_count arch intrinsic is not supported",
);
"mov %rbx, %rsi\n cpuid\n xchg %rbx, %rsi" => {
assert_eq!(input_names, &[Symbol::intern("{eax}"), Symbol::intern("{ecx}")]);
assert_eq!(output_names.len(), 4);
for (i, c) in (&["={eax}", "={esi}", "={ecx}", "={edx}"]).iter().enumerate() {
assert_eq!(&output_names[i].constraint.as_str(), c);
assert!(!output_names[i].is_rw);
assert!(!output_names[i].is_indirect);
}

assert_eq!(clobbers, &[]);

assert!(!volatile);
assert!(!alignstack);

assert_eq!(inputs.len(), 2);
let leaf = trans_operand(fx, &inputs[0].1).load_scalar(fx); // %eax
let subleaf = trans_operand(fx, &inputs[1].1).load_scalar(fx); // %ecx

let (eax, ebx, ecx, edx) = crate::intrinsics::codegen_cpuid_call(fx, leaf, subleaf);

assert_eq!(outputs.len(), 4);
trans_place(fx, outputs[0]).write_cvalue(fx, CValue::by_val(eax, fx.layout_of(fx.tcx.types.u32)));
trans_place(fx, outputs[1]).write_cvalue(fx, CValue::by_val(ebx, fx.layout_of(fx.tcx.types.u32)));
trans_place(fx, outputs[2]).write_cvalue(fx, CValue::by_val(ecx, fx.layout_of(fx.tcx.types.u32)));
trans_place(fx, outputs[3]).write_cvalue(fx, CValue::by_val(edx, fx.layout_of(fx.tcx.types.u32)));
}
"xgetbv" => {
assert_eq!(inputs, &[Symbol::intern("{ecx}")]);
assert_eq!(input_names, &[Symbol::intern("{ecx}")]);

assert_eq!(outputs.len(), 2);
assert_eq!(output_names.len(), 2);
for (i, c) in (&["={eax}", "={edx}"]).iter().enumerate() {
assert_eq!(&outputs[i].constraint.as_str(), c);
assert!(!outputs[i].is_rw);
assert!(!outputs[i].is_indirect);
assert_eq!(&output_names[i].constraint.as_str(), c);
assert!(!output_names[i].is_rw);
assert!(!output_names[i].is_indirect);
}

assert_eq!(clobbers, &[]);
Expand Down
67 changes: 67 additions & 0 deletions src/intrinsics/cpuid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use crate::prelude::*;

/// Emulates a subset of the cpuid call.
///
/// This emulates an intel cpu with sse and sse2 support, but which doesn't support anything else.
pub(crate) fn codegen_cpuid_call<'tcx>(
fx: &mut FunctionCx<'_, 'tcx, impl Backend>,
leaf: Value,
_subleaf: Value,
) -> (Value, Value, Value, Value) {
let leaf_0 = fx.bcx.create_block();
let leaf_1 = fx.bcx.create_block();
let leaf_8000_0000 = fx.bcx.create_block();
let leaf_8000_0001 = fx.bcx.create_block();
let unsupported_leaf = fx.bcx.create_block();

let dest = fx.bcx.create_block();
let eax = fx.bcx.append_block_param(dest, types::I32);
let ebx = fx.bcx.append_block_param(dest, types::I32);
let ecx = fx.bcx.append_block_param(dest, types::I32);
let edx = fx.bcx.append_block_param(dest, types::I32);

let mut switch = cranelift_frontend::Switch::new();
switch.set_entry(0, leaf_0);
switch.set_entry(1, leaf_1);
switch.set_entry(0x8000_0000, leaf_8000_0000);
switch.set_entry(0x8000_0001, leaf_8000_0001);
switch.emit(&mut fx.bcx, leaf, unsupported_leaf);

fx.bcx.switch_to_block(leaf_0);
let max_basic_leaf = fx.bcx.ins().iconst(types::I32, 1);
let vend0 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"Genu")));
let vend2 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"ineI")));
let vend1 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"ntel")));
fx.bcx.ins().jump(dest, &[max_basic_leaf, vend0, vend1, vend2]);

fx.bcx.switch_to_block(leaf_1);
let cpu_signature = fx.bcx.ins().iconst(types::I32, 0);
let additional_information = fx.bcx.ins().iconst(types::I32, 0);
let ecx_features = fx.bcx.ins().iconst(
types::I32,
0,
);
let edx_features = fx.bcx.ins().iconst(
types::I32,
1 << 25 /* sse */ | 1 << 26 /* sse2 */,
);
fx.bcx.ins().jump(dest, &[cpu_signature, additional_information, ecx_features, edx_features]);

fx.bcx.switch_to_block(leaf_8000_0000);
let extended_max_basic_leaf = fx.bcx.ins().iconst(types::I32, 0);
let zero = fx.bcx.ins().iconst(types::I32, 0);
fx.bcx.ins().jump(dest, &[extended_max_basic_leaf, zero, zero, zero]);

fx.bcx.switch_to_block(leaf_8000_0001);
let zero = fx.bcx.ins().iconst(types::I32, 0);
let proc_info_ecx = fx.bcx.ins().iconst(types::I32, 0);
let proc_info_edx = fx.bcx.ins().iconst(types::I32, 0);
fx.bcx.ins().jump(dest, &[zero, zero, proc_info_ecx, proc_info_edx]);

fx.bcx.switch_to_block(unsupported_leaf);
crate::trap::trap_unreachable(fx, "__cpuid_count arch intrinsic doesn't yet support specified leaf");

fx.bcx.switch_to_block(dest);

(eax, ebx, ecx, edx)
}
25 changes: 25 additions & 0 deletions src/intrinsics/llvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,31 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
});
};
llvm.x86.sse2.psrli.d, (c a, o imm8) {
let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, res_lane_layout, lane| {
let res_lane = match imm8.val.try_to_bits(Size::from_bytes(4)).expect(&format!("imm8 not scalar: {:?}", imm8)) {
imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
_ => fx.bcx.ins().iconst(types::I32, 0),
};
CValue::by_val(res_lane, res_lane_layout)
});
};
llvm.x86.sse2.pslli.d, (c a, o imm8) {
let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8).expect("llvm.x86.sse2.psrli.d imm8 not const");
simd_for_each_lane(fx, a, ret, |fx, _lane_layout, res_lane_layout, lane| {
let res_lane = match imm8.val.try_to_bits(Size::from_bytes(4)).expect(&format!("imm8 not scalar: {:?}", imm8)) {
imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
_ => fx.bcx.ins().iconst(types::I32, 0),
};
CValue::by_val(res_lane, res_lane_layout)
});
};
llvm.x86.sse2.storeu.dq, (v mem_addr, c a) {
// FIXME correctly handle the unalignment
let dest = CPlace::for_ptr(Pointer::new(mem_addr), a.layout());
dest.write_cvalue(fx, a);
};
}

if let Some((_, dest)) = destination {
Expand Down
2 changes: 2 additions & 0 deletions src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
mod cpuid;
mod llvm;
mod simd;

pub(crate) use cpuid::codegen_cpuid_call;
pub(crate) use llvm::codegen_llvm_intrinsic_call;

use crate::prelude::*;
Expand Down
4 changes: 1 addition & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,11 @@ impl CodegenBackend for CraneliftCodegenBackend {
// rustdoc needs to be able to document functions that use all the features, so
// whitelist them all
target_features_whitelist::all_known_features()
.chain(Some(("cg_clif", None)))
.map(|(a, b)| (a.to_string(), b))
.collect()
} else {
target_features_whitelist::target_feature_whitelist(tcx.sess)
.iter()
.chain(&Some(("cg_clif", None)))
.map(|&(a, b)| (a.to_string(), b))
.collect()
}
Expand All @@ -199,7 +197,7 @@ impl CodegenBackend for CraneliftCodegenBackend {
fn provide_extern(&self, _providers: &mut Providers) {}

fn target_features(&self, _sess: &Session) -> Vec<rustc_span::Symbol> {
vec![rustc_span::Symbol::intern("cg_clif")]
vec![]
}

fn codegen_crate<'tcx>(
Expand Down
5 changes: 5 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ $RUN_WRAPPER ./target/out/track-caller-attribute
echo "[BUILD] mod_bench"
$RUSTC example/mod_bench.rs --crate-type bin --target $TARGET_TRIPLE

pushd rand
rm -r ./target || true
../cargo.sh test --workspace
popd

pushd simple-raytracer
if [[ "$HOST_TRIPLE" = "$TARGET_TRIPLE" ]]; then
echo "[BENCH COMPILE] ebobby/simple-raytracer"
Expand Down