Skip to content

Commit

Permalink
(c2rust-analyze) Support ptr-to-ptr casts between safely transmutab…
Browse files Browse the repository at this point in the history
…le types, for now limited to same-sized integers (#839)

- Fixes #840

This introduces the concept of equivalent/compatible/safely transmutable
types:
https://github.com/immunant/c2rust/blob/2915b8d0c71add21dee1f9d540958ea863478212/c2rust-analyze/src/util.rs#L356-L380

Thus, we can now allow ptr-to-ptr casts between safely transmutable
pointee types, whereas previously they were only allowed for equal
types. In particular, this enables support for string casts, which are
produced by `c2rust transpile` as `b"" as *const u8 as *const
core::ffi::c_char`, where `c_char = i8`. Thus, this fixes #840.

New tests are added in `string_casts.rs` to cover various ptr casts,
though some of them crash in the rewriter due to having implicitly
inserted MIR statements like implicit `&raw`s, which are inserted with
`addr_of!`s. Thus, for some of these (where it works), there are
versions with explicit `addr_of!`s that succeed end-to-end.
  • Loading branch information
kkysen committed Jun 9, 2023
2 parents 6f1aeb8 + 31baf0a commit d0fa6ea
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 39 deletions.
84 changes: 50 additions & 34 deletions c2rust-analyze/src/dataflow/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use super::DataflowConstraints;
use crate::c_void_casts::CVoidCastDirection;
use crate::context::{AnalysisCtxt, LTy, PermissionSet, PointerId};
use crate::panic_detail;
use crate::util::{describe_rvalue, is_null_const, ty_callee, Callee, RvalueDesc};
use crate::util::{
describe_rvalue, is_null_const, is_transmutable_ptr_cast, ty_callee, Callee, RvalueDesc,
};
use assert_matches::assert_matches;
use rustc_hir::def_id::DefId;
use rustc_middle::mir::{
Expand Down Expand Up @@ -43,11 +45,6 @@ struct TypeChecker<'tcx, 'a> {
equiv_constraints: Vec<(PointerId, PointerId)>,
}

fn is_castable_to<'tcx>(_from_lty: LTy<'tcx>, _to_lty: LTy<'tcx>) -> bool {
// TODO: implement
true
}

impl<'tcx> TypeChecker<'tcx, '_> {
fn add_edge(&mut self, src: PointerId, dest: PointerId) {
// Copying `src` to `dest` can discard permissions, but can't add new ones.
Expand Down Expand Up @@ -105,6 +102,7 @@ impl<'tcx> TypeChecker<'tcx, '_> {
fn visit_cast(&mut self, cast_kind: CastKind, op: &Operand<'tcx>, to_lty: LTy<'tcx>) {
let to_ty = to_lty.ty;
let from_lty = self.acx.type_of(op);
let from_ty = from_lty.ty;

match cast_kind {
CastKind::PointerFromExposedAddress => {
Expand All @@ -114,31 +112,40 @@ impl<'tcx> TypeChecker<'tcx, '_> {
panic!("Creating non-null pointers from exposed addresses not supported");
}
}
CastKind::Pointer(PointerCast::Unsize) => {
let pointee_to_ty = to_ty
.builtin_deref(true)
.unwrap_or_else(|| panic!("unsize cast has non-pointer output {:?}?", to_ty))
.ty;

assert_matches!(from_lty.kind(), TyKind::Ref(..) | TyKind::RawPtr(..));
let pointee_from_lty = assert_matches!(from_lty.args, [lty] => lty);

let elem_to_ty = assert_matches!(pointee_to_ty.kind(), &TyKind::Slice(ty) => ty);
assert!(matches!(pointee_from_lty.kind(), TyKind::Array(..)));
let elem_from_lty = assert_matches!(pointee_from_lty.args, [lty] => lty);
assert_eq!(elem_from_lty.ty, elem_to_ty);
assert_eq!(pointee_from_lty.label, PointerId::NONE);
self.do_assign_pointer_ids(to_lty.label, from_lty.label);
CastKind::PointerExposeAddress => {
// Allow, as [`CastKind::PointerFromExposedAddress`] is the dangerous one,
// and we'll catch (not allow) that above.
// This becomes no longer a pointer, so we don't need to add any dataflow constraints
// (until we try to handle [`CastKind::PointerFromExposedAddress`], if we do).
}
CastKind::Pointer(..) => {
// The source and target types are both pointers, and they have identical pointee types.
// TODO: remove or move check to `is_castable_to`
assert!(from_lty.args[0].ty == to_lty.args[0].ty);
assert!(is_castable_to(from_lty, to_lty));
CastKind::Pointer(ptr_cast) => {
// All of these [`PointerCast`]s are type checked by rustc already.
// They don't involve arbitrary raw ptr to raw ptr casts
// ([PointerCast::MutToConstPointer`] doesn't allow changing types),
// which we need to check for safe transmutability,
// and which are (currently) covered in [`CastKind::Misc`].
// That's why there's a `match` here that does nothing;
// it ensures if [`PointerCast`] is changed in a future `rustc` version,
// this won't compile until we've checked that this reasoning is still accurate.
match ptr_cast {
PointerCast::ReifyFnPointer => {}
PointerCast::UnsafeFnPointer => {}
PointerCast::ClosureFnPointer(_) => {}
PointerCast::MutToConstPointer => {}
PointerCast::ArrayToPointer => {}
PointerCast::Unsize => {}
}
self.do_assign_pointer_ids(to_lty.label, from_lty.label)
// TODO add other dataflow constraints
}
_ => {
// A cast such as `T as U`
assert!(is_castable_to(from_lty, to_lty));
CastKind::Misc => {
match is_transmutable_ptr_cast(from_ty, to_ty) {
Some(true) => {
// TODO add other dataflow constraints
},
Some(false) => ::log::error!("TODO: unsupported ptr-to-ptr cast between pointee types not yet supported as safely transmutable: `{from_ty:?} as {to_ty:?}`"),
None => {}, // not a ptr cast (no dataflow constraints needed); let rustc typeck this
};
}
}

Expand Down Expand Up @@ -295,12 +302,21 @@ impl<'tcx> TypeChecker<'tcx, '_> {
}
}

/// Unify corresponding `PointerId`s in `lty1` and `lty2`.
/// Unify corresponding [`PointerId`]s in `lty1` and `lty2`.
///
/// The two inputs must have identical underlying types.
/// For any position where the underlying type has a pointer,
/// this function unifies the [`PointerId`]s that `lty1` and `lty2` have at that position.
/// For example, given
///
/// ```
/// # fn(
/// lty1: *mut /*l1*/ *const /*l2*/ u8,
/// lty2: *mut /*l3*/ *const /*l4*/ u8,
/// # ) {}
/// ```
///
/// The two inputs must have identical underlying types. For any position where the underlying
/// type has a pointer, this function unifies the `PointerId`s that `lty1` and `lty2` have at
/// that position. For example, given `lty1 = *mut /*l1*/ *const /*l2*/ u8` and `lty2 = *mut
/// /*l3*/ *const /*l4*/ u8`, this function will unify `l1` with `l3` and `l2` with `l4`.
/// this function will unify `l1` with `l3` and `l2` with `l4`.
fn do_unify(&mut self, lty1: LTy<'tcx>, lty2: LTy<'tcx>) {
assert_eq!(
self.acx.tcx().erase_regions(lty1.ty),
Expand Down
85 changes: 85 additions & 0 deletions c2rust-analyze/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use rustc_middle::ty::{
self, AdtDef, DefIdTree, EarlyBinder, Subst, SubstsRef, Ty, TyCtxt, TyKind, UintTy,
};
use rustc_span::symbol::Symbol;
use rustc_type_ir::IntTy;
use std::fmt::Debug;

#[derive(Debug)]
Expand Down Expand Up @@ -356,6 +357,90 @@ pub fn is_null_const(constant: Constant) -> bool {
pub trait PhantomLifetime<'a> {}
impl<'a, T: ?Sized> PhantomLifetime<'a> for T {}

/// Determine if `from` can be safely transmuted to `to`,
/// which is defined as `*(from as *const To)` being a safe operation,
/// where `from: *const From` and assuming `*from` already was safe.
///
/// Note that this is one-way, and is slightly different from [`core::mem::transmute`],
/// and more similar to [`core::mem::transmute_copy`].
///
/// This forms a reflexive, transitive, and non-symmetric (one-way) relation, named `~` below.
/// Formally, `A ~ B` iff whenever `*a` is well-defined (i.e., not UB),
/// `*(a as *const B)` is also well-defined, where `a: *const A`.
///
/// However, safe transmutability is difficult to check completely,
/// so this function only checks a subset of it,
/// with these formal rules for all types `A`, `B`:
///
/// 1. `A = B => A ~ B`
/// 2. `A ~ B => *A ~ *B`
/// 3. `uN ~ iN`, `iN ~ uN`, where `N` is an integer width
/// 4. `A ~ B, N > 0 => [A; N] ~ B`, where `const N: usize`
///
/// Note: 5. `A ~ B => [A] ~ B` is not a rule because it would be unsound for zero-length slices,
/// which we cannot check unlike for arrays, which we need for translated string literals.
///
/// Thus, [`true`] means it is definitely transmutable,
/// while [`false`] means it may not be transmutable.
///
/// Also note that for `A ~ B`, we need at least
/// * `size_of::<A>() >= size_of::<B>()`
/// * `align_of::<A>() >= align_of::<B>()`
///
/// For rules 1 and 3, this obviously holds.
/// For rule 2, this holds as long as
/// `A ~ B` implies that (`*B` is a fat ptr implies `*A` is a fat ptr).
///
/// For rule 1, this holds trivially.
/// For rule 2, this holds because `**A` and `**B` are always thin ptrs.
/// For rule 3, this holds trivially.
/// For rule 4, this holds because if `*A` is a fat ptr,
/// `A` is unsized, and thus `[A; N]` is ill-formed to begin with.
/// For (almost) rule 5, this holds because `*[A]` is always a fat ptr.
pub fn is_transmutable_to<'tcx>(from: Ty<'tcx>, to: Ty<'tcx>) -> bool {
let transmutable_ints = || {
use IntTy::*;
use UintTy::*;
match (from.kind(), to.kind()) {
(ty::Uint(u), ty::Int(i)) | (ty::Int(i), ty::Uint(u)) => {
matches!(
(u, i),
(Usize, Isize) | (U8, I8) | (U16, I16) | (U32, I32) | (U64, I64)
)
}
_ => false,
}
};

let one_way_transmutable = || match *from.kind() {
ty::Array(from, n) => {
is_transmutable_to(from, to) && {
let is_zero = n.kind().try_to_scalar_int().unwrap().is_null();
!is_zero
}
}
_ => false,
};

from == to
|| is_transmutable_ptr_cast(from, to).unwrap_or(false)
|| transmutable_ints()
|| one_way_transmutable()
}

/// Determine if `from as to` is a ptr-to-ptr cast.
/// and if it is, if the pointee types are [safely transmutable](is_transmutable_to).
///
/// This returns [`Some`]`(is_transmutable)` if they're both pointers,
/// and [`None`] if its some other types.
///
/// See [`is_transmutable_to`] for the definition of safe transmutability.
pub fn is_transmutable_ptr_cast<'tcx>(from: Ty<'tcx>, to: Ty<'tcx>) -> Option<bool> {
let from = from.builtin_deref(true)?.ty;
let to = to.builtin_deref(true)?.ty;
Some(is_transmutable_to(from, to))
}

#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum TestAttr {
/// `#[c2rust_analyze_test::fixed_signature]`: Mark all pointers in the function signature as
Expand Down
32 changes: 29 additions & 3 deletions c2rust-analyze/tests/analyze/string_casts.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,35 @@
#[cfg(any())]
pub fn cast_only(s: *const u8) {
pub fn cast_ptr_to_ptr(s: *const u8) {
s as *const core::ffi::c_char;
}

pub fn deep_cast_ptr_to_ptr(x: *const *const u8) {
x as *const *const i8;
}

/// For the below disabled (`#[cfg(any())]`ed) tests, they currently crash in the rewriter
/// due to it not being able to handle implicitly inserted `&raw` MIR statements yet.
/// Thus, they also have `*_explicit` versions where
/// a `std::ptr::addr_of!` is used to make the `&raw` explicit.
///
/// Also note that `addr_of!` (with a `use std::ptr::addr_of`)
/// and `::core::ptr::addr_of!` don't work either,
/// though `std::ptr::addr_of`, `::std::ptr::addr_of!`,
/// and `core::ptr::addr_of!` do work.

#[cfg(any())]
pub fn cast_array_to_ptr(s: &[u8; 1]) {
s as *const u8;
}

pub fn cast_array_to_ptr_explicit(s: &[u8; 1]) {
std::ptr::addr_of!(*s) as *const u8;
}

#[cfg(any())]
pub fn cast_from_literal() {
b"" as *const u8 as *const core::ffi::c_char;
b"\0" as *const u8 as *const core::ffi::c_char;
}

pub fn cast_from_literal_explicit() {
std::ptr::addr_of!(*b"\0") as *const u8 as *const core::ffi::c_char;
}
4 changes: 2 additions & 2 deletions c2rust-analyze/tests/filecheck/addr_of.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ fn shared_ref_with_struct() {
let y = std::ptr::addr_of!(x.a);
}

// CHECK-LABEL: fn cast_array_to_ptr_explicit(s: &[u8; 0]) {
pub fn cast_array_to_ptr_explicit(s: &[u8; 0]) {
// CHECK-LABEL: fn cast_array_to_ptr_explicit(s: &[u8; 1]) {
pub fn cast_array_to_ptr_explicit(s: &[u8; 1]) {
// CHECK-DAG: &*(std::ptr::addr_of!(*s)) as *const u8
std::ptr::addr_of!(*s) as *const u8;
}

0 comments on commit d0fa6ea

Please sign in to comment.