From 89c664891a51f0e765c6a4228fa6e9d4b39874a2 Mon Sep 17 00:00:00 2001 From: Allan Clements Date: Tue, 16 May 2023 16:58:32 -0500 Subject: [PATCH] Fixed Java magic byte detection and added test --- src/matchers/app.rs | 31 +++++++++++++++++++++++-------- testdata/sample.class | Bin 0 -> 425 bytes tests/app.rs | 8 ++++++++ 3 files changed, 31 insertions(+), 8 deletions(-) create mode 100644 testdata/sample.class diff --git a/src/matchers/app.rs b/src/matchers/app.rs index 2e77fa2..53efdeb 100644 --- a/src/matchers/app.rs +++ b/src/matchers/app.rs @@ -44,13 +44,23 @@ pub fn is_elf(buf: &[u8]) -> bool { /// Returns whether a buffer is compiled Java bytecode. pub fn is_java(buf: &[u8]) -> bool { - buf.len() >= 8 - && buf[0] == 0x43 - && buf[1] == 0x41 - && buf[2] == 0x76 - && buf[3] == 0x45 - && ((buf[4] == 0x42 && buf[5] == 0x01 && buf[6] == 0x42 && buf[7] == 0x45) - || (buf[4] == 0x44 && buf[5] == 0x30 && buf[6] == 0x30 && buf[7] == 0x44)) + if buf.len() < 8 || [0xca, 0xfe, 0xba, 0xbe] != buf[0..4] { + return false; + } + + //Checking the next 4 bytes are greater than or equal to 45 to distinguish from Mach-O binaries + //Mach-O "Fat" binaries also use 0xCAFEBABE as magic bytes to start the file + //Java class files are always Big Endian, after the magic bytes there are 2 bytes for the class file's + //minor version and then 2 bytes for the major version + //https://docs.oracle.com/javase/specs/jvms/se20/html/jvms-4.html + let minor_major_bytes = [buf[4], buf[5], buf[6], buf[7]]; + if u32::from_be_bytes(minor_major_bytes) < 45 { + //Java class files start at a major version of 45 and a minor of 0 + //So a value less than this shouldn't be a Java class file + return false; + } + //For due diligence, confirm that the major version bytes are greater than or equal to 45 + u16::from_be_bytes([buf[6], buf[7]]) >= 45 } /// Returns whether a buffer is LLVM Bitcode. 
@@ -70,7 +80,12 @@ pub fn is_mach(buf: &[u8]) -> bool { match buf[0..4] { [width, 0xfa, 0xed, 0xfe] if width == 0xcf || width == 0xce => true, [0xfe, 0xed, 0xfa, width] if width == 0xcf || width == 0xce => true, - [0xca, 0xfe, 0xba, 0xbe] => true, + [0xca, 0xfe, 0xba, 0xbe] if buf.len() >= 8 => { + //Checking the next 4 bytes are less than 45 to distinguish from Java class files + //which also use 0xCAFEBABE as magic bytes + //Fat Mach-O binaries are always Big Endian + u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]) < 45 + }, _ => false, } } diff --git a/testdata/sample.class b/testdata/sample.class new file mode 100644 index 0000000000000000000000000000000000000000..11f731308a8cdefc4ddea6cc914289211087be89 GIT binary patch literal 425 zcmZvZ%}T>S6ot=i(xyqHsjaQQ1ziZK3-bU~H-fk*Dn#nSRniV+N|KS72tJlB6kPZK zK9qPTE{f1u+@G0qJ}!5@e?GqeoMGUg2E&5sqK*cky@;NpAc?Yha1$@Yv>-H2WhRSr zg5eJ*Hkz<4I4)Xn|H;Fbr$VHxQYD*u;|e5IaGT4l2#Z`q>8OcyLhDK-i8{Dfc`|d* z#)gHCi!L?^-4!N+^{5d^##H|gTg}HznaxMTiG@DFsi+l{U|y)1Ab4Y$iR&_rMSd5> z3E!A$B$?CqA69{w5~?zvic6`*-V%AD2g4zHye)ldAiXb+Tigve6X*Kz2k4ax!Zt_! ltZN%P*yXIHN0k+@-(Ypt-r+6f`X>N`7i_Y4kLw!te*wDSS4#i@ literal 0 HcmV?d00001 diff --git a/tests/app.rs b/tests/app.rs index ae19b25..2576c8c 100644 --- a/tests/app.rs +++ b/tests/app.rs @@ -42,6 +42,14 @@ test_format!( "sample_mach_fat" ); +test_format!( + App, + "application/java", + "class", + java, + "sample.class" +); + test_format!(App, "application/wasm", "wasm", wasm, "sample.wasm"); test_format!(App, "application/x-x509-ca-cert", "der", der, "sample.der");