Skip to content

Commit

Permalink
Prefer use of java.util.zip.CRC32C for Framed format (#269)
Browse files Browse the repository at this point in the history
Co-authored-by: BO8979 <BO8979@W1971362.northamerica.cerner.net>
  • Loading branch information
bokken and BO8979 committed Jan 20, 2021
1 parent 110727e commit 822513d
Show file tree
Hide file tree
Showing 5 changed files with 1,426 additions and 1,379 deletions.
3 changes: 1 addition & 2 deletions src/main/java/org/xerial/snappy/PureJavaCrc32C.java
Expand Up @@ -48,8 +48,7 @@ public int getIntegerValue()
/** {@inheritDoc} */
public long getValue()
{
    // The field holds the bitwise complement of the reported checksum: invert it,
    // then keep only the low 32 bits so the result is a non-negative value in a long.
    return (~crc) & 0xffffffffL;
}

/** {@inheritDoc} */
Expand Down
290 changes: 165 additions & 125 deletions src/main/java/org/xerial/snappy/SnappyFramed.java
@@ -1,125 +1,165 @@
/*
* Created: Apr 12, 2013
*/
package org.xerial.snappy;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;

/**
* Constants and utilities for implementing x-snappy-framed.
*
* @author Brett Okken
* @since 1.1.0
*/
final class SnappyFramed
{
// NOTE(review): presumably the chunk-type byte marking a compressed data chunk in
// x-snappy-framed -- confirm against the framing format description.
public static final int COMPRESSED_DATA_FLAG = 0x00;

// NOTE(review): presumably the chunk-type byte marking an uncompressed data chunk -- confirm.
public static final int UNCOMPRESSED_DATA_FLAG = 0x01;

// Flag value that opens the stream header; it is the first byte of HEADER_BYTES.
public static final int STREAM_IDENTIFIER_FLAG = 0xff;

// Constant added to the rotated checksum by mask(int).
private static final int MASK_DELTA = 0xa282ead8;

/**
 * The stream header: the stream identifier flag, 3 bytes encoding a length of 6
 * (least significant byte first: 0x06, 0x00, 0x00), and the 6 ASCII bytes "sNaPpY".
 * <p>
 * NOTE(review): this is a public, mutable array; callers must not modify it.
 */
public static final byte[] HEADER_BYTES = new byte[] {
(byte) STREAM_IDENTIFIER_FLAG, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61,
0x50, 0x70, 0x59};

/**
 * Computes the masked checksum of an entire array.
 *
 * @param data the bytes to checksum; every element is included
 * @return the masked checksum of {@code data[0 .. data.length)}
 */
public static int maskedCrc32c(byte[] data)
{
    final int length = data.length;
    return maskedCrc32c(data, 0, length);
}

/**
 * Checksums a slice of {@code data} with a fresh {@code PureJavaCrc32C} and returns
 * the result in masked form.
 *
 * @param data   the array holding the bytes to checksum
 * @param offset index of the first byte to include
 * @param length number of bytes to include
 * @return {@code mask(int)} applied to the checksum's integer value
 */
public static int maskedCrc32c(byte[] data, int offset, int length)
{
    final PureJavaCrc32C checksum = new PureJavaCrc32C();
    checksum.update(data, offset, length);

    final int rawCrc = checksum.getIntegerValue();
    return mask(rawCrc);
}

/**
 * Masks a checksum before it is stored. Checksums are not stored directly, because
 * checksumming data and then its own checksum can be problematic. The masking is the
 * same as used in Apache Hadoop: rotate the checksum right by 15 bits, then add the
 * constant 0xa282ead8 (wrapping around as unsigned 32-bit arithmetic does). This is
 * equivalent to the following C code:
 *
 * <pre>{@code
 * uint32_t mask_checksum(uint32_t x) {
 *   return ((x >> 15) | (x << 17)) + 0xa282ead8;
 * }
 * }</pre>
 *
 * @param crc the unmasked checksum
 * @return the masked checksum
 */
public static int mask(int crc)
{
    // rotateRight(crc, 15) yields the same bits as (crc >>> 15) | (crc << 17).
    final int rotated = Integer.rotateRight(crc, 15);
    return rotated + MASK_DELTA;
}

/**
 * Reads from {@code source} into {@code dest} until the buffer is full or the
 * channel reports end of stream.
 * <p>
 * On return, if at least one byte was read the buffer's limit is set to its
 * position; otherwise the buffer's position is moved to its limit so that nothing
 * remains.
 *
 * @param source the channel to read from
 * @param dest   the buffer to fill; its remaining space is the number of bytes wanted
 * @return the total number of bytes read, or the value of the first read
 *         ({@code -1}) when the channel was already at end of stream
 * @throws IOException if reading from {@code source} fails
 */
static final int readBytes(ReadableByteChannel source, ByteBuffer dest)
        throws IOException
{
    // Number of bytes the caller is asking for.
    final int wanted = dest.remaining();

    // The first read seeds the total, so an immediate EOF is reported as -1.
    int count = source.read(dest);
    int total = count;

    // A short first read: keep going until the buffer fills up or EOF shows up.
    if (count < wanted) {
        while (count != -1 && dest.remaining() != 0) {
            count = source.read(dest);
            if (count == -1) {
                // EOF marker is not part of the byte count.
                break;
            }
            total += count;
        }
    }

    if (total <= 0) {
        // Nothing was read: leave the buffer with no remaining bytes.
        dest.position(dest.limit());
    }
    else {
        // Cap the limit at the end of what was actually read.
        dest.limit(dest.position());
    }

    return total;
}

/**
 * Discards up to {@code skip} bytes from {@code source}, using {@code buffer} as
 * scratch space. The buffer is cleared before every read and again before
 * returning, so its previous position and limit are not preserved.
 *
 * @param source the channel to consume bytes from
 * @param skip   the number of bytes to discard; values {@code <= 0} skip nothing
 * @param buffer scratch buffer the discarded bytes are read into
 * @return the number of bytes actually discarded, which is less than {@code skip}
 *         if end of stream is reached first
 * @throws IOException if reading from {@code source} fails
 */
static int skip(final ReadableByteChannel source, final int skip, final ByteBuffer buffer)
        throws IOException
{
    if (skip <= 0) {
        return 0;
    }

    int consumed = 0;
    int lastRead = 0;
    while (consumed < skip && lastRead != -1) {
        buffer.clear();

        // Never read past the requested amount.
        final int outstanding = skip - consumed;
        if (outstanding < buffer.capacity()) {
            buffer.limit(outstanding);
        }

        lastRead = source.read(buffer);
        if (lastRead > 0) {
            consumed += lastRead;
        }
    }

    buffer.clear();
    return consumed;
}
}
/*
* Created: Apr 12, 2013
*/
package org.xerial.snappy;

import java.io.IOException;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.Checksum;

/**
* Constants and utilities for implementing x-snappy-framed.
*
* @author Brett Okken
* @since 1.1.0
*/
final class SnappyFramed
{
// NOTE(review): presumably the chunk-type byte marking a compressed data chunk in
// x-snappy-framed -- confirm against the framing format description.
public static final int COMPRESSED_DATA_FLAG = 0x00;

// NOTE(review): presumably the chunk-type byte marking an uncompressed data chunk -- confirm.
public static final int UNCOMPRESSED_DATA_FLAG = 0x01;

// Flag value that opens the stream header; it is the first byte of HEADER_BYTES.
public static final int STREAM_IDENTIFIER_FLAG = 0xff;

// Constant added to the rotated checksum by mask(int).
private static final int MASK_DELTA = 0xa282ead8;

/**
 * Factory for the checksum implementation handed out by this class:
 * {@code java.util.zip.CRC32C} when that class can be loaded and its public no-arg
 * constructor resolved, otherwise {@code PureJavaCrc32C}.
 */
private static final Supplier<Checksum> CHECKSUM_SUPPLIER;

static
{
    Supplier<Checksum> supplier = null;
    try
    {
        // Resolved by name rather than referenced directly, so this class still loads
        // on runtimes where java.util.zip.CRC32C does not exist.
        final Class<?> crc32cClazz = Class.forName("java.util.zip.CRC32C");
        final MethodHandles.Lookup lookup = MethodHandles.publicLookup();

        // Adapt the constructor handle to return Checksum: invokeExact() requires the
        // call-site signature to match the handle's type exactly.
        final MethodHandle conHandle = lookup.findConstructor(crc32cClazz, MethodType.methodType(void.class))
                .asType(MethodType.methodType(Checksum.class));
        supplier = () -> {
            try
            {
                return (Checksum) conHandle.invokeExact();
            }
            catch (Throwable e)
            {
                throw new IllegalStateException(e);
            }
        };
    }
    catch (Throwable t)
    {
        // Deliberately broad: any failure to resolve CRC32C leaves supplier null and
        // falls through to the pure-Java implementation below.
        Logger.getLogger(SnappyFramed.class.getName())
                .log(Level.FINE, "java.util.zip.CRC32C not loaded, using PureJavaCrc32C", t);
    }

    CHECKSUM_SUPPLIER = supplier != null ? supplier : PureJavaCrc32C::new;
}

/**
 * The stream header: the stream identifier flag, 3 bytes encoding a length of 6
 * (least significant byte first: 0x06, 0x00, 0x00), and the 6 ASCII bytes "sNaPpY".
 * <p>
 * NOTE(review): this is a public, mutable array; callers must not modify it.
 */
public static final byte[] HEADER_BYTES = new byte[] {
(byte) STREAM_IDENTIFIER_FLAG, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61,
0x50, 0x70, 0x59};

/**
 * Obtains a checksum instance from the supplier selected during class initialization.
 *
 * @return the {@link Checksum} produced by {@code CHECKSUM_SUPPLIER}
 */
public static Checksum getCRC32C()
{
    final Checksum checksum = CHECKSUM_SUPPLIER.get();
    return checksum;
}

/**
 * Checksums a slice of {@code data} with the supplied {@link Checksum} and returns
 * the result in masked form. The checksum is reset first, so any state it carried
 * from earlier updates is discarded.
 *
 * @param crc32c the checksum instance to use; it is reset and then updated
 * @param data   the array holding the bytes to checksum
 * @param offset index of the first byte to include
 * @param length number of bytes to include
 * @return {@code mask(int)} applied to the checksum value narrowed to {@code int}
 */
public static int maskedCrc32c(Checksum crc32c, byte[] data, int offset, int length)
{
    crc32c.reset();
    crc32c.update(data, offset, length);

    // getValue() returns a long; only its low 32 bits are kept.
    final int rawCrc = (int) crc32c.getValue();
    return mask(rawCrc);
}

/**
 * Masks a checksum before it is stored. Checksums are not stored directly, because
 * checksumming data and then its own checksum can be problematic. The masking is the
 * same as used in Apache Hadoop: rotate the checksum right by 15 bits, then add the
 * constant 0xa282ead8 (wrapping around as unsigned 32-bit arithmetic does). This is
 * equivalent to the following C code:
 *
 * <pre>{@code
 * uint32_t mask_checksum(uint32_t x) {
 *   return ((x >> 15) | (x << 17)) + 0xa282ead8;
 * }
 * }</pre>
 *
 * @param crc the unmasked checksum
 * @return the masked checksum
 */
public static int mask(int crc)
{
    // rotateRight(crc, 15) yields the same bits as (crc >>> 15) | (crc << 17).
    final int rotated = Integer.rotateRight(crc, 15);
    return rotated + MASK_DELTA;
}

/**
 * Reads from {@code source} into {@code dest} until the buffer is full or the
 * channel reports end of stream.
 * <p>
 * On return, if at least one byte was read the buffer's limit is set to its
 * position; otherwise the buffer's position is moved to its limit so that nothing
 * remains.
 *
 * @param source the channel to read from
 * @param dest   the buffer to fill; its remaining space is the number of bytes wanted
 * @return the total number of bytes read, or the value of the first read
 *         ({@code -1}) when the channel was already at end of stream
 * @throws IOException if reading from {@code source} fails
 */
static final int readBytes(ReadableByteChannel source, ByteBuffer dest)
        throws IOException
{
    // Number of bytes the caller is asking for.
    final int wanted = dest.remaining();

    // The first read seeds the total, so an immediate EOF is reported as -1.
    int count = source.read(dest);
    int total = count;

    // A short first read: keep going until the buffer fills up or EOF shows up.
    if (count < wanted) {
        while (count != -1 && dest.remaining() != 0) {
            count = source.read(dest);
            if (count == -1) {
                // EOF marker is not part of the byte count.
                break;
            }
            total += count;
        }
    }

    if (total <= 0) {
        // Nothing was read: leave the buffer with no remaining bytes.
        dest.position(dest.limit());
    }
    else {
        // Cap the limit at the end of what was actually read.
        dest.limit(dest.position());
    }

    return total;
}

/**
 * Discards up to {@code skip} bytes from {@code source}, using {@code buffer} as
 * scratch space. The buffer is cleared before every read and again before
 * returning, so its previous position and limit are not preserved.
 *
 * @param source the channel to consume bytes from
 * @param skip   the number of bytes to discard; values {@code <= 0} skip nothing
 * @param buffer scratch buffer the discarded bytes are read into
 * @return the number of bytes actually discarded, which is less than {@code skip}
 *         if end of stream is reached first
 * @throws IOException if reading from {@code source} fails
 */
static int skip(final ReadableByteChannel source, final int skip, final ByteBuffer buffer)
        throws IOException
{
    if (skip <= 0) {
        return 0;
    }

    int consumed = 0;
    int lastRead = 0;
    while (consumed < skip && lastRead != -1) {
        buffer.clear();

        // Never read past the requested amount.
        final int outstanding = skip - consumed;
        if (outstanding < buffer.capacity()) {
            buffer.limit(outstanding);
        }

        lastRead = source.read(buffer);
        if (lastRead > 0) {
            consumed += lastRead;
        }
    }

    buffer.clear();
    return consumed;
}
}

0 comments on commit 822513d

Please sign in to comment.