Skip to content

Commit

Permalink
fix: Optimize file checksums calculation (#5112)
Browse files Browse the repository at this point in the history
* calculate checksums more efficiently

checksums are frequently calculated together (see Dependency constructor)
so it makes sense to only read files once to calculate all checksums together

* remove commons-codec dependency

use the already existing checksum methods instead
  • Loading branch information
koscejev committed Dec 2, 2022
1 parent 823f739 commit afb09b3
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 61 deletions.
4 changes: 0 additions & 4 deletions utils/pom.xml
Expand Up @@ -54,10 +54,6 @@ Copyright (c) 2014 - Jeremy Long. All Rights Reserved.
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down
117 changes: 60 additions & 57 deletions utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java
Expand Up @@ -17,22 +17,18 @@
*/
package org.owasp.dependencycheck.utils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.codec.digest.DigestUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Includes methods to generate the MD5, SHA-1, and SHA-256 checksums.
Expand All @@ -46,6 +42,10 @@ public final class Checksum {
* Hex code characters used in getHex.
*/
private static final String HEXES = "0123456789abcdef";
/**
* Buffer size for calculating checksums.
*/
private static final int BUFFER_SIZE = 1024;

/**
* The logger.
Expand All @@ -58,21 +58,15 @@ public final class Checksum {
/**
* SHA1 constant.
*/
private static final String SHA1 = "SHA1";
private static final String SHA1 = "SHA-1";
/**
* SHA256 constant.
*/
private static final String SHA256 = "SHA-256";
/**
* Cached file checksums for each supported algorithm.
*/
private static final Map<String, Map<File, String>> CHECKSUM_CACHES = new HashMap<>(3);

static {
CHECKSUM_CACHES.put(MD5, new ConcurrentHashMap<>());
CHECKSUM_CACHES.put(SHA256, new ConcurrentHashMap<>());
CHECKSUM_CACHES.put(SHA1, new ConcurrentHashMap<>());
}
private static final Map<File, FileChecksums> CHECKSUM_CACHE = new ConcurrentHashMap<>();

/**
* Private constructor for a utility class.
Expand All @@ -93,30 +87,39 @@ private Checksum() {
* specified that does not exist
*/
public static String getChecksum(String algorithm, File file) throws NoSuchAlgorithmException, IOException {
final Map<File, String> checksumCache = CHECKSUM_CACHES.get(algorithm.toUpperCase());
if (checksumCache == null) {
throw new NoSuchAlgorithmException(algorithm);
}
String checksum = checksumCache.get(file);
try (InputStream stream = new FileInputStream(file)) {
if (checksum == null) {
switch (algorithm.toUpperCase()) {
case MD5:
checksum = DigestUtils.md5Hex(stream);
break;
case SHA1:
checksum = DigestUtils.sha1Hex(stream);
break;
case SHA256:
checksum = DigestUtils.sha256Hex(stream);
break;
default:
throw new NoSuchAlgorithmException(algorithm);
FileChecksums fileChecksums = CHECKSUM_CACHE.get(file);
if (fileChecksums == null) {
try (InputStream stream = Files.newInputStream(file.toPath())) {
final MessageDigest md5Digest = getMessageDigest(MD5);
final MessageDigest sha1Digest = getMessageDigest(SHA1);
final MessageDigest sha256Digest = getMessageDigest(SHA256);
final byte[] buffer = new byte[BUFFER_SIZE];
int read = stream.read(buffer, 0, BUFFER_SIZE);
while (read > -1) {
// update all checksums together instead of reading the file multiple times
md5Digest.update(buffer, 0, read);
sha1Digest.update(buffer, 0, read);
sha256Digest.update(buffer, 0, read);
read = stream.read(buffer, 0, BUFFER_SIZE);
}
checksumCache.put(file, checksum);
fileChecksums = new FileChecksums(
getHex(md5Digest.digest()),
getHex(sha1Digest.digest()),
getHex(sha256Digest.digest())
);
CHECKSUM_CACHE.put(file, fileChecksums);
}
}
return checksum;
switch (algorithm.toUpperCase()) {
case MD5:
return fileChecksums.md5;
case SHA1:
return fileChecksums.sha1;
case SHA256:
return fileChecksums.sha256;
default:
throw new NoSuchAlgorithmException(algorithm);
}
}

/**
Expand Down Expand Up @@ -167,16 +170,7 @@ public static String getSHA256Checksum(File file) throws IOException, NoSuchAlgo
* @return the hex representation of the checksum for the given algorithm
*/
public static String getChecksum(String algorithm, byte[] bytes) {
switch (algorithm.toUpperCase()) {
case MD5:
return DigestUtils.md5Hex(bytes);
case SHA1:
return DigestUtils.sha1Hex(bytes);
case SHA256:
return DigestUtils.sha256Hex(bytes);
default:
return null;
}
return getHex(getMessageDigest(algorithm).digest(bytes));
}

/**
Expand All @@ -186,7 +180,7 @@ public static String getChecksum(String algorithm, byte[] bytes) {
* @return the hex representation of the MD5
*/
public static String getMD5Checksum(String text) {
return DigestUtils.md5Hex(text);
return getChecksum(MD5, stringToBytes(text));
}

/**
Expand All @@ -196,7 +190,7 @@ public static String getMD5Checksum(String text) {
* @return the hex representation of the SHA1
*/
public static String getSHA1Checksum(String text) {
return DigestUtils.sha1Hex(text);
return getChecksum(SHA1, stringToBytes(text));
}

/**
Expand All @@ -206,7 +200,7 @@ public static String getSHA1Checksum(String text) {
* @return the hex representation of the SHA256
*/
public static String getSHA256Checksum(String text) {
return DigestUtils.sha256Hex(text);
return getChecksum(SHA256, stringToBytes(text));
}

/**
Expand All @@ -216,13 +210,7 @@ public static String getSHA256Checksum(String text) {
* @return the bytes
*/
private static byte[] stringToBytes(String text) {
byte[] data;
try {
data = text.getBytes(Charset.forName(StandardCharsets.UTF_8.name()));
} catch (UnsupportedCharsetException ex) {
data = text.getBytes(Charset.defaultCharset());
}
return data;
return text.getBytes(StandardCharsets.UTF_8);
}

/**
Expand Down Expand Up @@ -262,4 +250,19 @@ private static MessageDigest getMessageDigest(String algorithm) {
throw new IllegalStateException(msg, e);
}
}

/**
 * Immutable holder for the hex-encoded checksums of a single file, one
 * value per supported algorithm. Keeping the three values together lets
 * them be computed in one pass over the file and cached as one entry.
 */
private static class FileChecksums {
    /** Hex-encoded MD5 checksum of the file. */
    private final String md5;
    /** Hex-encoded SHA-1 checksum of the file. */
    private final String sha1;
    /** Hex-encoded SHA-256 checksum of the file. */
    private final String sha256;

    /**
     * Creates a holder from already computed checksum strings.
     *
     * @param md5Hex the hex representation of the MD5 checksum
     * @param sha1Hex the hex representation of the SHA-1 checksum
     * @param sha256Hex the hex representation of the SHA-256 checksum
     */
    public FileChecksums(String md5Hex, String sha1Hex, String sha256Hex) {
        md5 = md5Hex;
        sha1 = sha1Hex;
        sha256 = sha256Hex;
    }
}
}

0 comments on commit afb09b3

Please sign in to comment.