From f00fc23fbe37d26644e84a562d3f20b4fb51025b Mon Sep 17 00:00:00 2001 From: Anton Koscejev Date: Fri, 2 Dec 2022 12:00:07 +0100 Subject: [PATCH 1/2] calculate checksums more efficiently checksums are frequently calculated together (see Dependency constructor) so it makes sense to only read files once to calculate all checksums together --- .../owasp/dependencycheck/utils/Checksum.java | 90 ++++++++++++------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java b/utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java index d6efb8ebbba..10ab6ba48d1 100644 --- a/utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java +++ b/utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java @@ -17,22 +17,22 @@ */ package org.owasp.dependencycheck.utils; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.charset.UnsupportedCharsetException; +import java.nio.file.Files; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import org.apache.commons.codec.digest.DigestUtils; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Includes methods to generate the MD5 and SHA1 checksum. @@ -46,6 +46,10 @@ public final class Checksum { * Hex code characters used in getHex. */ private static final String HEXES = "0123456789abcdef"; + /** + * Buffer size for calculating checksums. + */ + private static final int BUFFER_SIZE = 1024; /** * The logger. 
@@ -66,13 +70,7 @@ public final class Checksum { /** * Cached file checksums for each supported algorithm. */ - private static final Map<String, Map<File, String>> CHECKSUM_CACHES = new HashMap<>(3); - - static { - CHECKSUM_CACHES.put(MD5, new ConcurrentHashMap<>()); - CHECKSUM_CACHES.put(SHA256, new ConcurrentHashMap<>()); - CHECKSUM_CACHES.put(SHA1, new ConcurrentHashMap<>()); - } + private static final Map<File, FileChecksums> CHECKSUM_CACHE = new ConcurrentHashMap<>(); /** * Private constructor for a utility class. @@ -93,30 +91,39 @@ private Checksum() { * specified that does not exist */ public static String getChecksum(String algorithm, File file) throws NoSuchAlgorithmException, IOException { - final Map<File, String> checksumCache = CHECKSUM_CACHES.get(algorithm.toUpperCase()); - if (checksumCache == null) { - throw new NoSuchAlgorithmException(algorithm); - } - String checksum = checksumCache.get(file); - try (InputStream stream = new FileInputStream(file)) { - if (checksum == null) { - switch (algorithm.toUpperCase()) { - case MD5: - checksum = DigestUtils.md5Hex(stream); - break; - case SHA1: - checksum = DigestUtils.sha1Hex(stream); - break; - case SHA256: - checksum = DigestUtils.sha256Hex(stream); - break; - default: - throw new NoSuchAlgorithmException(algorithm); + FileChecksums fileChecksums = CHECKSUM_CACHE.get(file); + if (fileChecksums == null) { + try (InputStream stream = Files.newInputStream(file.toPath())) { + final MessageDigest md5Digest = DigestUtils.getMd5Digest(); + final MessageDigest sha1Digest = DigestUtils.getSha1Digest(); + final MessageDigest sha256Digest = DigestUtils.getSha256Digest(); + final byte[] buffer = new byte[BUFFER_SIZE]; + int read = stream.read(buffer, 0, BUFFER_SIZE); + while (read > -1) { + // update all checksums together instead of reading the file multiple times + md5Digest.update(buffer, 0, read); + sha1Digest.update(buffer, 0, read); + sha256Digest.update(buffer, 0, read); + read = stream.read(buffer, 0, BUFFER_SIZE); } - checksumCache.put(file, checksum); +
fileChecksums = new FileChecksums( + Hex.encodeHexString(md5Digest.digest()), + Hex.encodeHexString(sha1Digest.digest()), + Hex.encodeHexString(sha256Digest.digest()) + ); + CHECKSUM_CACHE.put(file, fileChecksums); } } - return checksum; + switch (algorithm.toUpperCase()) { + case MD5: + return fileChecksums.md5; + case SHA1: + return fileChecksums.sha1; + case SHA256: + return fileChecksums.sha256; + default: + throw new NoSuchAlgorithmException(algorithm); + } } /** @@ -262,4 +269,19 @@ private static MessageDigest getMessageDigest(String algorithm) { throw new IllegalStateException(msg, e); } } + + /** + * File checksums for each supported algorithm + */ + private static class FileChecksums { + private final String md5; + private final String sha1; + private final String sha256; + + public FileChecksums(String md5, String sha1, String sha256) { + this.md5 = md5; + this.sha1 = sha1; + this.sha256 = sha256; + } + } } From e910e3c63d130f4f3e6905e99e49ed937a345b82 Mon Sep 17 00:00:00 2001 From: Anton Koscejev Date: Fri, 2 Dec 2022 12:33:35 +0100 Subject: [PATCH 2/2] remove commons-codec dependency use the already existing checksum methods instead --- utils/pom.xml | 4 -- .../owasp/dependencycheck/utils/Checksum.java | 43 ++++++------------- 2 files changed, 12 insertions(+), 35 deletions(-) diff --git a/utils/pom.xml b/utils/pom.xml index dd088a13153..41cbdb4a753 100644 --- a/utils/pom.xml +++ b/utils/pom.xml @@ -54,10 +54,6 @@ Copyright (c) 2014 - Jeremy Long. All Rights Reserved. 
<artifactId>jackson-databind</artifactId> <version>${jackson.version}</version> </dependency> - <dependency> - <groupId>commons-codec</groupId> - <artifactId>commons-codec</artifactId> - </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> diff --git a/utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java b/utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java index 10ab6ba48d1..7bac9736783 100644 --- a/utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java +++ b/utils/src/main/java/org/owasp/dependencycheck/utils/Checksum.java @@ -17,17 +17,13 @@ */ package org.owasp.dependencycheck.utils; -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.nio.charset.UnsupportedCharsetException; import java.nio.file.Files; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -62,7 +58,7 @@ public final class Checksum { /** * SHA1 constant. */ - private static final String SHA1 = "SHA1"; + private static final String SHA1 = "SHA-1"; /** * SHA256 constant.
*/ @@ -94,9 +90,9 @@ public static String getChecksum(String algorithm, File file) throws NoSuchAlgor FileChecksums fileChecksums = CHECKSUM_CACHE.get(file); if (fileChecksums == null) { try (InputStream stream = Files.newInputStream(file.toPath())) { - final MessageDigest md5Digest = DigestUtils.getMd5Digest(); - final MessageDigest sha1Digest = DigestUtils.getSha1Digest(); - final MessageDigest sha256Digest = DigestUtils.getSha256Digest(); + final MessageDigest md5Digest = getMessageDigest(MD5); + final MessageDigest sha1Digest = getMessageDigest(SHA1); + final MessageDigest sha256Digest = getMessageDigest(SHA256); final byte[] buffer = new byte[BUFFER_SIZE]; int read = stream.read(buffer, 0, BUFFER_SIZE); while (read > -1) { @@ -107,9 +103,9 @@ public static String getChecksum(String algorithm, File file) throws NoSuchAlgor read = stream.read(buffer, 0, BUFFER_SIZE); } fileChecksums = new FileChecksums( - Hex.encodeHexString(md5Digest.digest()), - Hex.encodeHexString(sha1Digest.digest()), - Hex.encodeHexString(sha256Digest.digest()) + getHex(md5Digest.digest()), + getHex(sha1Digest.digest()), + getHex(sha256Digest.digest()) ); CHECKSUM_CACHE.put(file, fileChecksums); } @@ -174,16 +170,7 @@ public static String getSHA256Checksum(File file) throws IOException, NoSuchAlgo * @return the hex representation of the MD5 hash */ public static String getChecksum(String algorithm, byte[] bytes) { - switch (algorithm.toUpperCase()) { - case MD5: - return DigestUtils.md5Hex(bytes); - case SHA1: - return DigestUtils.sha1Hex(bytes); - case SHA256: - return DigestUtils.sha256Hex(bytes); - default: - return null; - } + return getHex(getMessageDigest(algorithm).digest(bytes)); } /** @@ -193,7 +180,7 @@ public static String getChecksum(String algorithm, byte[] bytes) { * @return the hex representation of the MD5 */ public static String getMD5Checksum(String text) { - return DigestUtils.md5Hex(text); + return getChecksum(MD5, stringToBytes(text)); } /** @@ -203,7 +190,7 @@ public 
static String getMD5Checksum(String text) { * @return the hex representation of the SHA1 */ public static String getSHA1Checksum(String text) { - return DigestUtils.sha1Hex(text); + return getChecksum(SHA1, stringToBytes(text)); } /** @@ -213,7 +200,7 @@ public static String getSHA1Checksum(String text) { * @return the hex representation of the SHA1 */ public static String getSHA256Checksum(String text) { - return DigestUtils.sha256Hex(text); + return getChecksum(SHA256, stringToBytes(text)); } /** @@ -223,13 +210,7 @@ public static String getSHA256Checksum(String text) { * @return the bytes */ private static byte[] stringToBytes(String text) { - byte[] data; - try { - data = text.getBytes(Charset.forName(StandardCharsets.UTF_8.name())); - } catch (UnsupportedCharsetException ex) { - data = text.getBytes(Charset.defaultCharset()); - } - return data; + return text.getBytes(StandardCharsets.UTF_8); } /**