Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JS/Java/Kotlin extractors: support Zstd TRAP compression #16344

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions java/kotlin-extractor/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ version '0.0.1'
dependencies {
implementation "org.jetbrains.kotlin:kotlin-stdlib"
compileOnly("org.jetbrains.kotlin:kotlin-compiler")
implementation "io.airlift:aircompressor"
}

repositories {
Expand Down
15 changes: 11 additions & 4 deletions java/kotlin-extractor/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ def compile_to_jar(build_dir, tmp_src_dir, srcs, version, classpath, java_classp

compile_to_dir(build_dir, srcs, version, classpath, java_classpath, class_dir)

cwd = os.getcwd()
try:
os.chdir(class_dir)
run_process(['jar', 'xf', cwd + '/' + kotlin_dependency_folder + '/aircompressor-0.26.jar'])
finally:
os.chdir(cwd)

run_process(['jar', 'cf', output,
'-C', class_dir, '.',
'-C', tmp_src_dir + '/main/resources', 'META-INF',
Expand Down Expand Up @@ -206,8 +213,8 @@ def compile(jars, java_jars, dependency_folder, transform_to_embeddable, output,


def compile_embeddable(version):
compile(['kotlin-stdlib-' + version, 'kotlin-compiler-embeddable-' + version],
['kotlin-stdlib-' + version],
compile(['kotlin-stdlib-' + version, 'kotlin-compiler-embeddable-' + version, 'aircompressor-0.26'],
['kotlin-stdlib-' + version, 'aircompressor-0.26'],
kotlin_dependency_folder,
transform_to_embeddable,
'codeql-extractor-kotlin-embeddable-%s.jar' % (version),
Expand All @@ -216,8 +223,8 @@ def compile_embeddable(version):


def compile_standalone(version):
compile(['kotlin-stdlib-' + version, 'kotlin-compiler-' + version],
['kotlin-stdlib-' + version],
compile(['kotlin-stdlib-' + version, 'kotlin-compiler-' + version, 'aircompressor-0.26'],
['kotlin-stdlib-' + version, 'aircompressor-0.26'],
kotlin_dependency_folder,
lambda srcs: None,
'codeql-extractor-kotlin-standalone-%s.jar' % (version),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,59 @@
package com.semmle.util.trap;

import com.semmle.util.zip.MultiMemberGZIPInputStream;
import io.airlift.compress.zstd.ZstdInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;

import com.semmle.util.zip.MultiMemberGZIPInputStream;

public class CompressedFileInputStream {
/**
 * Create an input stream for reading the uncompressed data from a (possibly) compressed file, with
 * the decompression method chosen based on the file extension: ".gz" files are read through a
 * {@link MultiMemberGZIPInputStream}; any other file is returned as-is.
 *
 * <p>If wrapping the file in a decompressing stream fails, the underlying file stream is closed
 * before the exception propagates, so no file handle is leaked.
 *
 * @param f The compressed file to read
 * @return An input stream from which you can read the file's uncompressed data.
 * @throws IOException From opening the file or from the underlying decompression input stream.
 */
public static InputStream fromFile(Path f) throws IOException {
  InputStream fileInputStream = Files.newInputStream(f);
  try {
    if (f.getFileName().toString().endsWith(".gz")) {
      return new MultiMemberGZIPInputStream(fileInputStream, 8192);
      //} else if (f.getFileName().toString().endsWith(".br")) {
      //  return new BrotliInputStream(fileInputStream);
    } else {
      return fileInputStream;
    }
  } catch (IOException | RuntimeException e) {
    // The caller never receives a stream on this path, so nobody else can close the file.
    try {
      fileInputStream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
/**
 * Create an input stream for reading the uncompressed data from a (possibly) compressed file,
 * with the decompression method chosen based on the file extension: ".gz" files are read
 * through a {@link MultiMemberGZIPInputStream}, ".zst" files through a Zstandard decoder, and
 * any other file is returned as-is.
 *
 * <p>If wrapping the file in a decompressing stream fails, the underlying file stream is closed
 * before the exception propagates, so no file handle is leaked.
 *
 * @param f The compressed file to read
 * @return An input stream from which you can read the file's uncompressed data.
 * @throws IOException From opening the file or from the underlying decompression input stream.
 */
public static InputStream fromFile(Path f) throws IOException {
  InputStream fileInputStream = Files.newInputStream(f);
  try {
    String fileName = f.getFileName().toString();
    if (fileName.endsWith(".gz")) {
      return new MultiMemberGZIPInputStream(fileInputStream, 8192);
      //} else if (fileName.endsWith(".br")) {
      //  return new BrotliInputStream(fileInputStream);
    } else if (fileName.endsWith(".zst")) {
      return new WrappedZstdInputStream(fileInputStream);
    } else {
      return fileInputStream;
    }
  } catch (IOException | RuntimeException e) {
    // The caller never receives a stream on this path, so nobody else can close the file.
    try {
      fileInputStream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}

/**
 * A {@link ZstdInputStream} whose read methods convert the decoder's
 * {@code MalformedInputException} into an {@link IOException}, which will be handled as a
 * non-catastrophic error during TRAP import.
 */
private static class WrappedZstdInputStream extends ZstdInputStream {
  public WrappedZstdInputStream(InputStream in) {
    super(in);
  }

  /** Builds the IOException reported to callers, keeping the decoder failure as its cause. */
  private static IOException decodingFailure(io.airlift.compress.MalformedInputException cause) {
    return new IOException("Zstd stream decoding failed", cause);
  }

  @Override
  public int read() throws IOException {
    final int next;
    try {
      next = super.read();
    } catch (io.airlift.compress.MalformedInputException malformed) {
      throw decodingFailure(malformed);
    }
    return next;
  }

  @Override
  public int read(final byte[] buffer, final int offset, final int length) throws IOException {
    final int count;
    try {
      count = super.read(buffer, offset, length);
    } catch (io.airlift.compress.MalformedInputException malformed) {
      throw decodingFailure(malformed);
    }
    return count;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package com.github.codeql
import com.github.codeql.utils.versions.usesK2
import com.semmle.util.files.FileUtil
import com.semmle.util.trap.pathtransformers.PathTransformer
import io.airlift.compress.zstd.ZstdInputStream
import io.airlift.compress.zstd.ZstdOutputStream
import java.io.BufferedInputStream
import java.io.BufferedOutputStream
import java.io.BufferedReader
Expand Down Expand Up @@ -210,17 +212,17 @@ class KotlinExtractorExtension(
private fun getCompression(logger: Logger): Compression {
val compression_env_var = "CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"
val compression_option = System.getenv(compression_env_var)
val defaultCompression = Compression.GZIP
val defaultCompression = Compression.ZSTD
if (compression_option == null) {
return defaultCompression
} else {
try {
val compression_option_upper = compression_option.uppercase()
if (compression_option_upper == "BROTLI") {
logger.warn(
"Kotlin extractor doesn't support Brotli compression. Using GZip instead."
"Kotlin extractor doesn't support Brotli compression. Using Zstandard instead."
)
return Compression.GZIP
return Compression.ZSTD
} else {
return Compression.valueOf(compression_option_upper)
}
Expand Down Expand Up @@ -470,6 +472,11 @@ enum class Compression(val extension: String) {
override fun bufferedWriter(file: File): BufferedWriter {
return GZIPOutputStream(file.outputStream()).bufferedWriter()
}
},
ZSTD(".zst") {
override fun bufferedWriter(file: File): BufferedWriter {
return ZstdOutputStream(file.outputStream()).bufferedWriter()
}
};

abstract fun bufferedWriter(file: File): BufferedWriter
Expand All @@ -483,6 +490,7 @@ private fun getTrapFileWriter(
return when (compression) {
Compression.NONE -> NonCompressedTrapFileWriter(logger, trapFileName)
Compression.GZIP -> GZipCompressedTrapFileWriter(logger, trapFileName)
Compression.ZSTD -> ZstdCompressedTrapFileWriter(logger, trapFileName)
}
}

Expand Down Expand Up @@ -582,3 +590,18 @@ private class GZipCompressedTrapFileWriter(logger: FileLogger, trapName: String)
)
}
}

/**
 * A [TrapFileWriter] that reads and writes its files through Zstandard streams.
 *
 * Passes ".zst" to the [TrapFileWriter] constructor (presumably the file extension; confirm
 * against the base class).
 */
private class ZstdCompressedTrapFileWriter(logger: FileLogger, trapName: String) :
    TrapFileWriter(logger, trapName, ".zst") {
    /** Opens [file] for reading, decompressing its content as it is read. */
    protected override fun getReader(file: File): BufferedReader {
        val rawInput = BufferedInputStream(FileInputStream(file))
        val decompressed = ZstdInputStream(rawInput)
        return BufferedReader(InputStreamReader(decompressed))
    }

    /** Opens [file] for writing, compressing everything written to it. */
    protected override fun getWriter(file: File): BufferedWriter {
        val rawOutput = BufferedOutputStream(FileOutputStream(file))
        val compressed = ZstdOutputStream(rawOutput)
        return BufferedWriter(OutputStreamWriter(compressed))
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
os.environ['SEMMLE_PATH_TRANSFORMER'] = root + '/' + path_transformer_file

run_codeql_database_create(["kotlinc kotlin_source.kt"], lang="java")
files = ['test-db/trap/java/src/kotlin_source.kt.trap.gz', 'test-db/src/src/kotlin_source.kt']
files = ['test-db/trap/java/src/kotlin_source.kt.trap.zst', 'test-db/src/src/kotlin_source.kt']
exists = list(map(os.path.exists, files))
if exists != [True] * 2:
print(exists)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ def check_extensions_worker(counts, directory):
counts.count_gzip += 1
if not startsWith(x, b'\x1f\x8b'): # The GZip magic numbers
raise Exception("GZipped TRAP file that doesn't start with GZip magic numbers: " + f)
elif f.endswith('.trap.zst'):
counts.count_gzip += 1
if not startsWith(x, b'\x28\xb5\x2f\xfd'): # The Zstd magic numbers
raise Exception("Zstd-compressed TRAP file that doesn't start with Zstd magic numbers: " + f)

def startsWith(f, b):
with open(f, 'rb') as f_in:
Expand All @@ -58,6 +62,9 @@ def startsWith(f, b):
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "gzip"
run_codeql_database_create(['kotlinc test.kt'], test_db="gzip-db", db=None, lang="java")
check_extensions('gzip-db/trap', Counts(1, -1))
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "zstd"
run_codeql_database_create(['kotlinc test.kt'], test_db="zstd-db", db=None, lang="java")
check_extensions('zstd-db/trap', Counts(1, -1))
os.environ["CODEQL_EXTRACTOR_JAVA_OPTION_TRAP_COMPRESSION"] = "brotli"
run_codeql_database_create(['kotlinc test.kt'], test_db="brotli-db", db=None, lang="java")
check_extensions('brotli-db/trap', Counts(1, -1))
Expand Down
2 changes: 2 additions & 0 deletions javascript/extractor/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ java_library(
exports = [
"@semmle_code//extractor:html",
"@semmle_code//extractor:yaml",
"@semmle_code//resources/lib/java:aircompressor",
"@semmle_code//resources/lib/java:commons-compress",
"@semmle_code//resources/lib/java:gson",
"@semmle_code//resources/lib/java:jericho-html",
Expand All @@ -33,6 +34,7 @@ codeql_fat_jar(
"@semmle_code//extractor:html",
"@semmle_code//extractor:xml-trap-writer",
"@semmle_code//extractor:yaml",
"@semmle_code//resources/lib/java:aircompressor",
"@semmle_code//resources/lib/java:commons-compress",
"@semmle_code//resources/lib/java:gson",
"@semmle_code//resources/lib/java:jericho-html",
Expand Down