Skip to content

Commit

Permalink
Merge pull request #850 from attilapuskas/feature/reduce-file-access
Browse files Browse the repository at this point in the history
Reduce scan time by avoiding redundant file access
  • Loading branch information
lukehutch committed Apr 17, 2024
2 parents 3be2f77 + dac1321 commit b92a251
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 95 deletions.
158 changes: 85 additions & 73 deletions src/main/java/io/github/classgraph/ClasspathElementDir.java
Expand Up @@ -37,6 +37,7 @@
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.PosixFileAttributes;
import java.nio.file.attribute.PosixFilePermission;
import java.util.ArrayList;
Expand Down Expand Up @@ -110,17 +111,20 @@ void open(final WorkQueue<ClasspathEntryWorkUnit> workQueue, final LogNode log)
final Path libDirPath = classpathEltPath.resolve(libDirPrefix);
if (FileUtils.canReadAndIsDir(libDirPath)) {
// Add all jarfiles within the lib dir as child classpath entries
try (DirectoryStream<Path> stream = Files.newDirectoryStream(libDirPath)) {
try (DirectoryStream<Path> stream = Files.newDirectoryStream(libDirPath, new DirectoryStream.Filter<Path>() {
@Override
public boolean accept(Path filePath) {
return filePath.toString().toLowerCase().endsWith(".jar") && Files.isRegularFile(filePath);
}
})) {
for (final Path filePath : stream) {
if (Files.isRegularFile(filePath) && filePath.toString().toLowerCase().endsWith(".jar")) {
if (log != null) {
log(classpathElementIdx, "Found lib jar: " + filePath, log);
}
workQueue.addWorkUnit(new ClasspathEntryWorkUnit(filePath, getClassLoader(),
/* parentClasspathElement = */ this,
/* orderWithinParentClasspathElement = */ childClasspathEntryIdx++,
/* packageRootPrefix = */ ""));
if (log != null) {
log(classpathElementIdx, "Found lib jar: " + filePath, log);
}
workQueue.addWorkUnit(new ClasspathEntryWorkUnit(filePath, getClassLoader(),
/* parentClasspathElement = */ this,
/* orderWithinParentClasspathElement = */ childClasspathEntryIdx++,
/* packageRootPrefix = */ ""));
}
} catch (final IOException e) {
// Ignore -- thrown by Files.newDirectoryStream
Expand Down Expand Up @@ -156,24 +160,29 @@ void open(final WorkQueue<ClasspathEntryWorkUnit> workQueue, final LogNode log)
*
* @param resourcePath
* the {@link Path} for the resource
* @param nestedJarHandler
* the nested jar handler
* @return the resource
*/
private Resource newResource(final Path resourcePath, final NestedJarHandler nestedJarHandler) {
long length;
try {
length = Files.size(resourcePath);
} catch (IOException | SecurityException e) {
length = -1L;
}
return new Resource(this, length) {
private Resource newResource(final Path resourcePath, final BasicFileAttributes attributes) {
final int notYetLoadedLength = -2;
return new Resource(this, attributes == null ? notYetLoadedLength : attributes.size()) {
/** The {@link PathSlice} opened on the file. */
private PathSlice pathSlice;

/** True if the resource is open. */
private final AtomicBoolean isOpen = new AtomicBoolean();

/**
 * Get the length of the resource, reading the file size from the filesystem the first time it is
 * requested if the length is still marked as not yet loaded.
 *
 * @return the cached length, or -1 if the file size could not be read
 */
@Override
public long getLength() {
    // Fast path: the length is already known (or was already found to be unavailable)
    if (length != notYetLoadedLength) {
        return length;
    }
    long resolvedLength;
    try {
        resolvedLength = Files.size(resourcePath);
    } catch (IOException | SecurityException e) {
        // Size could not be determined
        resolvedLength = -1;
    }
    length = resolvedLength;
    return length;
}

@Override
public String getPath() {
String path = FastPathResolver.resolve(classpathEltPath.relativize(resourcePath).toString());
Expand All @@ -191,7 +200,7 @@ public String getPathRelativeToClasspathElement() {
@Override
public long getLastModified() {
try {
return resourcePath.toFile().lastModified();
return attributes == null ? resourcePath.toFile().lastModified() : attributes.lastModifiedTime().toMillis();
} catch (final UnsupportedOperationException e) {
return 0L;
}
Expand All @@ -202,8 +211,12 @@ public long getLastModified() {
public Set<PosixFilePermission> getPosixFilePermissions() {
Set<PosixFilePermission> posixFilePermissions = null;
try {
posixFilePermissions = Files.readAttributes(resourcePath, PosixFileAttributes.class)
.permissions();
if (attributes instanceof PosixFileAttributes) {
posixFilePermissions = ((PosixFileAttributes) attributes).permissions();
} else {
posixFilePermissions = Files.readAttributes(resourcePath, PosixFileAttributes.class)
.permissions();
}
} catch (UnsupportedOperationException | IOException | SecurityException e) {
// POSIX attributes not supported
}
Expand All @@ -212,60 +225,36 @@ public Set<PosixFilePermission> getPosixFilePermissions() {

@Override
public ByteBuffer read() throws IOException {
if (skipClasspathElement) {
// Shouldn't happen
throw new IOException("Parent directory could not be opened");
}
if (isOpen.getAndSet(true)) {
throw new IOException(
"Resource is already open -- cannot open it again without first calling close()");
}
pathSlice = new PathSlice(resourcePath, nestedJarHandler);
length = pathSlice.sliceLength;
checkSkipState();
openAndCreateSlice();
byteBuffer = pathSlice.read();
return byteBuffer;
}

@Override
ClassfileReader openClassfile() throws IOException {
if (skipClasspathElement) {
// Shouldn't happen
throw new IOException("Parent directory could not be opened");
}
if (isOpen.getAndSet(true)) {
throw new IOException(
"Resource is already open -- cannot open it again without first calling close()");
}
checkSkipState();
// Classfile won't be compressed, so wrap it in a new PathSlice and then open it
pathSlice = new PathSlice(resourcePath, nestedJarHandler);
length = pathSlice.sliceLength;
openAndCreateSlice();
return new ClassfileReader(pathSlice, this);
}

@Override
public InputStream open() throws IOException {
if (skipClasspathElement) {
// Shouldn't happen
throw new IOException("Parent directory could not be opened");
}
if (isOpen.getAndSet(true)) {
throw new IOException(
"Resource is already open -- cannot open it again without first calling close()");
}
pathSlice = new PathSlice(resourcePath, nestedJarHandler);
checkSkipState();
openAndCreateSlice();
inputStream = pathSlice.open(this);
length = pathSlice.sliceLength;
return inputStream;
}

@Override
public byte[] load() throws IOException {
read();
try (Resource res = this) { // Close this after use
pathSlice = new PathSlice(resourcePath, nestedJarHandler);
final byte[] bytes = pathSlice.load();
res.length = bytes.length;
return bytes;
checkSkipState();
try {
openAndCreateSlice();
return pathSlice.load();
} finally {
close();
}
}

Expand All @@ -286,6 +275,22 @@ public void close() {
super.close();
}
}

/**
 * Fail fast if {@code skipClasspathElement} is set.
 *
 * @throws IOException
 *             if {@code skipClasspathElement} is true
 */
private void checkSkipState() throws IOException {
    if (!skipClasspathElement) {
        return;
    }
    // Shouldn't happen
    throw new IOException("Parent directory could not be opened");
}

/**
 * Mark the resource as open and create the {@link PathSlice} for the resource path, then update
 * {@code length} from the length of the new slice.
 *
 * @throws IOException
 *             if the resource is already open, or if the {@link PathSlice} cannot be created
 */
private void openAndCreateSlice() throws IOException {
    final boolean wasAlreadyOpen = isOpen.getAndSet(true);
    if (wasAlreadyOpen) {
        throw new IOException(
                "Resource is already open -- cannot open it again without first calling close()");
    }
    // Pass checkAccess = false so the slice constructor skips its readable-regular-file check
    final PathSlice newSlice = new PathSlice(resourcePath, /* isDeflatedZipEntry = */ false,
            /* inflatedLengthHint = */ 0L, nestedJarHandler, /* checkAccess = */ false);
    pathSlice = newSlice;
    length = newSlice.sliceLength;
}
};
}

Expand All @@ -300,7 +305,7 @@ public void close() {
@Override
Resource getResource(final String relativePath) {
final Path resourcePath = classpathEltPath.resolve(relativePath);
return FileUtils.canReadAndIsFile(resourcePath) ? newResource(resourcePath, nestedJarHandler) : null;
return FileUtils.canReadAndIsFile(resourcePath) ? newResource(resourcePath, null) : null;
}

/**
Expand Down Expand Up @@ -396,16 +401,19 @@ private void scanPathRecursively(final Path path, final LogNode log) {
return;
}
Collections.sort(pathsInDir);
FileUtils.FileAttributesGetter getFileAttributes = FileUtils.createCachedAttributesGetter();

// Determine whether this is a modular jar running under JRE 9+
final boolean isModularJar = VersionFinder.JAVA_MAJOR_VERSION >= 9 && getModuleName() != null;

// Only scan files in directory if directory is not only an ancestor of an accepted path
if (parentMatchStatus != ScanSpecPathMatch.ANCESTOR_OF_ACCEPTED_PATH) {
// Do preorder traversal (files in dir, then subdirs), to reduce filesystem cache misses
for (final Path subPath : pathsInDir) {
for (final Path subPath : new ArrayList<>(pathsInDir)) {
// Process files in dir before recursing
if (Files.isRegularFile(subPath)) {
BasicFileAttributes fileAttributes = getFileAttributes.get(subPath);
if (fileAttributes.isRegularFile()) {
pathsInDir.remove(subPath);
final Path subPathRelative = classpathEltPath.relativize(subPath);
final String subPathRelativeStr = FastPathResolver.resolve(subPathRelative.toString());
// If this is a modular jar, ignore all classfiles other than "module-info.class" in the
Expand All @@ -426,12 +434,12 @@ private void scanPathRecursively(final Path path, final LogNode log) {
|| (parentMatchStatus == ScanSpecPathMatch.AT_ACCEPTED_CLASS_PACKAGE
&& scanSpec.classfileIsSpecificallyAccepted(subPathRelativeStr))) {
// Resource is accepted
final Resource resource = newResource(subPath, nestedJarHandler);
final Resource resource = newResource(subPath, fileAttributes);
addAcceptedResource(resource, parentMatchStatus, /* isClassfileOnly = */ false, subLog);

// Save last modified time
try {
fileToLastModified.put(subPath.toFile(), subPath.toFile().lastModified());
fileToLastModified.put(subPath.toFile(), fileAttributes.lastModifiedTime().toMillis());
} catch (final UnsupportedOperationException e) {
// Ignore
}
Expand All @@ -444,23 +452,27 @@ private void scanPathRecursively(final Path path, final LogNode log) {
}
} else if (scanSpec.enableClassInfo && dirRelativePathStr.equals("/")) {
// Always check for module descriptor in package root, even if package root isn't in accept
for (final Path subPath : pathsInDir) {
if (subPath.getFileName().toString().equals("module-info.class") && Files.isRegularFile(subPath)) {
final Resource resource = newResource(subPath, nestedJarHandler);
addAcceptedResource(resource, parentMatchStatus, /* isClassfileOnly = */ true, subLog);
try {
fileToLastModified.put(subPath.toFile(), subPath.toFile().lastModified());
} catch (final UnsupportedOperationException e) {
// Ignore
for (final Path subPath : new ArrayList<>(pathsInDir)) {
if (subPath.getFileName().toString().equals("module-info.class")) {
BasicFileAttributes fileAttributes = getFileAttributes.get(subPath);
if (fileAttributes.isRegularFile()) {
pathsInDir.remove(subPath);
final Resource resource = newResource(subPath, fileAttributes);
addAcceptedResource(resource, parentMatchStatus, /* isClassfileOnly = */ true, subLog);
try {
fileToLastModified.put(subPath.toFile(), fileAttributes.lastModifiedTime().toMillis());
} catch (final UnsupportedOperationException e) {
// Ignore
}
break;
}
break;
}
}
}
// Recurse into subdirectories
for (final Path subPath : pathsInDir) {
try {
if (Files.isDirectory(subPath)) {
if (FileUtils.isDir(subPath)) {
scanPathRecursively(subPath, subLog);
}
} catch (final SecurityException e) {
Expand Down
23 changes: 15 additions & 8 deletions src/main/java/io/github/classgraph/Scanner.java
Expand Up @@ -34,10 +34,14 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.FileSystem;
import java.nio.file.FileSystemNotFoundException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -548,16 +552,19 @@ public void processWorkUnit(final ClasspathEntryWorkUnit workUnit,
throw new IOException("Ignoring JrtFS filesystem path "
+ "(modules are scanned using the JPMS API): " + path);
}
if (FileUtils.canReadAndIsFile(path)) {
// classpathEntObj is a Path which points to a file, so it must be a jar
isJar = true;
} else if (FileUtils.canReadAndIsDir(path)) {
// classpathEntObj is a Path which points to a dir
isJar = false;
} else if (!FileUtils.canRead(path)) {
if (!FileUtils.canRead(path)) {
throw new IOException("Cannot read path: " + path);
} else {
throw new IOException("Not a file or directory: " + path);
BasicFileAttributes attributes = Files.readAttributes(path, BasicFileAttributes.class);
if (attributes.isRegularFile()) {
// classpathEntObj is a Path which points to a file, so it must be a jar
isJar = true;
} else if (attributes.isDirectory()) {
// classpathEntObj is a Path which points to a dir
isJar = false;
} else {
throw new IOException("Not a file or directory: " + path);
}
}
} else {
// Should not happen
Expand Down
Expand Up @@ -78,10 +78,6 @@ class PhysicalZipFile {
PhysicalZipFile(final File file, final NestedJarHandler nestedJarHandler, final LogNode log)
throws IOException {
this.nestedJarHandler = nestedJarHandler;

// Make sure the File is readable and is a regular file
FileUtils.checkCanReadAndIsFile(file);

this.file = file;
this.pathStr = FastPathResolver.resolve(FileUtils.currDirPath(), file.getPath());
this.slice = new FileSlice(file, nestedJarHandler, log);
Expand All @@ -102,10 +98,6 @@ class PhysicalZipFile {
PhysicalZipFile(final Path path, final NestedJarHandler nestedJarHandler, final LogNode log)
throws IOException {
this.nestedJarHandler = nestedJarHandler;

// Make sure the File is readable and is a regular file
FileUtils.checkCanReadAndIsFile(path);

this.path = path;
this.pathStr = FastPathResolver.resolve(FileUtils.currDirPath(), path.toString());
this.slice = new PathSlice(path, nestedJarHandler);
Expand Down
29 changes: 27 additions & 2 deletions src/main/java/nonapi/io/github/classgraph/fileslice/PathSlice.java
Expand Up @@ -112,10 +112,35 @@ private PathSlice(final PathSlice parentSlice, final long offset, final long len
*/
public PathSlice(final Path path, final boolean isDeflatedZipEntry, final long inflatedLengthHint,
final NestedJarHandler nestedJarHandler) throws IOException {
// Delegate to the five-argument constructor with checkAccess = true, so that the
// readable-regular-file check is still performed for callers of this overload
this(path, isDeflatedZipEntry, inflatedLengthHint, nestedJarHandler, true);
}

/**
* Constructor for toplevel file slice.
*
* @param path
* the path
* @param isDeflatedZipEntry
* true if this is a deflated zip entry
* @param inflatedLengthHint
* the uncompressed size of a deflated zip entry, or
* -1 if unknown, or 0 if this is not a deflated
* zip entry.
* @param nestedJarHandler
* the nested jar handler
* @param checkAccess
* whether to check that the path is readable and is a regular file
* @throws IOException
* if the file cannot be opened.
*/
public PathSlice(final Path path, final boolean isDeflatedZipEntry, final long inflatedLengthHint,
final NestedJarHandler nestedJarHandler, boolean checkAccess) throws IOException {
super(0L, isDeflatedZipEntry, inflatedLengthHint, nestedJarHandler);

// Make sure the File is readable and is a regular file
FileUtils.checkCanReadAndIsFile(path);
if (checkAccess) {
// Make sure the File is readable and is a regular file
FileUtils.checkCanReadAndIsFile(path);
}

this.path = path;
this.fileChannel = FileChannel.open(path, StandardOpenOption.READ);
Expand Down

0 comments on commit b92a251

Please sign in to comment.