Skip to content

Commit

Permalink
Merge pull request #12484 from sberyozkin/tika_pdf_fonts
Browse files Browse the repository at this point in the history
Add Tika PDF FontBox native resources
  • Loading branch information
sberyozkin committed Oct 3, 2020
2 parents 64f01df + bb270e2 commit 832552a
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import io.quarkus.deployment.builditem.CapabilityBuildItem;
import io.quarkus.deployment.builditem.FeatureBuildItem;
import io.quarkus.deployment.builditem.nativeimage.NativeImageResourceBuildItem;
import io.quarkus.deployment.builditem.nativeimage.NativeImageResourceDirectoryBuildItem;
import io.quarkus.deployment.builditem.nativeimage.RuntimeInitializedClassBuildItem;
import io.quarkus.deployment.builditem.nativeimage.ServiceProviderBuildItem;
import io.quarkus.deployment.util.ServiceUtil;
Expand Down Expand Up @@ -85,10 +86,10 @@ public void registerTikaParsersResources(BuildProducer<NativeImageResourceBuildI
}

/**
 * Build step that produces native-image resource build items for the PDFBox glyph list
 * and for the FontBox {@code cmap} and {@code unicode} resource paths.
 *
 * NOTE(review): this span comes from a diff view without +/- markers, so two signatures
 * appear back to back. Presumably the three per-file NativeImageResourceBuildItem lines
 * are the removed version and the NativeImageResourceDirectoryBuildItem lines are the
 * added one -- confirm against the repository before relying on either form.
 */
@BuildStep
public void registerPdfBoxResources(BuildProducer<NativeImageResourceBuildItem> resource) {
resource.produce(new NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/additional.txt"));
resource.produce(new NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/glyphlist.txt"));
resource.produce(new NativeImageResourceBuildItem("org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt"));
public void registerPdfBoxResources(BuildProducer<NativeImageResourceDirectoryBuildItem> resource) {
// Whole resource paths are passed here instead of individual file names.
resource.produce(new NativeImageResourceDirectoryBuildItem("org/apache/pdfbox/resources/glyphlist"));
resource.produce(new NativeImageResourceDirectoryBuildItem("org/apache/fontbox/cmap"));
resource.produce(new NativeImageResourceDirectoryBuildItem("org/apache/fontbox/unicode"));
}

@BuildStep
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import static io.restassured.RestAssured.given;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.startsWith;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
Expand All @@ -29,6 +30,17 @@ public void testGetTextFromPdfFormat() throws Exception {
checkText("application/pdf", "pdf");
}

/**
 * Posts the result of {@code readQuarkusFile("americanexpress.pdf")} to
 * {@code /parse/text} with a {@code Content-Type} of {@code application/pdf} and
 * expects a 200 response whose body starts with "American express card".
 *
 * @throws Exception if loading the fixture fails
 */
@Test
public void testGetTextFromPdfFormatWithFonts() throws Exception {
    // Request set-up first, then the call, then the assertions.
    given()
            .header("Content-Type", "application/pdf")
            .body(readQuarkusFile("americanexpress.pdf"))
            .when()
            .post("/parse/text")
            .then()
            .statusCode(200)
            .body(startsWith("American express card"));
}

@Test
public void testGetMetadataFromTextFormat() throws Exception {
checkMetadata("text/plain", "txt");
Expand Down

0 comments on commit 832552a

Please sign in to comment.