Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tika 4237 add jwt authentication ability to the http fetcher #1712

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 6 additions & 1 deletion tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@
<artifactId>tika-httpclient-commons</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.nimbusds</groupId>
<artifactId>nimbus-jose-jwt</artifactId>
<version>9.5</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tika-core</artifactId>
Expand Down Expand Up @@ -127,4 +132,4 @@
<scm>
<tag>3.0.0-BETA-rc1</tag>
</scm>
</project>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.security.PrivateKey;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
Expand All @@ -36,6 +37,7 @@
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicBoolean;

import com.nimbusds.jose.JOSEException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
import org.apache.http.ConnectionClosedException;
Expand Down Expand Up @@ -69,6 +71,9 @@
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.pipes.fetcher.AbstractFetcher;
import org.apache.tika.pipes.fetcher.RangeFetcher;
import org.apache.tika.pipes.fetcher.http.jwt.JwtGenerator;
import org.apache.tika.pipes.fetcher.http.jwt.JwtPrivateKeyCreds;
import org.apache.tika.pipes.fetcher.http.jwt.JwtSecretCreds;
import org.apache.tika.utils.StringUtils;

/**
Expand Down Expand Up @@ -128,13 +133,20 @@ public class HttpFetcher extends AbstractFetcher implements Initializable, Range
private int maxErrMsgSize = 10000;

//httpHeaders to capture in the metadata
private Set<String> httpHeaders = new HashSet<>();
private final Set<String> httpHeaders = new HashSet<>();

private String jwtIssuer;
private String jwtSubject;
private int jwtExpiresInSeconds;
private String jwtSecret;
private String jwtPrivateKeyBase64;

JwtGenerator jwtGenerator;

//When making the request, what User-Agent is sent.
//By default httpclient adds e.g. "Apache-HttpClient/4.5.13 (Java/x.y.z)"
private String userAgent = null;


@Override
public InputStream fetch(String fetchKey, Metadata metadata) throws IOException, TikaException {
HttpGet get = new HttpGet(fetchKey);
Expand All @@ -143,19 +155,28 @@ public InputStream fetch(String fetchKey, Metadata metadata) throws IOException,
.setMaxRedirects(maxRedirects)
.setRedirectsEnabled(true).build();
get.setConfig(requestConfig);
if (! StringUtils.isBlank(userAgent)) {
populateHeaders(get);
return execute(get, metadata, httpClient, true);
}

/**
 * Applies the optional request headers shared by all fetch variants.
 * <p>
 * A {@code User-Agent} header is set when {@link #userAgent} is not blank, and an
 * {@code Authorization: Bearer <jwt>} header is set when a {@link #jwtGenerator} has
 * been configured. A fresh token is requested from the generator on every call.
 *
 * @param get the request to decorate
 * @throws TikaException if the JWT could not be generated
 */
private void populateHeaders(HttpGet get) throws TikaException {
    if (!StringUtils.isBlank(userAgent)) {
        get.setHeader(USER_AGENT, userAgent);
    }
    if (jwtGenerator != null) {
        try {
            get.setHeader("Authorization", "Bearer " + jwtGenerator.jwt());
        } catch (JOSEException e) {
            // surface signing problems through the fetcher's own exception type
            throw new TikaException("Could not generate JWT", e);
        }
    }
}

@Override
public InputStream fetch(String fetchKey, long startRange, long endRange, Metadata metadata)
throws IOException {
throws IOException, TikaException {
HttpGet get = new HttpGet(fetchKey);
if (! StringUtils.isBlank(userAgent)) {
get.setHeader(USER_AGENT, userAgent);
}
populateHeaders(get);
get.setHeader("Range", "bytes=" + startRange + "-" + endRange);
return execute(get, metadata, httpClient, true);
}
Expand Down Expand Up @@ -437,17 +458,75 @@ public void setUserAgent(String userAgent) {
this.userAgent = userAgent;
}

/**
 * @return the configured JWT issuer, or {@code null} if none was set
 */
public String getJwtIssuer() {
return jwtIssuer;
}

/**
 * Sets the issuer that {@code initialize} passes to the JWT credentials.
 *
 * @param jwtIssuer issuer value for generated tokens
 */
@Field
public void setJwtIssuer(String jwtIssuer) {
this.jwtIssuer = jwtIssuer;
}

/**
 * @return the configured JWT subject, or {@code null} if none was set
 */
public String getJwtSubject() {
return jwtSubject;
}

/**
 * Sets the subject that {@code initialize} passes to the JWT credentials.
 *
 * @param jwtSubject subject value for generated tokens
 */
@Field
public void setJwtSubject(String jwtSubject) {
this.jwtSubject = jwtSubject;
}

/**
 * @return the configured token lifetime in seconds
 */
public int getJwtExpiresInSeconds() {
return jwtExpiresInSeconds;
}

/**
 * Sets the token lifetime that {@code initialize} passes to the JWT credentials.
 * NOTE(review): the field has no explicit initializer, so an unset value is 0 —
 * confirm whether a zero lifetime is acceptable to the target server.
 *
 * @param jwtExpiresInSeconds lifetime of generated tokens, in seconds
 */
@Field
public void setJwtExpiresInSeconds(int jwtExpiresInSeconds) {
this.jwtExpiresInSeconds = jwtExpiresInSeconds;
}

/**
 * @return the configured shared secret, or {@code null} if none was set
 */
public String getJwtSecret() {
return jwtSecret;
}

/**
 * Sets the shared secret for JWT signing. {@code initialize} converts it to UTF-8
 * bytes for the secret-based credentials; {@code checkInitialization} rejects a
 * configuration that also sets the private key.
 *
 * @param jwtSecret shared secret
 */
@Field
public void setJwtSecret(String jwtSecret) {
this.jwtSecret = jwtSecret;
}

/**
 * @return the configured Base64-encoded private key, or {@code null} if none was set
 */
public String getJwtPrivateKeyBase64() {
return jwtPrivateKeyBase64;
}

/**
 * Sets the Base64-encoded private key for JWT signing. {@code initialize} decodes it
 * via {@code JwtPrivateKeyCreds.convertBase64ToPrivateKey}; {@code checkInitialization}
 * rejects a configuration that also sets the shared secret.
 *
 * @param jwtPrivateKeyBase64 Base64 text of the private key
 */
@Field
public void setJwtPrivateKeyBase64(String jwtPrivateKeyBase64) {
this.jwtPrivateKeyBase64 = jwtPrivateKeyBase64;
}

/**
 * Builds the HTTP clients and, when JWT settings are present, the token generator.
 * <p>
 * Two clients are created from the factory: the regular one and a copy with content
 * compression disabled. For JWT, a non-blank private key takes precedence over a
 * non-blank secret; if neither is set, no generator is created.
 *
 * @param params initialization parameters (not read here)
 * @throws TikaConfigException if the Base64 private key cannot be converted
 */
@Override
public void initialize(Map<String, Param> params) throws TikaConfigException {
    httpClient = httpClientFactory.build();

    HttpClientFactory uncompressedFactory = httpClientFactory.copy();
    uncompressedFactory.setDisableContentCompression(true);
    noCompressHttpClient = uncompressedFactory.build();

    if (!StringUtils.isBlank(jwtPrivateKeyBase64)) {
        PrivateKey signingKey =
                JwtPrivateKeyCreds.convertBase64ToPrivateKey(jwtPrivateKeyBase64);
        JwtPrivateKeyCreds keyCreds = new JwtPrivateKeyCreds(signingKey, jwtIssuer,
                jwtSubject, jwtExpiresInSeconds);
        jwtGenerator = new JwtGenerator(keyCreds);
        return;
    }

    if (!StringUtils.isBlank(jwtSecret)) {
        byte[] secretBytes = jwtSecret.getBytes(StandardCharsets.UTF_8);
        JwtSecretCreds secretCreds = new JwtSecretCreds(secretBytes, jwtIssuer,
                jwtSubject, jwtExpiresInSeconds);
        jwtGenerator = new JwtGenerator(secretCreds);
    }
}

/**
 * Validates the JWT configuration: the shared secret and the Base64 private key are
 * mutually exclusive.
 *
 * @param problemHandler handler for initialization problems (not used here)
 * @throws TikaConfigException if both a secret and a private key were configured
 */
@Override
public void checkInitialization(InitializableProblemHandler problemHandler)
        throws TikaConfigException {
    boolean secretConfigured = !StringUtils.isBlank(jwtSecret);
    boolean privateKeyConfigured = !StringUtils.isBlank(jwtPrivateKeyBase64);
    if (secretConfigured && privateKeyConfigured) {
        throw new TikaConfigException("Both JWT secret and JWT private key base 64 were " +
                "specified. Only one or the other is supported");
    }
}

// For test purposes
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.apache.tika.pipes.fetcher.http.jwt;

/**
 * Base holder for the values common to every kind of JWT credential: who issues the
 * token, who it is about, and how long it should stay valid. Subclasses add the
 * signing material.
 */
public abstract class JwtCreds {

    /** Token issuer. */
    private final String issuer;

    /** Token subject. */
    private final String subject;

    /** Token lifetime, in seconds. */
    private final int expiresInSeconds;

    /**
     * @param issuer           issuer to associate with generated tokens
     * @param subject          subject to associate with generated tokens
     * @param expiresInSeconds lifetime of generated tokens, in seconds
     */
    public JwtCreds(String issuer, String subject, int expiresInSeconds) {
        this.expiresInSeconds = expiresInSeconds;
        this.subject = subject;
        this.issuer = issuer;
    }

    /** @return the issuer given at construction */
    public String getIssuer() {
        return this.issuer;
    }

    /** @return the subject given at construction */
    public String getSubject() {
        return this.subject;
    }

    /** @return the lifetime in seconds given at construction */
    public int getExpiresInSeconds() {
        return this.expiresInSeconds;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package org.apache.tika.pipes.fetcher.http.jwt;

import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.Date;

import com.nimbusds.jose.JOSEException;
import com.nimbusds.jose.JWSAlgorithm;
import com.nimbusds.jose.JWSHeader;
import com.nimbusds.jose.JWSSigner;
import com.nimbusds.jose.crypto.MACSigner;
import com.nimbusds.jose.crypto.RSASSASigner;
import com.nimbusds.jwt.JWTClaimsSet;
import com.nimbusds.jwt.SignedJWT;

public class JwtGenerator {
nddipiazza marked this conversation as resolved.
Show resolved Hide resolved
JwtCreds jwtCreds;
public JwtGenerator(JwtCreds jwtCreds) {
this.jwtCreds = jwtCreds;
}

public String jwt() throws JOSEException {
if (jwtCreds instanceof JwtSecretCreds) {
return jwtHS256((JwtSecretCreds) jwtCreds);
} else {
return jwtRS256((JwtPrivateKeyCreds) jwtCreds);
}
}

String jwtHS256(JwtSecretCreds jwtSecretCreds)
throws JOSEException {
JWSSigner signer = new MACSigner(jwtSecretCreds.getSecret());

JWTClaimsSet claimsSet = getJwtClaimsSet(jwtSecretCreds.getIssuer(),
jwtSecretCreds.getSubject(), jwtSecretCreds.getExpiresInSeconds());

SignedJWT signedJWT = new SignedJWT(new JWSHeader(JWSAlgorithm.HS256), claimsSet);
signedJWT.sign(signer);

return signedJWT.serialize();
}

String jwtRS256(JwtPrivateKeyCreds jwtPrivateKeyCreds)
throws JOSEException {
JWSSigner signer = new RSASSASigner(jwtPrivateKeyCreds.getPrivateKey());

JWTClaimsSet claimsSet = getJwtClaimsSet(jwtPrivateKeyCreds.getIssuer(),
jwtPrivateKeyCreds.getSubject(), jwtPrivateKeyCreds.getExpiresInSeconds());

SignedJWT signedJWT = new SignedJWT(new JWSHeader(JWSAlgorithm.RS256), claimsSet);

signedJWT.sign(signer);

return signedJWT.serialize();
}

private JWTClaimsSet getJwtClaimsSet(String issuer, String subject, int expiresInSeconds) {
return new JWTClaimsSet.Builder()
.subject(subject)
.issuer(issuer)
.expirationTime(Date.from(Instant.now().plus(expiresInSeconds, ChronoUnit.SECONDS)))
.build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package org.apache.tika.pipes.fetcher.http.jwt;

import java.security.KeyFactory;
import java.security.NoSuchAlgorithmException;
import java.security.PrivateKey;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.PKCS8EncodedKeySpec;
import java.util.Base64;

import org.apache.tika.exception.TikaConfigException;

/**
 * JWT credentials backed by a private key, plus helpers to convert a key to and from
 * its Base64 text form.
 */
public class JwtPrivateKeyCreds extends JwtCreds {
    private final PrivateKey privateKey;

    /**
     * @param privateKey       key used to sign tokens
     * @param issuer           issuer to associate with generated tokens
     * @param subject          subject to associate with generated tokens
     * @param expiresInSeconds lifetime of generated tokens, in seconds
     */
    public JwtPrivateKeyCreds(PrivateKey privateKey, String issuer, String subject,
                              int expiresInSeconds) {
        super(issuer, subject, expiresInSeconds);
        this.privateKey = privateKey;
    }

    /** @return the signing key given at construction */
    public PrivateKey getPrivateKey() {
        return privateKey;
    }

    /**
     * Encodes a private key as Base64 text.
     *
     * @param privateKey key to encode
     * @return Base64 of the key's encoded form ({@link PrivateKey#getEncoded()})
     */
    public static String convertPrivateKeyToBase64(PrivateKey privateKey) {
        byte[] privateKeyEncoded = privateKey.getEncoded();
        return Base64.getEncoder().encodeToString(privateKeyEncoded);
    }

    /**
     * Decodes Base64 text holding a PKCS#8-encoded RSA private key.
     * <p>
     * Leading and trailing whitespace is ignored, since values read from configuration
     * files frequently carry it.
     *
     * @param privateKeyBase64 Base64 text of the PKCS#8-encoded key
     * @return the decoded RSA private key
     * @throws TikaConfigException if the value is {@code null}, is not valid Base64,
     *                             or does not describe a usable RSA key
     */
    public static PrivateKey convertBase64ToPrivateKey(String privateKeyBase64)
            throws TikaConfigException {
        if (privateKeyBase64 == null) {
            throw new TikaConfigException("Private key base64 must not be null");
        }
        try {
            byte[] privateKeyEncoded = Base64.getDecoder().decode(privateKeyBase64.trim());

            KeyFactory keyFactory = KeyFactory.getInstance("RSA");
            PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(privateKeyEncoded);
            return keyFactory.generatePrivate(keySpec);
        } catch (IllegalArgumentException | NoSuchAlgorithmException
                 | InvalidKeySpecException e) {
            // IllegalArgumentException is what Base64.Decoder throws for malformed
            // input; wrap it so bad configuration is reported like other key problems.
            throw new TikaConfigException("Could not convert private key base64 to PrivateKey", e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.apache.tika.pipes.fetcher.http.jwt;

/**
 * JWT credentials backed by a shared secret.
 * <p>
 * The secret is copied on the way in and on the way out so that callers cannot change
 * the signing key of an existing instance by mutating an array they hold.
 */
public class JwtSecretCreds extends JwtCreds {
    private final byte[] secret;

    /**
     * @param secret           shared secret bytes; copied, may be {@code null}
     * @param issuer           issuer to associate with generated tokens
     * @param subject          subject to associate with generated tokens
     * @param expiresInSeconds lifetime of generated tokens, in seconds
     */
    public JwtSecretCreds(byte[] secret, String issuer, String subject, int expiresInSeconds) {
        super(issuer, subject, expiresInSeconds);
        this.secret = secret == null ? null : secret.clone();
    }

    /**
     * @return a copy of the secret bytes, or {@code null} if none was supplied
     */
    public byte[] getSecret() {
        return secret == null ? null : secret.clone();
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.security.SecureRandom;
import java.util.Collections;
import java.util.zip.GZIPInputStream;

Expand All @@ -44,6 +45,7 @@
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

import org.apache.tika.TikaTest;
import org.apache.tika.client.HttpClientFactory;
Expand All @@ -52,6 +54,7 @@
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.pipes.fetcher.FetcherManager;
import org.apache.tika.pipes.fetcher.http.jwt.JwtGenerator;

public class HttpFetcherTest extends TikaTest {

Expand Down Expand Up @@ -98,6 +101,30 @@ public void test4xxResponse() throws Exception {
assertEquals(TEST_URL, meta.get("http-connection:target-url"));
}

/**
 * Verifies that a fetch with a JWT generator installed asks the generator for a token,
 * while the usual response metadata is still populated.
 */
@Test
public void testJwt() throws Exception {
// NOTE(review): randomBytes is filled but never read afterwards — confirm whether
// it was meant to seed a secret or can be removed.
byte[] randomBytes = new byte[32];
new SecureRandom().nextBytes(randomBytes);

// Install a mock generator directly on the fetcher so the call can be verified below.
// NOTE(review): jwt() is not stubbed, so the mock presumably returns null — confirm
// that a "Bearer null" header is acceptable for this test.
httpFetcher.jwtGenerator = Mockito.mock(JwtGenerator.class);

final Metadata meta = new Metadata();
meta.set(TikaCoreProperties.RESOURCE_NAME_KEY, "fileName");

try (final InputStream ignored = httpFetcher.fetch(TEST_URL, meta)) {
// HTTP headers added into meta
assertEquals("200", meta.get("http-header:status-code"));
assertEquals(TEST_URL, meta.get("http-connection:target-url"));
// Content size included in meta
assertEquals("15", meta.get("Content-Length"));

// Filename passed in should be preserved
assertEquals("fileName", meta.get(TikaCoreProperties.RESOURCE_NAME_KEY));
}

// The fetch must have requested a token from the generator
Mockito.verify(httpFetcher.jwtGenerator).jwt();
}

@Test
@Disabled("requires network connectivity")
public void testRedirect() throws Exception {
Expand Down