Skip to content

Commit

Permalink
Backport usage reporting improvements #7101 to AS3 (#7106)
Browse files Browse the repository at this point in the history
We only require Node 12 here but that's still enough for the zlib API
change.

We don't bother to add `signal` to the `apollo-server-env` RequestInit.
The integration test does show that it works with the default fetcher
(`node-fetch` via `apollo-server-env`).
  • Loading branch information
glasser committed Oct 31, 2022
1 parent 0e8d85f commit 8ca2c11
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 37 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Expand Up @@ -10,7 +10,13 @@ The version headers in this history reflect the versions of Apollo Server itself

## vNEXT

## v3.10.4

- `apollo-server-core`: Manage memory more efficiently in the usage reporting plugin by allowing large objects to be garbage collected more quickly. [PR #7106](https://github.com/apollographql/apollo-server/pull/7106)
- `apollo-server-core`: The usage reporting plugin now defaults to a 30 second timeout for each attempt to send reports to Apollo Server instead of no timeout; the timeout can be adjusted with the new `requestTimeoutMs` option to `ApolloServerPluginUsageReporting`. (Apollo's servers already enforced a 30 second timeout, so this is unlikely to break any existing use cases.) [PR #7106](https://github.com/apollographql/apollo-server/pull/7106)

## v3.10.3

- `apollo-server-core`: Fix memory leak in usage reporting plugin. [Issue #6983](https://github.com/apollographql/apollo-server/issues/6983) [PR #6999](https://github.com/apollographql/apollo-server/[Issue #6983](https://github.com/apollographql/apollo-server/issues/6983)pull/6999)

## v3.10.2
Expand Down
14 changes: 14 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/apollo-server-core/package.json
Expand Up @@ -44,6 +44,7 @@
"graphql-tag": "^2.11.0",
"loglevel": "^1.6.8",
"lru-cache": "^6.0.0",
"node-abort-controller": "^3.0.1",
"sha.js": "^2.4.11",
"uuid": "^9.0.0",
"whatwg-mimetype": "^3.0.0"
Expand Down
Expand Up @@ -258,6 +258,12 @@ export interface ApolloServerPluginUsageReportingOptions<TContext> {
* Minimum back-off for retries. Defaults to 100ms.
*/
minimumRetryDelayMs?: number;
/**
* Default timeout for each individual attempt to send a report to Apollo.
* (This is for each HTTP POST, not for all potential retries.) Defaults to 30
* seconds (30000ms).
*/
requestTimeoutMs?: number;
/**
* A logger interface to be used for output and errors. When not provided
* it will default to the server's own `logger` implementation and use
Expand Down
72 changes: 44 additions & 28 deletions packages/apollo-server-core/src/plugin/usageReporting/plugin.ts
@@ -1,8 +1,10 @@
import os from 'os';
import { promisify } from 'util';
import { gzip } from 'zlib';
import retry from 'async-retry';
import { Report, ReportHeader, Trace } from 'apollo-reporting-protobuf';
import { Response, fetch, Headers } from 'apollo-server-env';
import { Response, fetch, Headers, RequestInit } from 'apollo-server-env';
import { AbortController } from 'node-abort-controller';
import type {
GraphQLRequestListener,
GraphQLServerListener,
Expand Down Expand Up @@ -38,6 +40,8 @@ import {
} from '@apollo/utils.usagereporting';
import type LRUCache from 'lru-cache';

const gzipPromise = promisify(gzip);

const reportHeaderDefaults = {
hostname: os.hostname(),
agentVersion: `apollo-server-core@${
Expand Down Expand Up @@ -238,7 +242,7 @@ export function ApolloServerPluginUsageReporting<TContext extends BaseContext>(

// Needs to be an arrow function to be confident that key is defined.
const sendReport = async (executableSchemaId: string): Promise<void> => {
const report = getAndDeleteReport(executableSchemaId);
let report = getAndDeleteReport(executableSchemaId);
if (
!report ||
(Object.keys(report.tracesPerQuery).length === 0 &&
Expand All @@ -255,9 +259,12 @@ export function ApolloServerPluginUsageReporting<TContext extends BaseContext>(

const protobufError = Report.verify(report);
if (protobufError) {
throw new Error(`Error encoding report: ${protobufError}`);
throw new Error(`Error verifying report: ${protobufError}`);
}
const message = Report.encode(report).finish();
let message: Uint8Array | null = Report.encode(report).finish();
// Let the original protobuf object be garbage collected (helpful if the
// HTTP request hangs).
report = null;

// Potential follow-up: we can compare message.length to
// report.sizeEstimator.bytes and use it to "learn" if our estimation is
Expand All @@ -282,35 +289,26 @@ export function ApolloServerPluginUsageReporting<TContext extends BaseContext>(
);
}

const compressed = await new Promise<Buffer>((resolve, reject) => {
// The protobuf library gives us a Uint8Array. Node 8's zlib lets us
// pass it directly; convert for the sake of Node 6. (No support right
// now for Node 4, which lacks Buffer.from.)
const messageBuffer = Buffer.from(
message.buffer as ArrayBuffer,
message.byteOffset,
message.byteLength,
);
gzip(messageBuffer, (err, gzipResult) => {
if (err) {
reject(err);
} else {
resolve(gzipResult);
}
});
});
const compressed = await gzipPromise(message);
// Let the uncompressed message be garbage collected (helpful if the
// HTTP request is slow).
message = null;

// Wrap fetcher with async-retry for automatic retrying
const fetcher = options.fetcher ?? fetch;
const response: Response = await retry(
// Retry on network errors and 5xx HTTP
// responses.
async () => {
const curResponse = await fetcher(
(options.endpointUrl ||
'https://usage-reporting.api.apollographql.com') +
'/api/ingress/traces',
{
// Note that once we require Node v16 we can use its global
// AbortController instead of the one from `node-abort-controller`.
const controller = new AbortController();
const abortTimeout = setTimeout(() => {
controller.abort();
}, options.requestTimeoutMs ?? 30_000);
let curResponse;
try {
const requestInit: RequestInit = {
method: 'POST',
headers: {
'user-agent': 'ApolloServerPluginUsageReporting',
Expand All @@ -320,8 +318,26 @@ export function ApolloServerPluginUsageReporting<TContext extends BaseContext>(
},
body: compressed,
agent: options.requestAgent,
},
);
};
// The apollo-server-env Fetch API doesn't have `signal` in
// RequestInit, but it does work in node-fetch. We've added it
// already to our `Fetcher` interface (`@apollo/utils.fetcher`)
// that we're using in AS4 but making changes to
// `apollo-server-env` that could cause custom AS3 fetchers to not
// compile feels like a bad idea. The worst case scenario of
// passing in an ignored `signal` is the timeout doesn't work, in
// which case you're not getting the new feature but can change
// your fetcher to make it work.
(requestInit as any).signal = controller.signal;
curResponse = await fetcher(
(options.endpointUrl ||
'https://usage-reporting.api.apollographql.com') +
'/api/ingress/traces',
requestInit,
);
} finally {
clearTimeout(abortTimeout);
}

if (curResponse.status >= 500 && curResponse.status < 600) {
throw new Error(
Expand Down
27 changes: 18 additions & 9 deletions packages/apollo-server-integration-testsuite/src/ApolloServer.ts
Expand Up @@ -1983,21 +1983,20 @@ export function testApolloServer<AS extends ApolloServerBase>(

describe('graphql server functions even when Apollo servers are down', () => {
async function testWithStatus(
status: number,
status: number | 'cannot-connect' | 'timeout',
expectedRequestCount: number,
) {
const networkError = status === 0;

const { closeServer, fakeUsageReportingUrl, writeResponseResolve } =
await makeFakeUsageReportingServer({
status,
// the 444 case shouldn't ever get to actually sending 444
status: typeof status === 'number' ? status : 444,
waitWriteResponse: true,
});

try {
// To simulate a network error, we create and close the server.
// This lets us still generate a port that is hopefully unused.
if (networkError) {
if (status == 'cannot-connect') {
await closeServer();
}

Expand Down Expand Up @@ -2034,6 +2033,8 @@ export function testApolloServer<AS extends ApolloServerBase>(
reportErrorFunction(error: Error) {
reportErrorPromiseResolve(error);
},
// Make sure the timeout test actually finishes in time
requestTimeoutMs: status === 'timeout' ? 10 : undefined,
}),
],
});
Expand All @@ -2049,27 +2050,32 @@ export function testApolloServer<AS extends ApolloServerBase>(
});
expect(result.data.something).toBe('hello');

if (!networkError) {
if (typeof status === 'number') {
// Allow reporting to return its response (for every retry).
// (But not if we're intentionally timing out!)
writeResponseResolve();
}

// Make sure we can get the error from reporting.
const sendingError = await reportErrorPromise;
expect(sendingError).toBeTruthy();
if (networkError) {
if (status === 'cannot-connect') {
expect(sendingError.message).toContain(
'Error sending report to Apollo servers',
);
expect(sendingError.message).toContain('ECONNREFUSED');
} else if (status === 'timeout') {
expect(sendingError.message).toBe(
'Error sending report to Apollo servers: The user aborted a request.',
);
} else {
expect(sendingError.message).toBe(
`Error sending report to Apollo servers: HTTP status ${status}, Important text in the body`,
);
}
expect(requestCount).toBe(expectedRequestCount);
} finally {
if (!networkError) {
if (status !== 'cannot-connect') {
await closeServer();
}
}
Expand All @@ -2079,7 +2085,10 @@ export function testApolloServer<AS extends ApolloServerBase>(
await testWithStatus(500, 3);
});
it('with network error', async () => {
await testWithStatus(0, 3);
await testWithStatus('cannot-connect', 3);
});
it('with timeout', async () => {
await testWithStatus('timeout', 3);
});
it('with non-retryable error', async () => {
await testWithStatus(400, 1);
Expand Down

0 comments on commit 8ca2c11

Please sign in to comment.