Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

E2E testing for local images #602

Merged
merged 9 commits into from
Feb 1, 2023
100 changes: 76 additions & 24 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,36 +1,43 @@
# This Makefile contains common development / build commands for Package Analysis. For everything to work properly, it needs to be kept in the top-level project directory.

REGISTRY := gcr.io/ossf-malware-analysis

# Get absolute path to top-level package analysis project directory
# outermost abspath removes the trailing slash from the directory path
PREFIX := $(abspath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
SANDBOX_DIR := $(PREFIX)/sandboxes

#
# This is just the old 'build everything script'
#
.PHONY: legacy_build_docker
legacy_build_docker:
bash build/build_docker.sh
# Registry for Docker images built and used by package analysis
REGISTRY := gcr.io/ossf-malware-analysis

# Image tag selection. TAG is taken from RELEASE_TAG (environment or make
# command line). When RELEASE_TAG is empty or unset, images are tagged
# 'latest', which is the same as tagging without specifying a version.
TAG := $(RELEASE_TAG)
ifneq ($(TAG),)
# a tag was given: pass it into the analysis image build
BUILD_ARG = --build-arg=SANDBOX_IMAGE_TAG=$(TAG)
else
TAG := latest
BUILD_ARG =
endif

.PHONY: all
all: docker_build_all

#
# These recipes build all the top-level docker images
# TODO grab release tag from env vars

docker_build_%_image:
docker build -t ${REGISTRY}/$(IMAGE_NAME) -f $(DOCKERFILE) $(DIR)
@# if TAG is 'latest', the two -t arguments are equivalent and do the same thing
docker build $(BUILD_ARG) -t ${REGISTRY}/$(IMAGE_NAME) -t ${REGISTRY}/$(IMAGE_NAME):$(TAG) -f $(DOCKERFILE) $(DIR)

#
# These build the sandbox images and also update (sync) them locally
# from Docker to podman. This is needed for local analyses; in order
# to use these updated images, pass 'nopull' to run_analysis.sh
#
docker_build_%_sandbox:
docker build -t ${REGISTRY}/$(IMAGE_NAME) -f $(DOCKERFILE) $(DIR)
sudo buildah pull docker-daemon:${REGISTRY}/${IMAGE_NAME}:latest
@# if TAG is 'latest', the two -t arguments are equivalent and do the same thing
docker build -t ${REGISTRY}/$(IMAGE_NAME) -t ${REGISTRY}/$(IMAGE_NAME):$(TAG) -f $(DOCKERFILE) $(DIR)
sudo buildah pull docker-daemon:${REGISTRY}/${IMAGE_NAME}:$(TAG)

docker_build_analysis_image: DIR=$(PREFIX)
docker_build_analysis_image: DOCKERFILE=$(PREFIX)/cmd/analyze/Dockerfile
Expand Down Expand Up @@ -70,6 +77,37 @@ docker_build_all_sandboxes: docker_build_node_sandbox docker_build_python_sandbo
.PHONY: docker_build_all
docker_build_all: docker_build_all_sandboxes docker_build_analysis_image docker_build_scheduler_image

#
# Builds then pushes analysis and sandbox images
#

# Pattern rule shared by every docker_push_<name> target: pushes all tags of
# $(REGISTRY)/$(IMAGE_NAME) to the registry. IMAGE_NAME must be provided by
# the concrete target (e.g. as a target-specific variable).
docker_push_%:
	docker push --all-tags $(REGISTRY)/$(IMAGE_NAME)

docker_push_analysis_image: docker_build_analysis_image
docker_push_scheduler_image: docker_build_scheduler_image
docker_push_node_sandbox: docker_build_node_sandbox
docker_push_python_sandbox: docker_build_python_sandbox
docker_push_ruby_sandbox: docker_build_ruby_sandbox
docker_push_packagist_sandbox: docker_build_packagist_sandbox
docker_push_crates_sandbox: docker_build_crates_sandbox
docker_push_static_analysis_sandbox: docker_build_static_analysis_sandbox

docker_push_analysis_image: IMAGE_NAME=analysis
docker_push_scheduler_image: IMAGE_NAME=scheduler
docker_push_node_sandbox: IMAGE_NAME=node
docker_push_python_sandbox: IMAGE_NAME=python
docker_push_ruby_sandbox: IMAGE_NAME=ruby
docker_push_packagist_sandbox: IMAGE_NAME=packagist
docker_push_crates_sandbox: IMAGE_NAME=crates.io
docker_push_static_analysis_sandbox: IMAGE_NAME=static-analysis

.PHONY: docker_push_all_sandboxes
docker_push_all_sandboxes: docker_push_node_sandbox docker_push_python_sandbox docker_push_ruby_sandbox docker_push_packagist_sandbox docker_push_crates_sandbox docker_push_static_analysis_sandbox

.PHONY: docker_push_all
docker_push_all: docker_push_all_sandboxes docker_push_analysis_image docker_push_scheduler_image


#
# This runs a lint check on all shell scripts in the repo
Expand All @@ -87,22 +125,36 @@ run:
# These recipes control docker-compose, which is used for
# end-to-end testing of the complete scheduler/worker system
#
.PHONY: docker_compose_start
docker_compose_start:
cd ./examples/e2e && docker-compose up -d
sleep 3
curl localhost:8080
E2E_TEST_DIR := ./test/e2e

.PHONY: e2e_test_start
e2e_test_start:
cd $(E2E_TEST_DIR) && docker-compose up -d
@echo
@echo "To see analysis results, go to http://localhost:9000/minio/package-analysis"
@echo "Remember to run `make docker_compose_stop` when done!"
@echo "Remember to run 'make e2e_test_stop' when done!"
sleep 5
curl localhost:8080

.PHONY: e2e_test_stop
e2e_test_stop:
cd $(E2E_TEST_DIR) && docker-compose down

.PHONY: e2e_test_logs_all
e2e_test_logs_all:
cd $(E2E_TEST_DIR) && docker-compose logs

.PHONY: e2e_test_logs_feeds
e2e_test_logs_feeds:
cd $(E2E_TEST_DIR) && docker-compose logs -f feeds

.PHONY: docker_compose_logs
docker_compose_logs:
cd ./examples/e2e && docker-compose logs
.PHONY: e2e_test_logs_scheduler
e2e_test_logs_scheduler:
cd $(E2E_TEST_DIR) && docker-compose logs -f scheduler

.PHONY: docker_compose_stop
docker_compose_stop:
cd ./examples/e2e && docker-compose down
# Follow (-f) the docker-compose logs of the 'analysis' service used by the
# end-to-end test. Must be run from the top-level project directory, since
# E2E_TEST_DIR is a relative path.
# Note: the .PHONY line must not end with ':'; a second colon makes Make
# parse the line as a static pattern rule instead of a phony declaration.
.PHONY: e2e_test_logs_analysis
e2e_test_logs_analysis:
	cd $(E2E_TEST_DIR) && docker-compose logs -f analysis


.PHONY: test
Expand Down
91 changes: 91 additions & 0 deletions test/e2e/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# End to End Testing with Package-Feeds integration

This directory helps run end-to-end tests of the package analysis system
to ensure everything is working properly.
In particular, local changes to both the worker/analysis and sandbox images can be tested
before they are pushed to the docker registry.

The test is orchestrated using docker-compose, using a setup adapted from the `examples/e2e` directory.
However all the necessary commands can be run easily using the project Makefile.

## Running

### Starting the test

In the top-level project directory, run

```shell
$ make RELEASE_TAG=test docker_build_all # rebuild images with 'test' tag
$ make e2e_test_start

```

### Stopping the test

In the top-level project directory, run

```shell
$ make e2e_test_stop
```

## Analysis Output

Output can be found at http://localhost:9000/minio/package-analysis,
using the following credentials for authentication:

- username: `minio`
- password: `minio123`

## Logs Access

In the top-level project directory, run

`make e2e_test_logs_feeds` to see information on the packages which have been sent downstream.

`make e2e_test_logs_scheduler` to see information on the packages which have been received and proxied onto the analysis workers.

`make e2e_test_logs_analysis` to see analysis stdout (too much to be useful); better to check minio output as described above.

## PubSub (Kafka) Inspection

Output from the Kafka PubSub topics can be inspected using
[KafkaCat](https://github.com/edenhill/kcat).

1. Install `kafkacat` or `kcat` (e.g. `sudo apt install kafkacat`)
2. Run `kafkacat` to observe the topics:
- package-feeds: `kafkacat -C -J -b localhost:9094 -t package-feeds`
- workers: `kafkacat -C -J -b localhost:9094 -t workers`
- notifications: `kafkacat -C -J -b localhost:9094 -t notifications`

## Troubleshooting

### Feeds does not start (missing config)

This can happen if `./config` is not world-readable. You will see the error message `open /config/feeds.yml: permission denied` in the feeds logs.

To fix simply run:

```shell
$ chmod ugo+rx ./config
$ chmod ugo+r ./config/feeds.yml
```

### Sandbox container is not starting (cgroups v2)

If the `analysis` logs show failures when trying to start the sandbox container, your machine may need to be configured to use cgroups v2.

To work with cgroups v2 you will need to:

1. add/edit `/etc/docker/daemon.json` and add the following:

```json
{
  "default-cgroupns-mode": "host"
}
```

2. restart dockerd (if it is running). e.g.:

```shell
$ systemctl restart docker.service
```
13 changes: 13 additions & 0 deletions test/e2e/config/feeds.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
feeds:
- type: pypi
- type: rubygems
- type: packagist
- type: npm
- type: crates
publisher:
type: kafka
config:
brokers: ["kafka:9092"]
topic: "package-feeds"

poll_rate: "10h"
84 changes: 84 additions & 0 deletions test/e2e/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
version: "3"
services:
zookeeper:
image: confluentinc/cp-zookeeper@sha256:87314e87320abf190f0407bf1689f4827661fbb4d671a41cba62673b45b66bfa
ports:
- "2181:2181"
environment:
ZOOKEEPER_CLIENT_PORT: 2181
ZOOKEEPER_TICK_TIME: 2000
ZOOKEEPER_SYNC_LIMIT: 2

kafka:
image: confluentinc/cp-kafka@sha256:c6320f9a0cbf57075e102546de110dcebdf374955f12388d58c23a54b8a47d31
ports:
- 9094:9094
depends_on:
- zookeeper
environment:
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_LISTENERS: INTERNAL://kafka:9092,OUTSIDE://kafka:9094
KAFKA_ADVERTISED_LISTENERS: INTERNAL://kafka:9092,OUTSIDE://localhost:9094
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INTERNAL:PLAINTEXT,OUTSIDE:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: INTERNAL
KAFKA_offsets_topic_replication_factor: 1

minio:
image: minio/minio@sha256:684ce208c005fe032659ec77bafa6a17a16c41686c334618dec924b3505e7090
hostname: minio
ports:
- 9000:9000
environment:
MINIO_ROOT_USER: minio
MINIO_ROOT_PASSWORD: minio123
MINIO_REGION_NAME: dummy_region
entrypoint: sh
command: -c 'mkdir -p /data/package-analysis{,-static,-file-writes} && /usr/bin/minio server /data'
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
interval: 30s
timeout: 20s
retries: 3

analysis:
image: gcr.io/ossf-malware-analysis/analysis:test
privileged: true
entrypoint: "/usr/local/bin/worker"
environment:
OSSMALWARE_WORKER_SUBSCRIPTION: kafka://worker?topic=workers
OSSF_MALWARE_NOTIFICATION_TOPIC: kafka://notifications
OSSF_MALWARE_ANALYSIS_RESULTS: s3://package-analysis?endpoint=minio:9000&disableSSL=true&s3ForcePathStyle=true
OSSF_MALWARE_STATIC_ANALYSIS_RESULTS: s3://package-analysis-static?endpoint=minio:9000&disableSSL=true&s3ForcePathStyle=true
OSSF_MALWARE_ANALYSIS_FILE_WRITE_RESULTS: s3://package-analysis-file-writes?endpoint=minio:9000&disableSSL=true&s3ForcePathStyle=true
OSSF_SANDBOX_NOPULL: "true"
maxfisher-g marked this conversation as resolved.
Show resolved Hide resolved
KAFKA_BROKERS: kafka:9092
AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: minio123
AWS_REGION: dummy_region
depends_on:
- kafka
# for mounting local sandbox images inside container
volumes:
- "/var/lib/containers:/var/lib/containers"

scheduler:
image: gcr.io/ossf-malware-analysis/scheduler:test
entrypoint: "/usr/local/bin/scheduler"
depends_on:
- kafka
environment:
OSSMALWARE_WORKER_TOPIC: kafka://workers
OSSMALWARE_SUBSCRIPTION_URL: kafka://worker?topic=package-feeds
KAFKA_BROKERS: kafka:9092

feeds:
restart: "on-failure"
image: gcr.io/ossf-malware-analysis/scheduled-feeds:latest
ports:
- 8080:8080
depends_on:
- kafka
environment:
PACKAGE_FEEDS_CONFIG_PATH: /config/feeds.yml
volumes:
- "./config/:/config/"