From fa1b964ec1bba362c78c2d2f9a7d158a65d6259a Mon Sep 17 00:00:00 2001 From: Agalin <6164461+Agalin@users.noreply.github.com> Date: Fri, 4 Nov 2022 11:04:15 +0100 Subject: [PATCH] feat(pymongo): add PyMongo integration (#1590) * feat(pymongo): add PyMongo integration Adds breadcrumbs and performance traces for PyMongo queries using PyMongo's official monitoring API. The integration is similar to the one available in OpenTelemetry; tags are set to the attribute values recommended by OpenTelemetry (OT), as specified in the `Span Operations` guidelines. Personally identifiable information (PII) will be stripped from all PyMongo commands. (This was tested against the PyMongo versions below, but "should" also be future-proof.) PyMongo version selection explanation: * 3.1 - introduction of the monitoring API. Only Python 2.7 and 3.6 supported. * 3.12 - latest 3.x release, support for 2.7, 3.6-3.9 (3.7-3.9 added in various minor releases between 3.1 and 3.12). * 4.0 - no support for 2.7, added support for 3.10. * 4.1 - no support for 3.6.0-3.6.1. * 4.2 - no support for any 3.6. Co-authored-by: Szymon Soloch Co-authored-by: Anton Pirker --- .../workflows/test-integration-pymongo.yml | 62 +++ linter-requirements.txt | 1 + sentry_sdk/integrations/pymongo.py | 183 ++++++++ setup.py | 1 + tests/integrations/pymongo/__init__.py | 3 + tests/integrations/pymongo/test_pymongo.py | 419 ++++++++++++++++++ tox.ini | 14 + 7 files changed, 683 insertions(+) create mode 100644 .github/workflows/test-integration-pymongo.yml create mode 100644 sentry_sdk/integrations/pymongo.py create mode 100644 tests/integrations/pymongo/__init__.py create mode 100644 tests/integrations/pymongo/test_pymongo.py diff --git a/.github/workflows/test-integration-pymongo.yml b/.github/workflows/test-integration-pymongo.yml new file mode 100644 index 0000000000..b2e82b7fb3 --- /dev/null +++ b/.github/workflows/test-integration-pymongo.yml @@ -0,0 +1,62 @@ +name: Test pymongo + +on: + push: + branches: + - master + - release/** + + pull_request: + +# Cancel in progress workflows on pull_requests.
+# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read + +env: + BUILD_CACHE_KEY: ${{ github.sha }} + CACHED_BUILD_PATHS: | + ${{ github.workspace }}/dist-serverless + +jobs: + test: + name: pymongo, python ${{ matrix.python-version }}, ${{ matrix.os }} + runs-on: ${{ matrix.os }} + timeout-minutes: 45 + continue-on-error: true + + strategy: + matrix: + python-version: ["2.7","3.6","3.7","3.8","3.9","3.10"] + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Setup Test Env + env: + PGHOST: localhost + PGPASSWORD: sentry + run: | + pip install codecov tox + + - name: Test pymongo + env: + CI_PYTHON_VERSION: ${{ matrix.python-version }} + timeout-minutes: 45 + shell: bash + run: | + set -x # print commands that are executed + coverage erase + + ./scripts/runtox.sh "${{ matrix.python-version }}-pymongo" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + coverage combine .coverage* + coverage xml -i + codecov --file coverage.xml diff --git a/linter-requirements.txt b/linter-requirements.txt index e8ed3e36df..1b0829ae83 100644 --- a/linter-requirements.txt +++ b/linter-requirements.txt @@ -4,6 +4,7 @@ flake8==5.0.4 types-certifi types-redis types-setuptools +pymongo # There is no separate types module. flake8-bugbear==22.9.23 pep8-naming==0.13.2 pre-commit # local linting diff --git a/sentry_sdk/integrations/pymongo.py b/sentry_sdk/integrations/pymongo.py new file mode 100644 index 0000000000..ca4669ec9e --- /dev/null +++ b/sentry_sdk/integrations/pymongo.py @@ -0,0 +1,183 @@ +from __future__ import absolute_import +import copy + +from sentry_sdk import Hub +from sentry_sdk.hub import _should_send_default_pii +from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.tracing import Span +from sentry_sdk.utils import capture_internal_exceptions + +from sentry_sdk._types import MYPY + +try: + from pymongo import monitoring +except ImportError: + raise DidNotEnable("Pymongo not installed") + +if MYPY: + from typing import Any, Dict, Union + + from pymongo.monitoring import ( + CommandFailedEvent, + CommandStartedEvent, + CommandSucceededEvent, + ) + + +SAFE_COMMAND_ATTRIBUTES = [ + "insert", + "ordered", + "find", + "limit", + "singleBatch", + "aggregate", + "createIndexes", + "indexes", + "delete", + "findAndModify", + "renameCollection", + "to", + "drop", +] + + +def _strip_pii(command): + # type: (Dict[str, Any]) -> Dict[str, Any] + for key in command: + is_safe_field = key in SAFE_COMMAND_ATTRIBUTES + if is_safe_field: + # Skip if safe key + continue + + update_db_command = key == "update" and "findAndModify" not in command + if update_db_command: + # Also skip the "update" db command because it is safe. + # There is also an "update" key in the "findAndModify" command, which is NOT safe!
+ continue + + # Special stripping for documents + is_document = key == "documents" + if is_document: + for doc in command[key]: + for doc_key in doc: + doc[doc_key] = "%s" + continue + + # Special stripping for dict style fields + is_dict_field = key in ["filter", "query", "update"] + if is_dict_field: + for item_key in command[key]: + command[key][item_key] = "%s" + continue + + # For pipeline fields strip the `$match` dict + is_pipeline_field = key == "pipeline" + if is_pipeline_field: + for pipeline in command[key]: + for match_key in pipeline["$match"] if "$match" in pipeline else []: + pipeline["$match"][match_key] = "%s" + continue + + # Default stripping + command[key] = "%s" + + return command + + +class CommandTracer(monitoring.CommandListener): + def __init__(self): + # type: () -> None + self._ongoing_operations = {} # type: Dict[int, Span] + + def _operation_key(self, event): + # type: (Union[CommandFailedEvent, CommandStartedEvent, CommandSucceededEvent]) -> int + return event.request_id + + def started(self, event): + # type: (CommandStartedEvent) -> None + hub = Hub.current + if hub.get_integration(PyMongoIntegration) is None: + return + with capture_internal_exceptions(): + command = dict(copy.deepcopy(event.command)) + + command.pop("$db", None) + command.pop("$clusterTime", None) + command.pop("$signature", None) + + op = "db.query" + + tags = { + "db.name": event.database_name, + "db.system": "mongodb", + "db.operation": event.command_name, + } + + try: + tags["net.peer.name"] = event.connection_id[0] + tags["net.peer.port"] = str(event.connection_id[1]) + except TypeError: + pass + + data = {"operation_ids": {}} # type: Dict[str, Dict[str, Any]] + + data["operation_ids"]["operation"] = event.operation_id + data["operation_ids"]["request"] = event.request_id + + try: + lsid = command.pop("lsid")["id"] + data["operation_ids"]["session"] = str(lsid) + except KeyError: + pass + + if not _should_send_default_pii(): + command = _strip_pii(command) + + query = "{} {}".format(event.command_name, command) + span = hub.start_span(op=op, description=query) + + for tag, value in tags.items(): + span.set_tag(tag, value) + + for key, value in data.items(): + span.set_data(key, value) + + with capture_internal_exceptions(): + hub.add_breadcrumb(message=query, category="query", type=op, data=tags) + + self._ongoing_operations[self._operation_key(event)] = span.__enter__() + + def failed(self, event): + # type: (CommandFailedEvent) -> None + hub = Hub.current + if hub.get_integration(PyMongoIntegration) is None: + return + + try: + span = self._ongoing_operations.pop(self._operation_key(event)) + span.set_status("internal_error") + span.__exit__(None, None, None) + except KeyError: + return + + def succeeded(self, event): + # type: (CommandSucceededEvent) -> None + hub = Hub.current + if hub.get_integration(PyMongoIntegration) is None: + return + + try: + span = self._ongoing_operations.pop(self._operation_key(event)) + span.set_status("ok") + span.__exit__(None, None, None) + except KeyError: + pass + + +class PyMongoIntegration(Integration): + identifier = "pymongo" + + @staticmethod + def setup_once(): + # type: () -> None + monitoring.register(CommandTracer()) diff --git a/setup.py b/setup.py index 40fa607c1f..62f2d10eec 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ def get_file_text(file_name): "httpx": ["httpx>=0.16.0"], "starlette": ["starlette>=0.19.1"], "fastapi": ["fastapi>=0.79.0"], + "pymongo": ["pymongo>=3.1"], }, classifiers=[ "Development Status :: 5 - 
Production/Stable", diff --git a/tests/integrations/pymongo/__init__.py b/tests/integrations/pymongo/__init__.py new file mode 100644 index 0000000000..91223b0630 --- /dev/null +++ b/tests/integrations/pymongo/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("pymongo") diff --git a/tests/integrations/pymongo/test_pymongo.py b/tests/integrations/pymongo/test_pymongo.py new file mode 100644 index 0000000000..16438ac971 --- /dev/null +++ b/tests/integrations/pymongo/test_pymongo.py @@ -0,0 +1,419 @@ +from sentry_sdk import capture_message, start_transaction +from sentry_sdk.integrations.pymongo import PyMongoIntegration, _strip_pii + +from mockupdb import MockupDB, OpQuery +from pymongo import MongoClient +import pytest + + +@pytest.fixture(scope="session") +def mongo_server(): + server = MockupDB(verbose=True) + server.autoresponds("ismaster", maxWireVersion=6) + server.run() + server.autoresponds( + {"find": "test_collection"}, cursor={"id": 123, "firstBatch": []} + ) + # Find query changed somewhere between PyMongo 3.1 and 3.12. + # This line is to respond to "find" queries sent by old PyMongo the same way it's done above. + server.autoresponds(OpQuery({"foobar": 1}), cursor={"id": 123, "firstBatch": []}) + server.autoresponds({"insert": "test_collection"}, ok=1) + server.autoresponds({"insert": "erroneous"}, ok=0, errmsg="test error") + yield server + server.stop() + + +@pytest.mark.parametrize("with_pii", [False, True]) +def test_transactions(sentry_init, capture_events, mongo_server, with_pii): + sentry_init( + integrations=[PyMongoIntegration()], + traces_sample_rate=1.0, + send_default_pii=with_pii, + ) + events = capture_events() + + connection = MongoClient(mongo_server.uri) + + with start_transaction(): + list( + connection["test_db"]["test_collection"].find({"foobar": 1}) + ) # force query execution + connection["test_db"]["test_collection"].insert_one({"foo": 2}) + try: + connection["test_db"]["erroneous"].insert_many([{"bar": 3}, {"baz": 4}]) + pytest.fail("Request should raise") + except Exception: + pass + + (event,) = events + (find, insert_success, insert_fail) = event["spans"] + + common_tags = { + "db.name": "test_db", + "db.system": "mongodb", + "net.peer.name": mongo_server.host, + "net.peer.port": str(mongo_server.port), + } + for span in find, insert_success, insert_fail: + for field, value in common_tags.items(): + assert span["tags"][field] == value + + assert find["op"] == "db.query" + assert insert_success["op"] == "db.query" + assert insert_fail["op"] == "db.query" + + assert find["tags"]["db.operation"] == "find" + assert insert_success["tags"]["db.operation"] == "insert" + assert insert_fail["tags"]["db.operation"] == "insert" + + assert find["description"].startswith("find {") + assert insert_success["description"].startswith("insert {") + assert insert_fail["description"].startswith("insert {") + if with_pii: + assert "1" in find["description"] + assert "2" in insert_success["description"] + assert "3" in insert_fail["description"] and "4" in insert_fail["description"] + else: + # All values in filter replaced by "%s" + assert "1" not in find["description"] + # All keys below top level replaced by "%s" + assert "2" not in insert_success["description"] + assert ( + "3" not in insert_fail["description"] + and "4" not in insert_fail["description"] + ) + + assert find["tags"]["status"] == "ok" + assert insert_success["tags"]["status"] == "ok" + assert insert_fail["tags"]["status"] == "internal_error" + + +@pytest.mark.parametrize("with_pii", 
[False, True]) +def test_breadcrumbs(sentry_init, capture_events, mongo_server, with_pii): + sentry_init( + integrations=[PyMongoIntegration()], + traces_sample_rate=1.0, + send_default_pii=with_pii, + ) + events = capture_events() + + connection = MongoClient(mongo_server.uri) + + list( + connection["test_db"]["test_collection"].find({"foobar": 1}) + ) # force query execution + capture_message("hi") + + (event,) = events + (crumb,) = event["breadcrumbs"]["values"] + + assert crumb["category"] == "query" + assert crumb["message"].startswith("find {") + if with_pii: + assert "1" in crumb["message"] + else: + assert "1" not in crumb["message"] + assert crumb["type"] == "db.query" + assert crumb["data"] == { + "db.name": "test_db", + "db.system": "mongodb", + "db.operation": "find", + "net.peer.name": mongo_server.host, + "net.peer.port": str(mongo_server.port), + } + + +@pytest.mark.parametrize( + "testcase", + [ + { + "command": { + "insert": "my_collection", + "ordered": True, + "documents": [ + { + "username": "anton2", + "email": "anton@somewhere.io", + "password": "c4e86722fb56d946f7ddeecdae47e1c4458bf98a0a3ee5d5113111adf7bf0175", + "_id": "635bc7403cb4f8a736f61cf2", + } + ], + }, + "command_stripped": { + "insert": "my_collection", + "ordered": True, + "documents": [ + {"username": "%s", "email": "%s", "password": "%s", "_id": "%s"} + ], + }, + }, + { + "command": { + "insert": "my_collection", + "ordered": True, + "documents": [ + { + "username": "indiana4", + "email": "indy@jones.org", + "password": "63e86722fb56d946f7ddeecdae47e1c4458bf98a0a3ee5d5113111adf7bf016b", + "_id": "635bc7403cb4f8a736f61cf3", + } + ], + }, + "command_stripped": { + "insert": "my_collection", + "ordered": True, + "documents": [ + {"username": "%s", "email": "%s", "password": "%s", "_id": "%s"} + ], + }, + }, + { + "command": { + "find": "my_collection", + "filter": {}, + "limit": 1, + "singleBatch": True, + }, + "command_stripped": { + "find": "my_collection", + "filter": {}, + "limit": 1, + "singleBatch": True, + }, + }, + { + "command": { + "find": "my_collection", + "filter": {"username": "notthere"}, + "limit": 1, + "singleBatch": True, + }, + "command_stripped": { + "find": "my_collection", + "filter": {"username": "%s"}, + "limit": 1, + "singleBatch": True, + }, + }, + { + "command": { + "insert": "my_collection", + "ordered": True, + "documents": [ + { + "username": "userx1", + "email": "x@somewhere.io", + "password": "ccc86722fb56d946f7ddeecdae47e1c4458bf98a0a3ee5d5113111adf7bf0175", + "_id": "635bc7403cb4f8a736f61cf4", + }, + { + "username": "userx2", + "email": "x@somewhere.io", + "password": "xxx86722fb56d946f7ddeecdae47e1c4458bf98a0a3ee5d5113111adf7bf0175", + "_id": "635bc7403cb4f8a736f61cf5", + }, + ], + }, + "command_stripped": { + "insert": "my_collection", + "ordered": True, + "documents": [ + {"username": "%s", "email": "%s", "password": "%s", "_id": "%s"}, + {"username": "%s", "email": "%s", "password": "%s", "_id": "%s"}, + ], + }, + }, + { + "command": { + "find": "my_collection", + "filter": {"email": "ada@lovelace.com"}, + }, + "command_stripped": {"find": "my_collection", "filter": {"email": "%s"}}, + }, + { + "command": { + "aggregate": "my_collection", + "pipeline": [{"$match": {}}, {"$group": {"_id": 1, "n": {"$sum": 1}}}], + "cursor": {}, + }, + "command_stripped": { + "aggregate": "my_collection", + "pipeline": [{"$match": {}}, {"$group": {"_id": 1, "n": {"$sum": 1}}}], + "cursor": "%s", + }, + }, + { + "command": { + "aggregate": "my_collection", + "pipeline": [ + {"$match": 
{"email": "x@somewhere.io"}}, + {"$group": {"_id": 1, "n": {"$sum": 1}}}, + ], + "cursor": {}, + }, + "command_stripped": { + "aggregate": "my_collection", + "pipeline": [ + {"$match": {"email": "%s"}}, + {"$group": {"_id": 1, "n": {"$sum": 1}}}, + ], + "cursor": "%s", + }, + }, + { + "command": { + "createIndexes": "my_collection", + "indexes": [{"name": "username_1", "key": [("username", 1)]}], + }, + "command_stripped": { + "createIndexes": "my_collection", + "indexes": [{"name": "username_1", "key": [("username", 1)]}], + }, + }, + { + "command": { + "update": "my_collection", + "ordered": True, + "updates": [ + ("q", {"email": "anton@somewhere.io"}), + ( + "u", + { + "email": "anton2@somwehre.io", + "extra_field": "extra_content", + "new": "bla", + }, + ), + ("multi", False), + ("upsert", False), + ], + }, + "command_stripped": { + "update": "my_collection", + "ordered": True, + "updates": "%s", + }, + }, + { + "command": { + "update": "my_collection", + "ordered": True, + "updates": [ + ("q", {"email": "anton2@somwehre.io"}), + ("u", {"$rename": {"new": "new_field"}}), + ("multi", False), + ("upsert", False), + ], + }, + "command_stripped": { + "update": "my_collection", + "ordered": True, + "updates": "%s", + }, + }, + { + "command": { + "update": "my_collection", + "ordered": True, + "updates": [ + ("q", {"email": "x@somewhere.io"}), + ("u", {"$rename": {"password": "pwd"}}), + ("multi", True), + ("upsert", False), + ], + }, + "command_stripped": { + "update": "my_collection", + "ordered": True, + "updates": "%s", + }, + }, + { + "command": { + "delete": "my_collection", + "ordered": True, + "deletes": [("q", {"username": "userx2"}), ("limit", 1)], + }, + "command_stripped": { + "delete": "my_collection", + "ordered": True, + "deletes": "%s", + }, + }, + { + "command": { + "delete": "my_collection", + "ordered": True, + "deletes": [("q", {"email": "xplus@somewhere.io"}), ("limit", 0)], + }, + "command_stripped": { + "delete": "my_collection", + "ordered": True, + "deletes": "%s", + }, + }, + { + "command": { + "findAndModify": "my_collection", + "query": {"email": "ada@lovelace.com"}, + "new": False, + "remove": True, + }, + "command_stripped": { + "findAndModify": "my_collection", + "query": {"email": "%s"}, + "new": "%s", + "remove": "%s", + }, + }, + { + "command": { + "findAndModify": "my_collection", + "query": {"email": "anton2@somewhere.io"}, + "new": False, + "update": {"email": "anton3@somwehre.io", "extra_field": "xxx"}, + "upsert": False, + }, + "command_stripped": { + "findAndModify": "my_collection", + "query": {"email": "%s"}, + "new": "%s", + "update": {"email": "%s", "extra_field": "%s"}, + "upsert": "%s", + }, + }, + { + "command": { + "findAndModify": "my_collection", + "query": {"email": "anton3@somewhere.io"}, + "new": False, + "update": {"$rename": {"extra_field": "extra_field2"}}, + "upsert": False, + }, + "command_stripped": { + "findAndModify": "my_collection", + "query": {"email": "%s"}, + "new": "%s", + "update": {"$rename": "%s"}, + "upsert": "%s", + }, + }, + { + "command": { + "renameCollection": "test.my_collection", + "to": "test.new_collection", + }, + "command_stripped": { + "renameCollection": "test.my_collection", + "to": "test.new_collection", + }, + }, + { + "command": {"drop": "new_collection"}, + "command_stripped": {"drop": "new_collection"}, + }, + ], +) +def test_strip_pii(testcase): + assert _strip_pii(testcase["command"]) == testcase["command_stripped"] diff --git a/tox.ini b/tox.ini index 8b19296671..2067ff8916 100644 --- a/tox.ini +++ 
b/tox.ini @@ -96,6 +96,11 @@ envlist = {py3.6,py3.7,py3.8,py3.9,py3.10}-httpx-{0.16,0.17} + {py2.7,py3.6}-pymongo-{3.1} + {py2.7,py3.6,py3.7,py3.8,py3.9}-pymongo-{3.12} + {py3.6,py3.7,py3.8,py3.9,py3.10}-pymongo-{4.0} + {py3.7,py3.8,py3.9,py3.10}-pymongo-{4.1,4.2} + [testenv] deps = # if you change test-requirements.txt and your change is not being reflected @@ -280,6 +285,13 @@ deps = httpx-0.16: httpx>=0.16,<0.17 httpx-0.17: httpx>=0.17,<0.18 + pymongo: mockupdb + pymongo-3.1: pymongo>=3.1,<3.2 + pymongo-3.12: pymongo>=3.12,<4.0 + pymongo-4.0: pymongo>=4.0,<4.1 + pymongo-4.1: pymongo>=4.1,<4.2 + pymongo-4.2: pymongo>=4.2,<4.3 + setenv = PYTHONDONTWRITEBYTECODE=1 TESTPATH=tests @@ -309,6 +321,7 @@ setenv = chalice: TESTPATH=tests/integrations/chalice boto3: TESTPATH=tests/integrations/boto3 httpx: TESTPATH=tests/integrations/httpx + pymongo: TESTPATH=tests/integrations/pymongo COVERAGE_FILE=.coverage-{envname} passenv = @@ -324,6 +337,7 @@ extras = bottle: bottle falcon: falcon quart: quart + pymongo: pymongo basepython = py2.7: python2.7
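Note (not part of the patch): a minimal usage sketch for the integration added above. The DSN, MongoDB connection string, transaction name, and database/collection names are placeholders; PyMongoIntegration, sentry_sdk.init(), and start_transaction() are the entry points actually exercised by the tests in this patch, the rest is illustrative.

    import sentry_sdk
    from sentry_sdk.integrations.pymongo import PyMongoIntegration
    from pymongo import MongoClient

    sentry_sdk.init(
        dsn="https://<key>@<org>.ingest.sentry.io/<project>",  # placeholder DSN
        integrations=[PyMongoIntegration()],
        traces_sample_rate=1.0,    # record performance spans for MongoDB commands
        send_default_pii=False,    # SDK default; command values are redacted to "%s"
    )

    client = MongoClient("mongodb://localhost:27017")  # placeholder connection string

    with sentry_sdk.start_transaction(name="mongo-demo"):  # hypothetical transaction name
        client["my_db"]["my_collection"].insert_one({"username": "ada"})
        list(client["my_db"]["my_collection"].find({"username": "ada"}))

    # With send_default_pii=False the span description and breadcrumb message look
    # roughly like: find {'find': 'my_collection', 'filter': {'username': '%s'}, ...}
    # With send_default_pii=True the original values are kept.

Because setup_once() registers a global pymongo.monitoring.CommandListener, any MongoClient created after sentry_sdk.init() is traced without further per-client configuration.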