Skip to content

Commit

Permalink
feat(pymongo): add PyMongo integration (#1590)
Browse files Browse the repository at this point in the history
* feat(pymongo): add PyMongo integration

Adds breadcrumbs and performance traces for PyMongo queries using an
official monitoring API. Integration is similar to the one available in
OpenTelemetry, tags set to values recommended for attributes by OT as
specified in `Span Operations` guidelines.

Personal identifiable information (PII) will be stripped from all PyMongo commands. (This was tested in the PyMongo versions below, but "should" also be future proof)

PyMongo version selection explanation:
* 3.1 - introduction of monitoring API. Only Python 2.7 and 3.6
supported.
* 3.12 - latest 3.x release, support for 2.7, 3.6-3.9 (3.7-3.9 added in
various minor releases between 3.1 and 3.12).
* 4.0 - no support for 2.7, added support for 3.10.
* 4.1 - no support for 3.6.0-3.6.1.
* 4.2 - no support for any 3.6.

Co-authored-by: Szymon Soloch <ssoloch@opera.com>
Co-authored-by: Anton Pirker <anton.pirker@sentry.io>
  • Loading branch information
3 people committed Nov 4, 2022
1 parent d196a43 commit fa1b964
Show file tree
Hide file tree
Showing 7 changed files with 683 additions and 0 deletions.
62 changes: 62 additions & 0 deletions .github/workflows/test-integration-pymongo.yml
@@ -0,0 +1,62 @@
name: Test pymongo

on:
push:
branches:
- master
- release/**

pull_request:

# Cancel in progress workflows on pull_requests.
# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read

env:
BUILD_CACHE_KEY: ${{ github.sha }}
CACHED_BUILD_PATHS: |
${{ github.workspace }}/dist-serverless
jobs:
test:
name: pymongo, python ${{ matrix.python-version }}, ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 45
continue-on-error: true

strategy:
matrix:
python-version: ["2.7","3.6","3.7","3.8","3.9","3.10"]
os: [ubuntu-latest]

steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Setup Test Env
env:
PGHOST: localhost
PGPASSWORD: sentry
run: |
pip install codecov tox
- name: Test pymongo
env:
CI_PYTHON_VERSION: ${{ matrix.python-version }}
timeout-minutes: 45
shell: bash
run: |
set -x # print commands that are executed
coverage erase
./scripts/runtox.sh "${{ matrix.python-version }}-pymongo" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch
coverage combine .coverage*
coverage xml -i
codecov --file coverage.xml
1 change: 1 addition & 0 deletions linter-requirements.txt
Expand Up @@ -4,6 +4,7 @@ flake8==5.0.4
types-certifi
types-redis
types-setuptools
pymongo # There is no separate types module.
flake8-bugbear==22.9.23
pep8-naming==0.13.2
pre-commit # local linting
183 changes: 183 additions & 0 deletions sentry_sdk/integrations/pymongo.py
@@ -0,0 +1,183 @@
from __future__ import absolute_import
import copy

from sentry_sdk import Hub
from sentry_sdk.hub import _should_send_default_pii
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.tracing import Span
from sentry_sdk.utils import capture_internal_exceptions

from sentry_sdk._types import MYPY

try:
from pymongo import monitoring
except ImportError:
raise DidNotEnable("Pymongo not installed")

if MYPY:
from typing import Any, Dict, Union

from pymongo.monitoring import (
CommandFailedEvent,
CommandStartedEvent,
CommandSucceededEvent,
)


SAFE_COMMAND_ATTRIBUTES = [
"insert",
"ordered",
"find",
"limit",
"singleBatch",
"aggregate",
"createIndexes",
"indexes",
"delete",
"findAndModify",
"renameCollection",
"to",
"drop",
]


def _strip_pii(command):
# type: (Dict[str, Any]) -> Dict[str, Any]
for key in command:
is_safe_field = key in SAFE_COMMAND_ATTRIBUTES
if is_safe_field:
# Skip if safe key
continue

update_db_command = key == "update" and "findAndModify" not in command
if update_db_command:
# Also skip "update" db command because it is save.
# There is also an "update" key in the "findAndModify" command, which is NOT safe!
continue

# Special stripping for documents
is_document = key == "documents"
if is_document:
for doc in command[key]:
for doc_key in doc:
doc[doc_key] = "%s"
continue

# Special stripping for dict style fields
is_dict_field = key in ["filter", "query", "update"]
if is_dict_field:
for item_key in command[key]:
command[key][item_key] = "%s"
continue

# For pipeline fields strip the `$match` dict
is_pipeline_field = key == "pipeline"
if is_pipeline_field:
for pipeline in command[key]:
for match_key in pipeline["$match"] if "$match" in pipeline else []:
pipeline["$match"][match_key] = "%s"
continue

# Default stripping
command[key] = "%s"

return command


class CommandTracer(monitoring.CommandListener):
def __init__(self):
# type: () -> None
self._ongoing_operations = {} # type: Dict[int, Span]

def _operation_key(self, event):
# type: (Union[CommandFailedEvent, CommandStartedEvent, CommandSucceededEvent]) -> int
return event.request_id

def started(self, event):
# type: (CommandStartedEvent) -> None
hub = Hub.current
if hub.get_integration(PyMongoIntegration) is None:
return
with capture_internal_exceptions():
command = dict(copy.deepcopy(event.command))

command.pop("$db", None)
command.pop("$clusterTime", None)
command.pop("$signature", None)

op = "db.query"

tags = {
"db.name": event.database_name,
"db.system": "mongodb",
"db.operation": event.command_name,
}

try:
tags["net.peer.name"] = event.connection_id[0]
tags["net.peer.port"] = str(event.connection_id[1])
except TypeError:
pass

data = {"operation_ids": {}} # type: Dict[str, Dict[str, Any]]

data["operation_ids"]["operation"] = event.operation_id
data["operation_ids"]["request"] = event.request_id

try:
lsid = command.pop("lsid")["id"]
data["operation_ids"]["session"] = str(lsid)
except KeyError:
pass

if not _should_send_default_pii():
command = _strip_pii(command)

query = "{} {}".format(event.command_name, command)
span = hub.start_span(op=op, description=query)

for tag, value in tags.items():
span.set_tag(tag, value)

for key, value in data.items():
span.set_data(key, value)

with capture_internal_exceptions():
hub.add_breadcrumb(message=query, category="query", type=op, data=tags)

self._ongoing_operations[self._operation_key(event)] = span.__enter__()

def failed(self, event):
# type: (CommandFailedEvent) -> None
hub = Hub.current
if hub.get_integration(PyMongoIntegration) is None:
return

try:
span = self._ongoing_operations.pop(self._operation_key(event))
span.set_status("internal_error")
span.__exit__(None, None, None)
except KeyError:
return

def succeeded(self, event):
# type: (CommandSucceededEvent) -> None
hub = Hub.current
if hub.get_integration(PyMongoIntegration) is None:
return

try:
span = self._ongoing_operations.pop(self._operation_key(event))
span.set_status("ok")
span.__exit__(None, None, None)
except KeyError:
pass


class PyMongoIntegration(Integration):
identifier = "pymongo"

@staticmethod
def setup_once():
# type: () -> None
monitoring.register(CommandTracer())
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -62,6 +62,7 @@ def get_file_text(file_name):
"httpx": ["httpx>=0.16.0"],
"starlette": ["starlette>=0.19.1"],
"fastapi": ["fastapi>=0.79.0"],
"pymongo": ["pymongo>=3.1"],
},
classifiers=[
"Development Status :: 5 - Production/Stable",
Expand Down
3 changes: 3 additions & 0 deletions tests/integrations/pymongo/__init__.py
@@ -0,0 +1,3 @@
import pytest

pytest.importorskip("pymongo")

0 comments on commit fa1b964

Please sign in to comment.