Skip to content

Commit

Permalink
Add SpiderLoggerAdapter, change Spider.logger to return SpiderLoggerAdapter (#6324)
Browse files Browse the repository at this point in the history
  • Loading branch information
bloodforcream authored and Gallaecio committed May 13, 2024
1 parent 7ad9eb6 commit fa4c42d
Show file tree
Hide file tree
Showing 4 changed files with 237 additions and 3 deletions.
7 changes: 5 additions & 2 deletions scrapy/spiders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
from scrapy.utils.log import SpiderLoggerAdapter


class Spider(object_ref):
Expand All @@ -41,9 +42,11 @@ def __init__(self, name: Optional[str] = None, **kwargs: Any):
self.start_urls: List[str] = []

@property
def logger(self) -> logging.LoggerAdapter:
def logger(self) -> SpiderLoggerAdapter:
from scrapy.utils.log import SpiderLoggerAdapter

logger = logging.getLogger(self.name)
return logging.LoggerAdapter(logger, {"spider": self})
return SpiderLoggerAdapter(logger, {"spider": self})

def log(self, message: Any, level: int = logging.DEBUG, **kw: Any) -> None:
"""Log the given message at the given log level
Expand Down
25 changes: 24 additions & 1 deletion scrapy/utils/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@
import warnings
from logging.config import dictConfig
from types import TracebackType
from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, Union, cast
from typing import (
TYPE_CHECKING,
Any,
List,
MutableMapping,
Optional,
Tuple,
Type,
Union,
cast,
)

from twisted.python import log as twisted_log
from twisted.python.failure import Failure
Expand Down Expand Up @@ -249,3 +259,16 @@ def logformatter_adapter(logkws: dict) -> Tuple[int, str, dict]:
args = logkws if not logkws.get("args") else logkws["args"]

return (level, message, args)


class SpiderLoggerAdapter(logging.LoggerAdapter):
    """LoggerAdapter that injects the adapter's contextual data into every
    log call's ``extra`` mapping.

    Keys supplied when the adapter was created (e.g. ``{"spider": ...}``)
    take precedence over same-named keys passed by the caller.
    """

    def process(
        self, msg: str, kwargs: MutableMapping[str, Any]
    ) -> Tuple[str, MutableMapping[str, Any]]:
        """Merge ``self.extra`` into the caller-provided ``extra`` mapping."""
        caller_extra = kwargs.get("extra")
        if isinstance(caller_extra, MutableMapping):
            # Adapter-level context wins over caller-supplied keys.
            caller_extra.update(self.extra)
        else:
            # No usable mapping from the caller: use the adapter's context.
            kwargs["extra"] = self.extra
        return msg, kwargs
23 changes: 23 additions & 0 deletions tests/spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import asyncio
import time
from typing import Optional
from urllib.parse import urlencode

from twisted.internet import defer
Expand Down Expand Up @@ -77,6 +78,28 @@ def errback(self, failure):
self.t2_err = time.time()


class LogSpider(MetaSpider):
    """Spider exposing one thin wrapper per log level, used by logging tests."""

    name = "log_spider"

    def _emit(self, level_name: str, message: str, extra: Optional[dict]) -> None:
        # Dispatch to the matching level method on the spider's logger.
        getattr(self.logger, level_name)(message, extra=extra)

    def log_debug(self, message: str, extra: Optional[dict] = None):
        self._emit("debug", message, extra)

    def log_info(self, message: str, extra: Optional[dict] = None):
        self._emit("info", message, extra)

    def log_warning(self, message: str, extra: Optional[dict] = None):
        self._emit("warning", message, extra)

    def log_error(self, message: str, extra: Optional[dict] = None):
        self._emit("error", message, extra)

    def log_critical(self, message: str, extra: Optional[dict] = None):
        self._emit("critical", message, extra)

    def parse(self, response):
        pass


class SlowSpider(DelaySpider):
name = "slow"

Expand Down
185 changes: 185 additions & 0 deletions tests/test_utils_log.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
import json
import logging
import re
import sys
import unittest
from io import StringIO
from typing import Any, Dict, Mapping, MutableMapping
from unittest import TestCase

import pytest
from testfixtures import LogCapture
from twisted.python.failure import Failure

from scrapy.extensions import telnet
from scrapy.utils.log import (
LogCounterHandler,
SpiderLoggerAdapter,
StreamLogger,
TopLevelFormatter,
failure_to_exc_info,
)
from scrapy.utils.test import get_crawler
from tests.spiders import LogSpider


class FailureToExcInfoTest(unittest.TestCase):
Expand Down Expand Up @@ -106,3 +114,180 @@ def test_redirect(self):
with LogCapture() as log:
print("test log msg")
log.check(("test", "ERROR", "test log msg"))


@pytest.mark.parametrize(
    ("base_extra", "log_extra", "expected_extra"),
    (
        # Caller extra is merged with the adapter's context.
        (
            {"spider": "test"},
            {"extra": {"log_extra": "info"}},
            {"extra": {"log_extra": "info", "spider": "test"}},
        ),
        # No caller extra: the adapter's context is used as-is.
        (
            {"spider": "test"},
            {"extra": None},
            {"extra": {"spider": "test"}},
        ),
        # Adapter context overrides a clashing caller key.
        (
            {"spider": "test"},
            {"extra": {"spider": "test2"}},
            {"extra": {"spider": "test"}},
        ),
    ),
)
def test_spider_logger_adapter_process(
    base_extra: Mapping[str, Any], log_extra: MutableMapping, expected_extra: Dict
):
    """``process`` keeps the message intact and merges ``extra`` mappings."""
    adapter = SpiderLoggerAdapter(logging.getLogger("test"), base_extra)

    message = "test_log_message"
    processed_message, processed_kwargs = adapter.process(message, log_extra)

    assert processed_message == message
    assert processed_kwargs == expected_extra


class LoggingTestCase(TestCase):
    """Plain (no formatter) logging through LogSpider reaches the handler."""

    def setUp(self):
        self.log_stream = StringIO()
        self.handler = logging.StreamHandler(self.log_stream)
        self.logger = logging.getLogger("log_spider")
        self.logger.addHandler(self.handler)
        self.logger.setLevel(logging.DEBUG)
        self.spider = LogSpider()

    def tearDown(self):
        self.logger.removeHandler(self.handler)

    def _assert_logged(self, message: str) -> None:
        # The default handler format is just the message plus a newline.
        assert self.log_stream.getvalue() == f"{message}\n"

    def test_debug_logging(self):
        self.spider.log_debug("Foo message")
        self._assert_logged("Foo message")

    def test_info_logging(self):
        self.spider.log_info("Bar message")
        self._assert_logged("Bar message")

    def test_warning_logging(self):
        self.spider.log_warning("Baz message")
        self._assert_logged("Baz message")

    def test_error_logging(self):
        self.spider.log_error("Foo bar message")
        self._assert_logged("Foo bar message")

    def test_critical_logging(self):
        self.spider.log_critical("Foo bar baz message")
        self._assert_logged("Foo bar baz message")


class LoggingWithExtraTestCase(TestCase):
    """Logging through LogSpider carries both the caller's ``extra`` and the
    spider context injected by SpiderLoggerAdapter into the record."""

    def setUp(self):
        self.log_stream = StringIO()
        self.handler = logging.StreamHandler(self.log_stream)
        # JSON format so each test can parse the record fields back out.
        self.handler.setFormatter(
            logging.Formatter(
                '{"levelname": "%(levelname)s", "message": "%(message)s", "spider": "%(spider)s", "important_info": "%(important_info)s"}'
            )
        )
        self.logger = logging.getLogger("log_spider")
        self.logger.addHandler(self.handler)
        self.logger.setLevel(logging.DEBUG)
        self.spider = LogSpider()
        # Matches the default repr of the spider instance.
        self.regex_pattern = re.compile(r"^<LogSpider\s'log_spider'\sat\s[^>]+>$")

    def tearDown(self):
        self.logger.removeHandler(self.handler)

    def _assert_log_record(self, levelname: str, message: str, extra: dict) -> None:
        record = json.loads(self.log_stream.getvalue())
        assert record["levelname"] == levelname
        assert record["message"] == message
        assert self.regex_pattern.match(record["spider"])
        assert record["important_info"] == extra["important_info"]

    def test_debug_logging(self):
        extra = {"important_info": "foo"}
        self.spider.log_debug("Foo message", extra)
        self._assert_log_record("DEBUG", "Foo message", extra)

    def test_info_logging(self):
        extra = {"important_info": "bar"}
        self.spider.log_info("Bar message", extra)
        self._assert_log_record("INFO", "Bar message", extra)

    def test_warning_logging(self):
        extra = {"important_info": "baz"}
        self.spider.log_warning("Baz message", extra)
        self._assert_log_record("WARNING", "Baz message", extra)

    def test_error_logging(self):
        extra = {"important_info": "foo bar"}
        self.spider.log_error("Foo bar message", extra)
        self._assert_log_record("ERROR", "Foo bar message", extra)

    def test_critical_logging(self):
        extra = {"important_info": "foo bar baz"}
        self.spider.log_critical("Foo bar baz message", extra)
        self._assert_log_record("CRITICAL", "Foo bar baz message", extra)

    def test_overwrite_spider_extra(self):
        # A caller-supplied "spider" key must lose to the adapter's context.
        extra = {"important_info": "foo", "spider": "shouldn't change"}
        self.spider.log_error("Foo message", extra)
        self._assert_log_record("ERROR", "Foo message", extra)

0 comments on commit fa4c42d

Please sign in to comment.