Improve typing for Spider.parse(). #6274

Merged (4 commits) on May 13, 2024
Changes from all commits
3 changes: 3 additions & 0 deletions .github/workflows/checks.yml
@@ -18,6 +18,9 @@ jobs:
       - python-version: 3.8
         env:
           TOXENV: typing
+      - python-version: 3.8
+        env:
+          TOXENV: typing-tests
       - python-version: "3.11"  # Keep in sync with .readthedocs.yml
         env:
           TOXENV: docs
2 changes: 1 addition & 1 deletion scrapy/commands/bench.py
@@ -60,7 +60,7 @@ def start_requests(self) -> Iterable[Request]:
         url = f"{self.baseurl}?{urlencode(qargs, doseq=True)}"
         return [scrapy.Request(url, dont_filter=True)]
 
-    def parse(self, response: Response) -> Any:  # type: ignore[override]
+    def parse(self, response: Response) -> Any:
         assert isinstance(response, TextResponse)
         for link in self.link_extractor.extract_links(response):
             yield scrapy.Request(link.url, callback=self.parse)
19 changes: 14 additions & 5 deletions scrapy/spiders/__init__.py
@@ -17,12 +17,17 @@
 from scrapy.utils.url import url_is_from_spider
 
 if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    # typing.Concatenate requires Python 3.10
     # typing.Self requires Python 3.11
-    from typing_extensions import Self
+    from typing_extensions import Concatenate, Self
 
     from scrapy.crawler import Crawler
     from scrapy.settings import BaseSettings
 
+    CallbackT = Callable[Concatenate[Response, ...], Any]
Review thread on the CallbackT line:

Member Author: This could be useful in other places as it's a general type for a spider callback. I didn't think about that further, though.

Member Author: Actually, we should use it for Request.callback.

Member Author: Looks like we can't, probably because of wrong covariance when assigning a specific callback to a request; I need to investigate this separately.



 class Spider(object_ref):
     """Base class for scrapy spiders. All spiders must inherit from this

@@ -79,10 +84,14 @@ def start_requests(self) -> Iterable[Request]:
     def _parse(self, response: Response, **kwargs: Any) -> Any:
         return self.parse(response, **kwargs)
 
-    def parse(self, response: Response, **kwargs: Any) -> Any:
-        raise NotImplementedError(
-            f"{self.__class__.__name__}.parse callback is not defined"
-        )
+    if TYPE_CHECKING:
+        parse: CallbackT
+    else:
+
+        def parse(self, response: Response, **kwargs: Any) -> Any:
+            raise NotImplementedError(
+                f"{self.__class__.__name__}.parse callback is not defined"
+            )
 
     @classmethod
     def update_settings(cls, settings: BaseSettings) -> None:
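The if TYPE_CHECKING branch above gives the type checker a plain attribute declaration for parse (typed as CallbackT, whose Concatenate[Response, ...] leaves the remaining parameters gradual, so overrides may add their own), while the else branch keeps the runtime method that raises NotImplementedError. The review thread notes that CallbackT could not simply be reused for Request.callback because of variance. The following is a minimal sketch of that variance problem, not taken from the PR; the narrow_callback and cb names are purely illustrative:

from __future__ import annotations

from typing import TYPE_CHECKING, Any

from scrapy.http import HtmlResponse, Response

if TYPE_CHECKING:
    from collections.abc import Callable

    from typing_extensions import Concatenate

    # Same shape as the PR's CallbackT, kept under TYPE_CHECKING as in the PR.
    CallbackT = Callable[Concatenate[Response, ...], Any]


def narrow_callback(response: HtmlResponse, **kwargs: Any) -> Any:
    """A callback that only accepts the narrower HtmlResponse."""


# Callable parameter types are contravariant: a function that requires the
# narrower HtmlResponse is not a valid "accepts any Response" callback, so
# mypy flags the assignment below. Annotating Request.callback with CallbackT
# would make assignments of such specific callbacks fail the same way, which
# is presumably the variance problem mentioned in the thread.
cb: CallbackT = narrow_callback  # mypy: incompatible types in assignment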
68 changes: 68 additions & 0 deletions tests_typing/test_spiders.mypy-testing
@@ -0,0 +1,68 @@
from typing import Any

import pytest

from scrapy.http import HtmlResponse, Response
from scrapy.spiders import Spider


class SimpleSpider(Spider):
    pass


class SameOverrideSpider(Spider):
    def parse(self, response: Response, **kwargs: Any) -> Any:
        pass


class NoKwargsSpider(Spider):
    def parse(self, response: Response) -> Any:
        pass


class SpecificKwargsSpider(Spider):
    def parse(self, response: Response, page: int) -> Any:
        pass


class NarrowOverrideSpider(Spider):
    # without type: ignore this produces several note lines in addition to an error line,
    # which is unsupported by pytest-mypy-testing
    def parse(self, response: HtmlResponse, **kwargs: Any) -> Any:  # type: ignore[override]
        pass


@pytest.mark.mypy_testing
def test_spider_parse() -> None:
    spider = Spider()
    reveal_type(spider.parse)  # R: def (scrapy.http.response.Response, *Any, **Any) -> Any


@pytest.mark.mypy_testing
def test_spider_parse_override_simple() -> None:
    spider = SimpleSpider()
    reveal_type(spider.parse)  # R: def (scrapy.http.response.Response, *Any, **Any) -> Any


@pytest.mark.mypy_testing
def test_spider_parse_override_same() -> None:
    spider = SameOverrideSpider()
    reveal_type(spider.parse)  # R: def (response: scrapy.http.response.Response, **kwargs: Any) -> Any


@pytest.mark.mypy_testing
def test_spider_parse_override_no_kwargs() -> None:
    spider = NoKwargsSpider()
    reveal_type(spider.parse)  # R: def (response: scrapy.http.response.Response) -> Any


@pytest.mark.mypy_testing
def test_spider_parse_override_specific_kwargs() -> None:
    spider = SpecificKwargsSpider()
    reveal_type(spider.parse)  # R: def (response: scrapy.http.response.Response, page: builtins.int) -> Any


@pytest.mark.mypy_testing
def test_spider_parse_override_narrow() -> None:
    spider = NarrowOverrideSpider()
    reveal_type(spider.parse)  # R: def (response: scrapy.http.response.html.HtmlResponse, **kwargs: Any) -> Any
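As a brief illustration (not from the PR) of what the new parse typing means for user spiders: callback-specific, typed parameters passed through cb_kwargs no longer need a type: ignore[override], matching the SpecificKwargsSpider case above. The PaginatedSpider name and its pagination URL below are made up for the example:

from typing import Any

import scrapy
from scrapy.http import Response


class PaginatedSpider(scrapy.Spider):
    name = "paginated"  # hypothetical spider, for illustration only
    start_urls = ["https://example.com/?page=1"]

    def parse(self, response: Response, page: int = 1) -> Any:
        # The extra, specifically typed "page" parameter is delivered via
        # Request.cb_kwargs; mypy accepts this override without an ignore.
        yield {"page": page, "url": response.url}
        yield scrapy.Request(
            response.urljoin(f"?page={page + 1}"),
            callback=self.parse,
            cb_kwargs={"page": page + 1},
        )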
9 changes: 9 additions & 0 deletions tox.ini
@@ -56,6 +56,15 @@ deps =
 commands =
     mypy {posargs: scrapy tests}
 
+[testenv:typing-tests]
+basepython = python3.8
+deps =
+    {[test-requirements]deps}
+    {[testenv:typing]deps}
+    pytest-mypy-testing==0.1.3
+commands =
+    pytest {posargs: tests_typing}
+
 [testenv:pre-commit]
 basepython = python3
 deps =
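With the new environment above, the typing tests can be run locally (assuming tox is installed) via:

tox -e typing-tests

This runs pytest with the pytest-mypy-testing plugin over the tests_typing files, while the existing typing environment continues to run mypy directly over scrapy and tests.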