Use ParamSpec for callables. (#6353)
wRAR committed May 13, 2024
1 parent 93f0628 commit 4ed5c5a
Showing 13 changed files with 204 additions and 101 deletions.
15 changes: 13 additions & 2 deletions scrapy/cmdline.py
@@ -1,10 +1,12 @@
+from __future__ import annotations
+
 import argparse
 import cProfile
 import inspect
 import os
 import sys
 from importlib.metadata import entry_points
-from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type
+from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Tuple, Type
 
 import scrapy
 from scrapy.commands import BaseRunSpiderCommand, ScrapyCommand, ScrapyHelpFormatter
@@ -15,6 +17,12 @@
 from scrapy.utils.project import get_project_settings, inside_project
 from scrapy.utils.python import garbage_collect
 
+if TYPE_CHECKING:
+    # typing.ParamSpec requires Python 3.10
+    from typing_extensions import ParamSpec
+
+    _P = ParamSpec("_P")
+
 
 class ScrapyArgumentParser(argparse.ArgumentParser):
     def _parse_optional(
@@ -121,7 +129,10 @@ def _print_unknown_command(
 
 
 def _run_print_help(
-    parser: argparse.ArgumentParser, func: Callable, *a: Any, **kw: Any
+    parser: argparse.ArgumentParser,
+    func: Callable[_P, None],
+    *a: _P.args,
+    **kw: _P.kwargs,
 ) -> None:
     try:
         func(*a, **kw)
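The pattern introduced here is worth a standalone illustration. The sketch below uses hypothetical names (run_with_help, greet), not Scrapy code: it shows what ParamSpec buys over the old Callable plus Any annotations, namely that the wrapper's *a/**kw are tied to the wrapped callable's own signature, so a type checker validates the forwarded arguments. The runtime version check stands in for the TYPE_CHECKING-guarded typing_extensions import used in the diff, which works there because from __future__ import annotations keeps annotations unevaluated at runtime and so adds no runtime dependency.

import sys
from typing import Callable

if sys.version_info >= (3, 10):
    from typing import ParamSpec  # stdlib ParamSpec exists from Python 3.10
else:
    from typing_extensions import ParamSpec  # backport for older interpreters

_P = ParamSpec("_P")


def run_with_help(func: Callable[_P, None], *a: _P.args, **kw: _P.kwargs) -> None:
    # *a and **kw are declared to match func's own parameters, so a type
    # checker verifies the forwarded call instead of accepting Any.
    func(*a, **kw)


def greet(name: str, shout: bool = False) -> None:
    print(name.upper() if shout else name)


run_with_help(greet, "hello", shout=True)  # OK: matches greet's signature
# run_with_help(greet, volume=11)          # mypy error: unexpected keyword argument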
10 changes: 4 additions & 6 deletions scrapy/core/downloader/handlers/ftp.py
@@ -109,12 +109,10 @@ def download_request(self, request: Request, spider: Spider) -> Deferred:
     def gotClient(self, client: FTPClient, request: Request, filepath: str) -> Deferred:
         self.client = client
         protocol = ReceivedDataProtocol(request.meta.get("ftp_local_filename"))
-        return client.retrieveFile(filepath, protocol).addCallbacks(
-            callback=self._build_response,
-            callbackArgs=(request, protocol),
-            errback=self._failed,
-            errbackArgs=(request,),
-        )
+        d = client.retrieveFile(filepath, protocol)
+        d.addCallback(self._build_response, request, protocol)
+        d.addErrback(self._failed, request)
+        return d
 
     def _build_response(
         self, result: Any, request: Request, protocol: ReceivedDataProtocol
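A general Twisted detail, not stated in the commit, explains what subtly changes in these addCallbacks-to-addCallback/addErrback rewrites: addCallbacks(cb, eb) attaches both functions at the same position in the chain, so eb never sees an error raised inside cb, whereas a separate addErrback sits one link further down and does catch it. The two forms are equivalent on the success path. A minimal demonstration, assuming Twisted is installed:

from twisted.internet.defer import Deferred


def cb(result: str) -> str:
    raise RuntimeError("callback blew up")


def eb(failure):
    print("errback caught:", failure.getErrorMessage())
    return None  # consume the failure


# Paired form: eb is NOT invoked for errors raised inside its paired cb.
d1: Deferred = Deferred()
d1.addCallbacks(cb, eb)
d1.addErrback(lambda f: print("later errback caught:", f.getErrorMessage()))
d1.callback("ok")  # prints "later errback caught: callback blew up"

# Split form: eb sits after cb in the chain, so it DOES catch cb's error.
d2: Deferred = Deferred()
d2.addCallback(cb)
d2.addErrback(eb)
d2.callback("ok")  # prints "errback caught: callback blew up"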
2 changes: 1 addition & 1 deletion scrapy/core/engine.py
@@ -347,7 +347,7 @@ def _on_complete(_: Any) -> Any:
 
         assert self.spider is not None
         dwld = self.downloader.fetch(request, self.spider)
-        dwld.addCallbacks(_on_success)
+        dwld.addCallback(_on_success)
         dwld.addBoth(_on_complete)
         return dwld
 
10 changes: 6 additions & 4 deletions scrapy/core/scraper.py
@@ -8,7 +8,6 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    AsyncGenerator,
     AsyncIterable,
     Deque,
     Generator,
@@ -18,6 +17,7 @@
     Tuple,
     Type,
     Union,
+    cast,
 )
 
 from itemadapter import is_item
@@ -184,7 +184,9 @@ def _scrape(
             result, request, spider
         )  # returns spider's processed output
         dfd.addErrback(self.handle_spider_error, request, result, spider)
-        dfd.addCallback(self.handle_spider_output, request, result, spider)
+        dfd.addCallback(
+            self.handle_spider_output, request, cast(Response, result), spider
+        )
         return dfd
 
     def _scrape2(
@@ -256,12 +258,12 @@ def handle_spider_output(
         self,
         result: Union[Iterable, AsyncIterable],
         request: Request,
-        response: Union[Response, Failure],
+        response: Response,
         spider: Spider,
     ) -> Deferred:
         if not result:
             return defer_succeed(None)
-        it: Union[Generator, AsyncGenerator]
+        it: Union[Iterable, AsyncIterable]
         if isinstance(result, AsyncIterable):
             it = aiter_errback(
                 result, self.handle_spider_error, request, response, spider
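My reading of the cast added in _scrape, not an authorial comment: handle_spider_output's response parameter was narrowed from Union[Response, Failure] to Response, while _scrape still receives the wider union, so cast(Response, result) asserts the success-path type to the checker. typing.cast does nothing at runtime, as this small sketch (hypothetical stand-in classes) shows:

from typing import Union, cast


class Response:
    pass


class Failure:
    pass


def narrow(result: Union[Response, Failure]) -> Response:
    # cast() is a type-checker-only assertion: it returns its argument
    # unchanged and performs no runtime validation whatsoever.
    return cast(Response, result)


print(narrow(Response()))  # fine
print(narrow(Failure()))   # also "fine" at runtime: cast does not check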
6 changes: 2 additions & 4 deletions scrapy/core/spidermw.py
@@ -303,10 +303,8 @@ def process_spider_exception(_failure: Failure) -> Union[Failure, MutableChain]:
         dfd = mustbe_deferred(
             self._process_spider_input, scrape_func, response, request, spider
         )
-        dfd.addCallbacks(
-            callback=deferred_f_from_coro_f(process_callback_output),
-            errback=process_spider_exception,
-        )
+        dfd.addCallback(deferred_f_from_coro_f(process_callback_output))
+        dfd.addErrback(process_spider_exception)
         return dfd
 
     def process_start_requests(
8 changes: 2 additions & 6 deletions scrapy/mail.py
@@ -154,12 +154,8 @@ def send(
             return None
 
         dfd = self._sendmail(rcpts, msg.as_string().encode(charset or "utf-8"))
-        dfd.addCallbacks(
-            callback=self._sent_ok,
-            errback=self._sent_failed,
-            callbackArgs=(to, cc, subject, len(attachs)),
-            errbackArgs=(to, cc, subject, len(attachs)),
-        )
+        dfd.addCallback(self._sent_ok, to, cc, subject, len(attachs))
+        dfd.addErrback(self._sent_failed, to, cc, subject, len(attachs))
         reactor.addSystemEventTrigger("before", "shutdown", lambda: dfd)
         return dfd
 
3 changes: 2 additions & 1 deletion scrapy/pipelines/files.py
@@ -459,7 +459,8 @@ def _onsuccess(result):
 
         path = self.file_path(request, info=info, item=item)
         dfd = defer.maybeDeferred(self.store.stat_file, path, info)
-        dfd.addCallbacks(_onsuccess, lambda _: None)
+        dfd.addCallback(_onsuccess)
+        dfd.addErrback(lambda _: None)
         dfd.addErrback(
             lambda f: logger.error(
                 self.__class__.__name__ + ".store.stat_file",
27 changes: 11 additions & 16 deletions scrapy/pipelines/media.py
@@ -106,10 +106,17 @@ def _process_request(self, request, info, item):
 
         # Return cached result if request was already seen
         if fp in info.downloaded:
-            return defer_result(info.downloaded[fp]).addCallbacks(cb, eb)
+            d = defer_result(info.downloaded[fp])
+            d.addCallback(cb)
+            if eb:
+                d.addErrback(eb)
+            return d
 
         # Otherwise, wait for result
-        wad = Deferred().addCallbacks(cb, eb)
+        wad = Deferred()
+        wad.addCallback(cb)
+        if eb:
+            wad.addErrback(eb)
         info.waiting[fp].append(wad)
 
         # Check if request is downloading right now to avoid doing it twice
@@ -140,23 +147,11 @@ def _check_media_to_download(self, result, request, info, item):
         if self.download_func:
             # this ugly code was left only to support tests. TODO: remove
             dfd = mustbe_deferred(self.download_func, request, info.spider)
-            dfd.addCallbacks(
-                callback=self.media_downloaded,
-                callbackArgs=(request, info),
-                callbackKeywords={"item": item},
-                errback=self.media_failed,
-                errbackArgs=(request, info),
-            )
         else:
             self._modify_media_request(request)
             dfd = self.crawler.engine.download(request)
-            dfd.addCallbacks(
-                callback=self.media_downloaded,
-                callbackArgs=(request, info),
-                callbackKeywords={"item": item},
-                errback=self.media_failed,
-                errbackArgs=(request, info),
-            )
+        dfd.addCallback(self.media_downloaded, request, info, item=item)
+        dfd.addErrback(self.media_failed, request, info)
         return dfd
 
     def _cache_result_and_execute_waiters(self, result, fp, info):
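The new if eb: guards preserve the old tolerance for a missing errback: addCallbacks accepts errback=None and simply leaves the failure path untouched, while a bare addErrback is typed to take a real callable. A sketch of the guarded pattern with made-up names (attach_handlers is not Scrapy API), assuming Twisted is installed:

from typing import Callable, Optional

from twisted.internet.defer import Deferred


def attach_handlers(
    d: Deferred,
    cb: Callable,
    eb: Optional[Callable] = None,
) -> Deferred:
    d.addCallback(cb)
    if eb is not None:
        # Only attach an errback when one was actually supplied;
        # addErrback expects a callable, not Optional[Callable].
        d.addErrback(eb)
    return d


d = attach_handlers(Deferred(), lambda r: print("got", r))
d.callback("result")  # prints "got result"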
4 changes: 3 additions & 1 deletion scrapy/shell.py
@@ -231,7 +231,9 @@ def _restore_callbacks(result: Any) -> Any:
     d: defer.Deferred = defer.Deferred()
     d.addBoth(_restore_callbacks)
     if request.callback:
-        d.addCallbacks(request.callback, request.errback)
+        d.addCallback(request.callback)
+        if request.errback:
+            d.addErrback(request.errback)
 
     request.callback, request.errback = d.callback, d.errback
     return d
29 changes: 21 additions & 8 deletions scrapy/utils/decorators.py
@@ -1,21 +1,34 @@
+from __future__ import annotations
+
 import warnings
 from functools import wraps
-from typing import Any, Callable
+from typing import TYPE_CHECKING, Any, Callable, TypeVar
 
 from twisted.internet import defer, threads
 from twisted.internet.defer import Deferred
 
 from scrapy.exceptions import ScrapyDeprecationWarning
 
+if TYPE_CHECKING:
+    # typing.ParamSpec requires Python 3.10
+    from typing_extensions import ParamSpec
+
+    _P = ParamSpec("_P")
+
+
+_T = TypeVar("_T")
+
 
-def deprecated(use_instead: Any = None) -> Callable:
+def deprecated(
+    use_instead: Any = None,
+) -> Callable[[Callable[_P, _T]], Callable[_P, _T]]:
     """This is a decorator which can be used to mark functions
     as deprecated. It will result in a warning being emitted
     when the function is used."""
 
-    def deco(func: Callable) -> Callable:
+    def deco(func: Callable[_P, _T]) -> Callable[_P, _T]:
         @wraps(func)
-        def wrapped(*args: Any, **kwargs: Any) -> Any:
+        def wrapped(*args: _P.args, **kwargs: _P.kwargs) -> Any:
             message = f"Call to deprecated function {func.__name__}."
             if use_instead:
                 message += f" Use {use_instead} instead."
@@ -30,23 +43,23 @@ def wrapped(*args: Any, **kwargs: Any) -> Any:
     return deco
 
 
-def defers(func: Callable) -> Callable[..., Deferred]:
+def defers(func: Callable[_P, _T]) -> Callable[_P, Deferred[_T]]:
     """Decorator to make sure a function always returns a deferred"""
 
     @wraps(func)
-    def wrapped(*a: Any, **kw: Any) -> Deferred:
+    def wrapped(*a: _P.args, **kw: _P.kwargs) -> Deferred[_T]:
         return defer.maybeDeferred(func, *a, **kw)
 
     return wrapped
 
 
-def inthread(func: Callable) -> Callable[..., Deferred]:
+def inthread(func: Callable[_P, _T]) -> Callable[_P, Deferred[_T]]:
     """Decorator to call a function in a thread and return a deferred with the
     result
     """
 
     @wraps(func)
-    def wrapped(*a: Any, **kw: Any) -> Deferred:
+    def wrapped(*a: _P.args, **kw: _P.kwargs) -> Deferred[_T]:
         return threads.deferToThread(func, *a, **kw)
 
     return wrapped

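A hypothetical usage sketch of the retyped decorators (assuming a Scrapy checkout that includes this commit): because deprecated() now produces Callable[[Callable[_P, _T]], Callable[_P, _T]] and defers() produces Callable[_P, Deferred[_T]], decorated functions keep their checked signatures instead of collapsing to a bare Callable.

from scrapy.utils.decorators import defers, deprecated


@deprecated(use_instead="parse_v2")
def parse_v1(text: str, strict: bool = True) -> int:
    return len(text)


# mypy now infers `n: int` and checks the arguments; before this commit
# the decorated function degraded to an untyped Callable.
n = parse_v1("body", strict=False)  # also emits ScrapyDeprecationWarning at runtime
# parse_v1(123)                     # rejected by the type checker


@defers
def fetch_length(text: str) -> int:
    return len(text)


# With ParamSpec plus the _T TypeVar, the wrapper now returns the
# parameterized Deferred[int] rather than a bare Deferred.
d = fetch_length("hello")
d.addCallback(print)  # prints 5 (maybeDeferred fires synchronously here)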