Skip to content

Commit

Permalink
chore: port routeFromHar, roll to 1.23.0 driver (#1384)
Browse files Browse the repository at this point in the history
This is part 5/n of the 1.23 port.

Relates #1308, #1374, #1376, #1382, #1383

Ports:

  - [x] microsoft/playwright@259c8d6 (feat: Page.routeFromHar (#14870))
  - [x] microsoft/playwright@79378dd (fix: add pw:api logging to har router (#14903))
  - [x] microsoft/playwright@030e7d2 (chore(har): allow replaying from zip har (#14962))
  - [x] microsoft/playwright@ed6b14f (fix(har): restart redirected navigation (#14939))
  - [x] microsoft/playwright@e5372c3 (chore: move har router into local utils (#14967))
  - [x] microsoft/playwright@920f1d5 (chore: allow routing by uncompressed har (#14987))
  - [x] microsoft/playwright@eb87966 (feat(har): disambiguate requests by post data (#14993))
  - [x] microsoft/playwright@6af6fab (fix(har): internal redirect in renderer-initiated navigations (#15000))
  - [x] microsoft/playwright@9525bed (feat(har): re-add routeFromHAR (#15024))
  • Loading branch information
rwoll committed Jun 28, 2022
1 parent 7b424eb commit b2e3b93
Show file tree
Hide file tree
Showing 22 changed files with 2,120 additions and 38 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Expand Up @@ -6,6 +6,7 @@ repos:
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
exclude: tests/assets/har-sha1-main-response.txt
- id: check-yaml
- id: check-toml
- id: requirements-txt-fixer
Expand Down
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -4,7 +4,7 @@ Playwright is a Python library to automate [Chromium](https://www.chromium.org/H

| | Linux | macOS | Windows |
| :--- | :---: | :---: | :---: |
| Chromium <!-- GEN:chromium-version -->103.0.5060.53<!-- GEN:stop --> ||||
| Chromium <!-- GEN:chromium-version -->104.0.5112.20<!-- GEN:stop --> ||||
| WebKit <!-- GEN:webkit-version -->15.4<!-- GEN:stop --> ||||
| Firefox <!-- GEN:firefox-version -->100.0.2<!-- GEN:stop --> ||||

Expand Down
16 changes: 16 additions & 0 deletions playwright/_impl/_browser_context.py
Expand Up @@ -35,7 +35,9 @@
from playwright._impl._event_context_manager import EventContextManagerImpl
from playwright._impl._fetch import APIRequestContext
from playwright._impl._frame import Frame
from playwright._impl._har_router import HarRouter
from playwright._impl._helper import (
RouteFromHarNotFoundPolicy,
RouteHandler,
RouteHandlerCallback,
TimeoutSettings,
Expand Down Expand Up @@ -292,6 +294,20 @@ async def unroute(
if len(self._routes) == 0:
await self._disable_interception()

async def route_from_har(
    self,
    har: Union[Path, str],
    url: URLMatch = None,
    not_found: RouteFromHarNotFoundPolicy = None,
) -> None:
    """Serve requests made in this context from a recorded HAR file.

    Args:
        har: Path to the HAR file; it is converted with ``str()`` before
            being handed to ``HarRouter.create``.
        url: Optional matcher restricting which requests are routed.
        not_found: Policy for requests missing from the HAR; a missing or
            falsy value is treated as ``"abort"``.
    """
    policy = not_found if not_found else "abort"
    har_router = await HarRouter.create(
        local_utils=self._connection.local_utils,
        file=str(har),
        not_found_action=policy,
        url_matcher=url,
    )
    # Registers the HAR-backed handler on this context.
    await har_router.add_context_route(self)

async def _disable_interception(self) -> None:
    """Send a request over the channel that switches network interception off."""
    params = {"enabled": False}
    await self._channel.send("setNetworkInterceptionEnabled", params)

Expand Down
104 changes: 104 additions & 0 deletions playwright/_impl/_har_router.py
@@ -0,0 +1,104 @@
import asyncio
import base64
from typing import TYPE_CHECKING, Optional, cast

from playwright._impl._api_structures import HeadersArray
from playwright._impl._helper import (
HarLookupResult,
RouteFromHarNotFoundPolicy,
URLMatch,
)
from playwright._impl._local_utils import LocalUtils

if TYPE_CHECKING: # pragma: no cover
from playwright._impl._browser_context import BrowserContext
from playwright._impl._network import Route
from playwright._impl._page import Page


class HarRouter:
    """Answers intercepted requests from an opened HAR archive.

    An instance is bound to one HAR id (obtained through the ``harOpen``
    request in :meth:`create`). Each routed request is looked up through
    ``LocalUtils.har_lookup`` and then redirected, fulfilled, aborted or
    passed on, depending on the lookup result and the not-found policy.
    """

    def __init__(
        self,
        local_utils: LocalUtils,
        har_id: str,
        not_found_action: RouteFromHarNotFoundPolicy,
        url_matcher: Optional[URLMatch] = None,
    ) -> None:
        self._local_utils: LocalUtils = local_utils
        self._har_id: str = har_id
        self._not_found_action: RouteFromHarNotFoundPolicy = not_found_action
        self._options_url_match: Optional[URLMatch] = url_matcher
        # Strong references to tasks started by this router. The event loop
        # only keeps weak references to tasks, so a task nobody else holds
        # may be garbage-collected before it finishes.
        self._pending_tasks: set = set()

    @staticmethod
    async def create(
        local_utils: LocalUtils,
        file: str,
        not_found_action: RouteFromHarNotFoundPolicy,
        url_matcher: Optional[URLMatch] = None,
    ) -> "HarRouter":
        """Open ``file`` via the ``harOpen`` request and wrap the resulting id.

        Args:
            local_utils: Object whose channel is used for the HAR requests.
            file: Path of the HAR file to open.
            not_found_action: ``"abort"`` or ``"fallback"``; applied when a
                request is not answered from the HAR.
            url_matcher: Optional matcher limiting which requests are routed;
                when falsy, ``"**/*"`` is used at registration time.
        """
        har_id = await local_utils._channel.send("harOpen", {"file": file})
        return HarRouter(
            local_utils=local_utils,
            har_id=har_id,
            not_found_action=not_found_action,
            url_matcher=url_matcher,
        )

    def _spawn(self, coro):  # type: ignore[no-untyped-def]
        """Schedule ``coro`` as a task and keep it referenced until it is done."""
        task = asyncio.create_task(coro)
        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)
        return task

    async def _handle(self, route: "Route") -> None:
        """Resolve one intercepted route against the HAR.

        ``redirect`` restarts the navigation at the recorded URL and
        ``fulfill`` replays the recorded response. Every other outcome
        (``error``, or no matching entry) is aborted or passed to
        ``route.fallback()`` according to the not-found policy.
        """
        request = route.request
        response: HarLookupResult = await self._local_utils.har_lookup(
            harId=self._har_id,
            url=request.url,
            method=request.method,
            headers=await request.headers_array(),
            postData=request.post_data_buffer,
            isNavigationRequest=request.is_navigation_request(),
        )
        action = response["action"]
        if action == "redirect":
            redirect_url = response["redirectURL"]
            assert redirect_url
            await route._redirected_navigation_request(redirect_url)
            return

        if action == "fulfill":
            body = response["body"]
            assert body is not None
            await route.fulfill(
                status=response.get("status"),
                headers={
                    v["name"]: v["value"]
                    # `or []` also covers a "headers" key that is present
                    # but None, which `.get("headers", [])` would let through.
                    for v in cast(HeadersArray, response.get("headers") or [])
                },
                # The recorded body arrives base64-encoded.
                body=base64.b64decode(body),
            )
            return

        if action == "error":
            # Report the error, but fall through to the default handler.
            import logging

            logging.getLogger(__name__).debug("HAR: %s", response.get("message"))

        if self._not_found_action == "abort":
            await route.abort()
            return

        await route.fallback()

    async def add_context_route(self, context: "BrowserContext") -> None:
        """Register the HAR handler on ``context`` and close the HAR when it closes."""
        await context.route(
            url=self._options_url_match or "**/*",
            handler=lambda route, _: self._spawn(self._handle(route)),
        )
        context.once("close", lambda _: self._dispose())

    async def add_page_route(self, page: "Page") -> None:
        """Register the HAR handler on ``page`` and close the HAR when it closes."""
        await page.route(
            url=self._options_url_match or "**/*",
            handler=lambda route, _: self._spawn(self._handle(route)),
        )
        page.once("close", lambda _: self._dispose())

    def _dispose(self) -> None:
        """Send ``harClose`` for this router's HAR id without blocking the caller."""
        self._spawn(
            self._local_utils._channel.send("harClose", {"harId": self._har_id})
        )
11 changes: 11 additions & 0 deletions playwright/_impl/_helper.py
Expand Up @@ -49,6 +49,7 @@


if TYPE_CHECKING: # pragma: no cover
from playwright._impl._api_structures import HeadersArray
from playwright._impl._network import Request, Response, Route

URLMatch = Union[str, Pattern, Callable[[str], bool]]
Expand All @@ -67,6 +68,7 @@
ServiceWorkersPolicy = Literal["allow", "block"]
HarMode = Literal["full", "minimal"]
HarContentPolicy = Literal["attach", "embed", "omit"]
RouteFromHarNotFoundPolicy = Literal["abort", "fallback"]


class ErrorPayload(TypedDict, total=False):
Expand Down Expand Up @@ -135,6 +137,15 @@ def matches(self, url: str) -> bool:
return False


# Result returned by LocalUtils.har_lookup for one request.
# total=False: every key is optional, so readers should not assume a key is present.
class HarLookupResult(TypedDict, total=False):
# Lookup outcome; HarRouter._handle branches on "redirect", "fulfill" and "error".
action: Literal["error", "redirect", "fulfill", "noentry"]
# NOTE(review): presumably a description accompanying the "error" action — confirm.
message: Optional[str]
# Target URL used when action is "redirect".
redirectURL: Optional[str]
# Response status passed to route.fulfill when action is "fulfill".
status: Optional[int]
# Response headers as name/value entries; flattened into a dict before fulfilling.
headers: Optional["HeadersArray"]
# Response body as base64 text; decoded with base64.b64decode before fulfilling.
body: Optional[str]


class TimeoutSettings:
def __init__(self, parent: Optional["TimeoutSettings"]) -> None:
self._parent = parent
Expand Down
31 changes: 29 additions & 2 deletions playwright/_impl/_local_utils.py
Expand Up @@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, List
import base64
from typing import Dict, List, Optional, cast

from playwright._impl._api_structures import NameValue
from playwright._impl._api_structures import HeadersArray, NameValue
from playwright._impl._connection import ChannelOwner
from playwright._impl._helper import HarLookupResult, locals_to_params


class LocalUtils(ChannelOwner):
Expand All @@ -26,3 +28,28 @@ def __init__(

async def zip(self, zip_file: str, entries: List[NameValue]) -> None:
    """Send a ``zip`` request carrying the zip file path and its entries."""
    payload = {"zipFile": zip_file, "entries": entries}
    await self._channel.send("zip", payload)

async def har_open(self, file: str) -> str:
    """Open a HAR file through the ``harOpen`` request and return its id.

    Args:
        file: Path of the HAR file to open.

    Returns:
        The value produced by the ``harOpen`` request. HarRouter.create
        treats that value as the HAR id and passes it on as ``harId``.
        It was previously discarded, so callers of this wrapper had no
        way to refer to the HAR they had just opened.
    """
    # locals() must be captured before any other local name is bound.
    params = locals_to_params(locals())
    return await self._channel.send("harOpen", params)

# Look up the HAR entry matching a request by sending a "harLookup" request.
# The argument names are forwarded as the request params, which is why they
# are spelled in camelCase here.
# Returns the reply as a HarLookupResult dict.
async def har_lookup(
self,
harId: str,
url: str,
method: str,
headers: HeadersArray,
isNavigationRequest: bool,
postData: Optional[bytes] = None,
) -> HarLookupResult:
# locals() has to be captured first: any local bound before this line would
# end up in the request params as well.
# NOTE(review): presumably locals_to_params drops `self` and None values — confirm.
params = locals_to_params(locals())
# postData is bytes on this side; it is sent as base64 text.
if "postData" in params:
params["postData"] = base64.b64encode(params["postData"]).decode()
return cast(
HarLookupResult,
await self._channel.send_return_as_dict("harLookup", params),
)

# Send a "harClose" request for the HAR identified by harId.
async def har_close(self, harId: str) -> None:
# locals() has to be captured before any other local name is bound, so that
# only the call arguments are forwarded as request params.
params = locals_to_params(locals())
await self._channel.send("harClose", params)
1 change: 0 additions & 1 deletion playwright/_impl/_network.py
Expand Up @@ -355,7 +355,6 @@ async def continue_route() -> None:

return continue_route()

# FIXME: Port corresponding tests, and call this method
async def _redirected_navigation_request(self, url: str) -> None:
self._check_not_handled()
await self._race_with_page_close(
Expand Down
16 changes: 16 additions & 0 deletions playwright/_impl/_page.py
Expand Up @@ -53,13 +53,15 @@
from playwright._impl._event_context_manager import EventContextManagerImpl
from playwright._impl._file_chooser import FileChooser
from playwright._impl._frame import Frame
from playwright._impl._har_router import HarRouter
from playwright._impl._helper import (
ColorScheme,
DocumentLoadState,
ForcedColors,
KeyboardModifier,
MouseButton,
ReducedMotion,
RouteFromHarNotFoundPolicy,
RouteHandler,
RouteHandlerCallback,
TimeoutSettings,
Expand Down Expand Up @@ -600,6 +602,20 @@ async def unroute(
if len(self._routes) == 0:
await self._disable_interception()

async def route_from_har(
    self,
    har: Union[Path, str],
    url: URLMatch = None,
    not_found: RouteFromHarNotFoundPolicy = None,
) -> None:
    """Serve requests made in this page from a recorded HAR file.

    Args:
        har: Path to the HAR file; it is converted with ``str()`` before
            being handed to ``HarRouter.create``.
        url: Optional matcher restricting which requests are routed.
        not_found: Policy for requests missing from the HAR; a missing or
            falsy value is treated as ``"abort"``.
    """
    policy = not_found if not_found else "abort"
    har_router = await HarRouter.create(
        local_utils=self._connection.local_utils,
        file=str(har),
        not_found_action=policy,
        url_matcher=url,
    )
    # Registers the HAR-backed handler on this page.
    await har_router.add_page_route(self)

async def _disable_interception(self) -> None:
    """Send a request over the channel that switches network interception off."""
    params = {"enabled": False}
    await self._channel.send("setNetworkInterceptionEnabled", params)

Expand Down
82 changes: 80 additions & 2 deletions playwright/async_api/_generated.py
Expand Up @@ -268,7 +268,9 @@ def timing(self) -> ResourceTiming:
def headers(self) -> typing.Dict[str, str]:
"""Request.headers

**DEPRECATED** Incomplete list of headers as seen by the rendering engine. Use `request.all_headers()` instead.
An object with the request HTTP headers. The header names are lower-cased. Note that this method does not return
security-related headers, including cookie-related ones. You can use `request.all_headers()` for complete list of
headers that include `cookie` information.

Returns
-------
Expand Down Expand Up @@ -411,7 +413,9 @@ def status_text(self) -> str:
def headers(self) -> typing.Dict[str, str]:
"""Response.headers

**DEPRECATED** Incomplete list of headers as seen by the rendering engine. Use `response.all_headers()` instead.
An object with the response HTTP headers. The header names are lower-cased. Note that this method does not return
security-related headers, including cookie-related ones. You can use `response.all_headers()` for complete list
of headers that include `cookie` information.

Returns
-------
Expand Down Expand Up @@ -7736,6 +7740,43 @@ async def unroute(
)
)

async def route_from_har(
self,
har: typing.Union[pathlib.Path, str],
*,
url: typing.Union[str, typing.Pattern, typing.Callable[[str], bool]] = None,
not_found: Literal["abort", "fallback"] = None
) -> NoneType:
"""Page.route_from_har

If specified, the network requests that are made in the page will be served from the HAR file. Read more about
[Replaying from HAR](https://playwright.dev/python/docs/network#replaying-from-har).

Playwright will not serve requests intercepted by Service Worker from the HAR file. See
[this](https://github.com/microsoft/playwright/issues/1090) issue. We recommend disabling Service Workers when using
request interception by setting `Browser.newContext.serviceWorkers` to `'block'`.

Parameters
----------
har : Union[pathlib.Path, str]
Path to a [HAR](http://www.softwareishard.com/blog/har-12-spec) file with prerecorded network data. If `har` is a
relative path, then it is resolved relative to the current working directory.
url : Union[Callable[[str], bool], Pattern, str, NoneType]
A glob pattern, regular expression or predicate to match the request URL. Only requests with URL matching the pattern
will be served from the HAR file. If not specified, all requests are served from the HAR file.
not_found : Union["abort", "fallback", NoneType]
- If set to 'abort' any request not found in the HAR file will be aborted.
- If set to 'fallback' falls through to the next route handler in the handler chain.

Defaults to abort.
"""

# NOTE(review): url goes through _wrap_handler before reaching the impl object —
# presumably to adapt callable matchers; confirm against _wrap_handler.
return mapping.from_maybe_impl(
await self._impl_obj.route_from_har(
har=har, url=self._wrap_handler(url), not_found=not_found
)
)

async def screenshot(
self,
*,
Expand Down Expand Up @@ -10432,6 +10473,43 @@ async def unroute(
)
)

async def route_from_har(
self,
har: typing.Union[pathlib.Path, str],
*,
url: typing.Union[str, typing.Pattern, typing.Callable[[str], bool]] = None,
not_found: Literal["abort", "fallback"] = None
) -> NoneType:
"""BrowserContext.route_from_har

If specified, the network requests that are made in the context will be served from the HAR file. Read more about
[Replaying from HAR](https://playwright.dev/python/docs/network#replaying-from-har).

Playwright will not serve requests intercepted by Service Worker from the HAR file. See
[this](https://github.com/microsoft/playwright/issues/1090) issue. We recommend disabling Service Workers when using
request interception by setting `Browser.newContext.serviceWorkers` to `'block'`.

Parameters
----------
har : Union[pathlib.Path, str]
Path to a [HAR](http://www.softwareishard.com/blog/har-12-spec) file with prerecorded network data. If `har` is a
relative path, then it is resolved relative to the current working directory.
url : Union[Callable[[str], bool], Pattern, str, NoneType]
A glob pattern, regular expression or predicate to match the request URL. Only requests with URL matching the pattern
will be served from the HAR file. If not specified, all requests are served from the HAR file.
not_found : Union["abort", "fallback", NoneType]
- If set to 'abort' any request not found in the HAR file will be aborted.
- If set to 'fallback' falls through to the next route handler in the handler chain.

Defaults to abort.
"""

# NOTE(review): url goes through _wrap_handler before reaching the impl object —
# presumably to adapt callable matchers; confirm against _wrap_handler.
return mapping.from_maybe_impl(
await self._impl_obj.route_from_har(
har=har, url=self._wrap_handler(url), not_found=not_found
)
)

def expect_event(
self, event: str, predicate: typing.Callable = None, *, timeout: float = None
) -> AsyncEventContextManager:
Expand Down

0 comments on commit b2e3b93

Please sign in to comment.