Skip to content

Commit

Permalink
Add fallback method for CSV download (#8452)
Browse files Browse the repository at this point in the history
## Describe your changes

This PR adds a fallback to an old browser download method for CSV
dataframe download feature. Some browser or situations might not
correctly support the file picker feature, in these cases it will
fallback to an older more commonly supported download method.

We also add iframe tests together with CSPs here. This can be used in
the future also by other tests.

## GitHub Issue Link (if applicable)

- Closes #8210

## Testing Plan

- E2E tests: added e2e tests for downloading a CSV file from the
dataframe; also within an iframe and with a content-security-policy
in-place

---

**Contribution License Agreement**

By submitting this pull request you agree that all contributions to this
project are made under the Apache 2.0 license.

---------

Co-authored-by: Karen Javadyan <kajarenc@gmail.com>
Co-authored-by: braethlein <benjamin.raethlein@gmail.com>
  • Loading branch information
3 people committed May 16, 2024
1 parent 82c9d3e commit 48f50ad
Show file tree
Hide file tree
Showing 7 changed files with 319 additions and 37 deletions.
103 changes: 101 additions & 2 deletions e2e_playwright/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,26 @@
import subprocess
import sys
import time
from dataclasses import dataclass
from io import BytesIO
from pathlib import Path
from random import randint
from tempfile import TemporaryFile
from types import ModuleType
from typing import Any, Dict, Generator, List, Literal, Protocol, Tuple
from typing import Any, Callable, Dict, Generator, List, Literal, Protocol, Tuple
from urllib import parse

import pytest
import requests
from PIL import Image
from playwright.sync_api import ElementHandle, Locator, Page
from playwright.sync_api import (
ElementHandle,
FrameLocator,
Locator,
Page,
Response,
Route,
)
from pytest import FixtureRequest


Expand Down Expand Up @@ -258,6 +266,97 @@ def app_with_query_params(
return page, query_params


@dataclass
class IframedPage:
# the page to configure
page: Page
# opens the configured page via the iframe URL and returns the frame_locator pointing to the iframe
open_app: Callable[[], FrameLocator]


@pytest.fixture(scope="function")
def iframed_app(page: Page, app_port: int) -> IframedPage:
"""Fixture that returns an IframedPage.
The page object can be used to configure additional routes, for example to override the host-config.
The open_app function triggers the opening of the app in an iframe.
"""
# we are going to intercept the request, so the address is arbitrarily chose and does not have to exist
fake_iframe_server_origin = "http://localhost:1345"
fake_iframe_server_route = f"{fake_iframe_server_origin}/iframed_app.html"
# the url where the Streamlit server is reachable
app_url = f"http://localhost:{app_port}"
# the CSP header returned for the Streamlit index.html loaded in the iframe. This is similar to a common CSP we have seen in the wild.
app_csp_header = f"default-src 'none'; worker-src blob:; form-action 'none'; connect-src ws://localhost:{app_port}/_stcore/stream http://localhost:{app_port}/_stcore/allowed-message-origins http://localhost:{app_port}/_stcore/host-config http://localhost:{app_port}/_stcore/health; script-src 'unsafe-inline' 'unsafe-eval' {app_url}/static/js/; style-src 'unsafe-inline' {app_url}/static/css/; img-src data: {app_url}/favicon.png {app_url}/favicon.ico; font-src {app_url}/static/fonts/ {app_url}/static/media/; frame-ancestors {fake_iframe_server_origin};"

def fulfill_iframe_request(route: Route) -> None:
"""Return as response an iframe that loads the actual Streamlit app."""

browser = page.context.browser
# webkit requires the iframe's parent to have "blob:" set, for example if we want to download a CSV via the blob: url
# chrome seems to be more lax
frame_src_blob = ""
if browser is not None and (
browser.browser_type.name == "webkit"
or browser.browser_type.name == "firefox"
):
frame_src_blob = "blob:"

route.fulfill(
status=200,
body=f"""
<iframe
src="{app_url}"
title="Iframed Streamlit App"
allow="clipboard-write;"
sandbox="allow-popups allow-same-origin allow-scripts allow-downloads"
width="100%"
height="100%">
</iframe>
""",
headers={
"Content-Type": "text/html",
"Content-Security-Policy": f"frame-src {frame_src_blob} {app_url};",
},
)

# intercept all requests to the fake iframe server and fullfil the request in playwright
page.route(fake_iframe_server_route, fulfill_iframe_request)

def fullfill_streamlit_app_request(route: Route) -> None:
response = route.fetch()
route.fulfill(
body=response.body(),
headers={**response.headers, "Content-Security-Policy": app_csp_header},
)

page.route(f"{app_url}/", fullfill_streamlit_app_request)

def _open_app() -> FrameLocator:
def _expect_streamlit_app_loaded_in_iframe_with_added_header(
response: Response,
) -> bool:
"""Ensure that the routing-interception worked and that Streamlit app is indeed loaded with the CSP header we expect"""

return (
response.url == f"{app_url}/"
and response.headers["content-security-policy"] == app_csp_header
)

with page.expect_event(
"response",
predicate=_expect_streamlit_app_loaded_in_iframe_with_added_header,
):
page.goto(fake_iframe_server_route, wait_until="domcontentloaded")
frame_locator = page.frame_locator("iframe")
frame_locator.nth(0).get_by_test_id("stAppViewContainer").wait_for(
timeout=30000, state="attached"
)
return frame_locator

return IframedPage(page, _open_app)


@pytest.fixture(scope="session")
def browser_type_launch_args(browser_type_launch_args: Dict, browser_name: str):
"""Fixture that adds the fake device and ui args to the browser type launch args."""
Expand Down
110 changes: 106 additions & 4 deletions e2e_playwright/st_dataframe_interactions_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import pytest
from playwright.sync_api import Page, expect
from playwright.sync_api import FrameLocator, Locator, Page, Route, expect

from e2e_playwright.conftest import ImageCompareFunction, wait_for_app_run
from e2e_playwright.conftest import IframedPage, ImageCompareFunction, wait_for_app_run

# This test suite covers all interactions of dataframe & data_editor

Expand Down Expand Up @@ -265,11 +266,112 @@ def test_data_editor_keeps_state_after_unmounting(
)


def _test_csv_download(
page: Page,
locator: FrameLocator | Locator,
click_enter_on_file_picker: bool = False,
):
dataframe_element = locator.get_by_test_id("stDataFrame").nth(0)
dataframe_toolbar = dataframe_element.get_by_test_id("stElementToolbar")

download_csv_toolbar_button = dataframe_toolbar.get_by_test_id(
"stElementToolbarButton"
).first

# Activate toolbar:
dataframe_element.hover()
# Check that it is visible
expect(dataframe_toolbar).to_have_css("opacity", "1")

with page.expect_download(timeout=10000) as download_info:
download_csv_toolbar_button.click()

# playwright does not support all fileaccess APIs yet (see this issue: https://github.com/microsoft/playwright/issues/8850)
# this means we don't know if the system dialog opened to pick a location (expect_file_chooser does not work). So as a workaround, we wait for now and then press enter.
if click_enter_on_file_picker:
page.wait_for_timeout(1000)
page.keyboard.press("Enter")

download = download_info.value
download_path = download.path()
with open(download_path, "r", encoding="UTF-8") as f:
content = f.read()
# the app uses a fixed seed, so the data is always the same. This is the reason why we can check it here.
some_row = "1,-0.977277879876411,0.9500884175255894,-0.1513572082976979,-0.10321885179355784,0.41059850193837233"
# we usually try to avoid assert in playwright tests, but since we don't have to wait for any UI interaction or DOM state, it's ok here
assert some_row in content


def test_csv_download_button(
app: Page, browser_name: str, browser_type_launch_args: dict
):
"""Test that the csv download button works.
Note that the library we are using calls the file picker API to download the file. This is not supported in headless mode. Hence, the test
triggers different code paths in the app depending on the browser and the launch arguments.
"""

click_enter_on_file_picker = False

# right now the filechooser will only be opened on Chrome. Maybe this will change in the future and the
# check has to be updated; or maybe playwright will support the file-access APIs better.
# In headless mode, the file-access API our csv-download button uses under-the-hood does not work. So we monkey-patch it to throw an error and trigger our alternative download logic.
if browser_name == "chromium":
if browser_type_launch_args.get("headless", False):
click_enter_on_file_picker = True
else:
app.evaluate(
"() => window.showSaveFilePicker = () => {throw new Error('Monkey-patched showOpenFilePicker')}",
)
_test_csv_download(app, app.locator("body"), click_enter_on_file_picker)


def test_csv_download_button_in_iframe(iframed_app: IframedPage):
"""Test that the csv download button works in an iframe.
Based on the test behavior and the fact that we don't have to patch the 'window.showSaveFilePicker' as in the test above,
it seems that the fallback download method is used.
"""

page: Page = iframed_app.page
frame_locator: FrameLocator = iframed_app.open_app()

_test_csv_download(page, frame_locator)


def test_csv_download_button_in_iframe_with_new_tab_host_config(
iframed_app: IframedPage,
):
"""Test that the csv download button works in an iframe and the host-config enforced download in new tab.
Based on the test behavior and the fact that we don't have to patch the 'window.showSaveFilePicker' as in the test above,
it seems that the fallback download method is used.
If this ever changes, the host-config[enforceDownloadInNewTab] might not take any effect as it is only used in the fallback mechanism.
"""
page: Page = iframed_app.page

def fulfill_host_config_request(route: Route):
response = route.fetch()
result = response.json()
result["enforceDownloadInNewTab"] = True
route.fulfill(json=result)

page.route("**/_stcore/host-config", fulfill_host_config_request)

# ensure that the route interception works and we get the correct enforceDownloadInNewTab config
with page.expect_event(

Check failure on line 362 in e2e_playwright/st_dataframe_interactions_test.py

View workflow job for this annotation

GitHub Actions / test

test_csv_download_button_in_iframe_with_new_tab_host_config[webkit] playwright._impl._errors.TimeoutError: Timeout 10000ms exceeded while waiting for event "response" =========================== logs =========================== waiting for event "response" ============================================================
"response",
lambda response: response.url.endswith("_stcore/host-config")
and response.json()["enforceDownloadInNewTab"] == True,
timeout=10000,
):
frame_locator: FrameLocator = iframed_app.open_app()
_test_csv_download(page, frame_locator)


# TODO(lukasmasuch): Add additional interactive tests:
# - Selecting a cell
# - Opening a cell
# - Applying a cell edit
# - Copy data to clipboard
# - Paste in data
# - Download data via toolbar: I wasn't able to find out how to detect the
# showSaveFilePicker the filechooser doesn't work.
2 changes: 1 addition & 1 deletion frontend/lib/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
"moment": "^2.29.4",
"moment-duration-format": "^2.3.2",
"moment-timezone": "^0.5.40",
"native-file-system-adapter": "^3.0.0",
"native-file-system-adapter": "^3.0.1",
"node-emoji": "^1.11.0",
"numbro": "^2.3.6",
"plotly.js": "^2.30.1",
Expand Down
12 changes: 11 additions & 1 deletion frontend/lib/src/components/widgets/DataFrame/DataFrame.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import { debounce, isNullOrUndefined } from "@streamlit/lib/src/util/utils"
import Toolbar, {
ToolbarAction,
} from "@streamlit/lib/src/components/shared/Toolbar"
import { LibContext } from "@streamlit/lib/src/components/core/LibContext"

import EditingState, { getColumnName } from "./EditingState"
import {
Expand Down Expand Up @@ -145,6 +146,10 @@ function DataFrame({

const { theme, headerIcons, tableBorderRadius } = useCustomTheme()

const {
libConfig: { enforceDownloadInNewTab = false }, // Default to false, if no libConfig, e.g. for tests
} = React.useContext(LibContext)

const [isFocused, setIsFocused] = React.useState<boolean>(true)
const [showSearch, setShowSearch] = React.useState(false)
const [hasVerticalScroll, setHasVerticalScroll] =
Expand Down Expand Up @@ -475,7 +480,12 @@ function DataFrame({
]
)

const { exportToCsv } = useDataExporter(getCellContent, columns, numRows)
const { exportToCsv } = useDataExporter(
getCellContent,
columns,
numRows,
enforceDownloadInNewTab
)

const { onCellEdited, onPaste, onRowAppended, onDelete, validateCell } =
useDataEditor(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ describe("useDataExporter hook", () => {

it("correctly writes data row-by-row to writable", async () => {
const { result } = renderHook(() => {
return useDataExporter(getCellContentMock, MOCK_COLUMNS, NUM_ROWS)
return useDataExporter(getCellContentMock, MOCK_COLUMNS, NUM_ROWS, false)
})

if (typeof result.current.exportToCsv !== "function") {
Expand All @@ -130,7 +130,7 @@ describe("useDataExporter hook", () => {

it("correctly creates a file picker", async () => {
const { result } = renderHook(() => {
return useDataExporter(getCellContentMock, MOCK_COLUMNS, NUM_ROWS)
return useDataExporter(getCellContentMock, MOCK_COLUMNS, NUM_ROWS, false)
})

if (typeof result.current.exportToCsv !== "function") {
Expand Down

0 comments on commit 48f50ad

Please sign in to comment.