Add fallback method for CSV download (#8452)

## Describe your changes This PR adds a fallback to an old browser download method for CSV dataframe download feature. Some browser or situations might not correctly support the file picker feature, in these cases it will fallback to an older more commonly supported download method. We also add iframe tests together with CSPs here. This can be used in the future also by other tests. ## GitHub Issue Link (if applicable) - Closes #8210 ## Testing Plan - E2E tests: added e2e tests for downloading a CSV file from the dataframe; also within an iframe and with a content-security-policy in-place --- **Contribution License Agreement** By submitting this pull request you agree that all contributions to this project are made under the Apache 2.0 license. --------- Co-authored-by: Karen Javadyan <kajarenc@gmail.com> Co-authored-by: braethlein <benjamin.raethlein@gmail.com>
streamlit · May 16, 2024 · 48f50ad · 48f50ad
1 parent 82c9d3e
commit 48f50ad
Show file tree

Hide file tree

Showing 7 changed files with 319 additions and 37 deletions.
diff --git a/e2e_playwright/conftest.py b/e2e_playwright/conftest.py
@@ -27,18 +27,26 @@
 import subprocess
 import sys
 import time
+from dataclasses import dataclass
 from io import BytesIO
 from pathlib import Path
 from random import randint
 from tempfile import TemporaryFile
 from types import ModuleType
-from typing import Any, Dict, Generator, List, Literal, Protocol, Tuple
+from typing import Any, Callable, Dict, Generator, List, Literal, Protocol, Tuple
 from urllib import parse
 
 import pytest
 import requests
 from PIL import Image
-from playwright.sync_api import ElementHandle, Locator, Page
+from playwright.sync_api import (
+    ElementHandle,
+    FrameLocator,
+    Locator,
+    Page,
+    Response,
+    Route,
+)
 from pytest import FixtureRequest
 
 
@@ -258,6 +266,97 @@ def app_with_query_params(
     return page, query_params
 
 
+@dataclass
+class IframedPage:
+    # the page to configure
+    page: Page
+    # opens the configured page via the iframe URL and returns the frame_locator pointing to the iframe
+    open_app: Callable[[], FrameLocator]
+
+
+@pytest.fixture(scope="function")
+def iframed_app(page: Page, app_port: int) -> IframedPage:
+    """Fixture that returns an IframedPage.
+
+    The page object can be used to configure additional routes, for example to override the host-config.
+    The open_app function triggers the opening of the app in an iframe.
+    """
+    # we are going to intercept the request, so the address is arbitrarily chose and does not have to exist
+    fake_iframe_server_origin = "http://localhost:1345"
+    fake_iframe_server_route = f"{fake_iframe_server_origin}/iframed_app.html"
+    # the url where the Streamlit server is reachable
+    app_url = f"http://localhost:{app_port}"
+    # the CSP header returned for the Streamlit index.html loaded in the iframe. This is similar to a common CSP we have seen in the wild.
+    app_csp_header = f"default-src 'none'; worker-src blob:; form-action 'none'; connect-src ws://localhost:{app_port}/_stcore/stream http://localhost:{app_port}/_stcore/allowed-message-origins http://localhost:{app_port}/_stcore/host-config http://localhost:{app_port}/_stcore/health; script-src 'unsafe-inline' 'unsafe-eval' {app_url}/static/js/; style-src 'unsafe-inline' {app_url}/static/css/; img-src data: {app_url}/favicon.png {app_url}/favicon.ico; font-src {app_url}/static/fonts/ {app_url}/static/media/; frame-ancestors {fake_iframe_server_origin};"
+
+    def fulfill_iframe_request(route: Route) -> None:
+        """Return as response an iframe that loads the actual Streamlit app."""
+
+        browser = page.context.browser
+        # webkit requires the iframe's parent to have "blob:" set, for example if we want to download a CSV via the blob: url
+        # chrome seems to be more lax
+        frame_src_blob = ""
+        if browser is not None and (
+            browser.browser_type.name == "webkit"
+            or browser.browser_type.name == "firefox"
+        ):
+            frame_src_blob = "blob:"
+
+        route.fulfill(
+            status=200,
+            body=f"""
+            <iframe
+                src="{app_url}"
+                title="Iframed Streamlit App"
+                allow="clipboard-write;"
+                sandbox="allow-popups allow-same-origin allow-scripts allow-downloads"
+                width="100%"
+                height="100%">
+            </iframe>
+            """,
+            headers={
+                "Content-Type": "text/html",
+                "Content-Security-Policy": f"frame-src {frame_src_blob} {app_url};",
+            },
+        )
+
+    # intercept all requests to the fake iframe server and fullfil the request in playwright
+    page.route(fake_iframe_server_route, fulfill_iframe_request)
+
+    def fullfill_streamlit_app_request(route: Route) -> None:
+        response = route.fetch()
+        route.fulfill(
+            body=response.body(),
+            headers={**response.headers, "Content-Security-Policy": app_csp_header},
+        )
+
+    page.route(f"{app_url}/", fullfill_streamlit_app_request)
+
+    def _open_app() -> FrameLocator:
+        def _expect_streamlit_app_loaded_in_iframe_with_added_header(
+            response: Response,
+        ) -> bool:
+            """Ensure that the routing-interception worked and that Streamlit app is indeed loaded with the CSP header we expect"""
+
+            return (
+                response.url == f"{app_url}/"
+                and response.headers["content-security-policy"] == app_csp_header
+            )
+
+        with page.expect_event(
+            "response",
+            predicate=_expect_streamlit_app_loaded_in_iframe_with_added_header,
+        ):
+            page.goto(fake_iframe_server_route, wait_until="domcontentloaded")
+            frame_locator = page.frame_locator("iframe")
+            frame_locator.nth(0).get_by_test_id("stAppViewContainer").wait_for(
+                timeout=30000, state="attached"
+            )
+        return frame_locator
+
+    return IframedPage(page, _open_app)
+
+
 @pytest.fixture(scope="session")
 def browser_type_launch_args(browser_type_launch_args: Dict, browser_name: str):
     """Fixture that adds the fake device and ui args to the browser type launch args."""

diff --git a/e2e_playwright/st_dataframe_interactions_test.py b/e2e_playwright/st_dataframe_interactions_test.py
@@ -12,10 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
 import pytest
-from playwright.sync_api import Page, expect
+from playwright.sync_api import FrameLocator, Locator, Page, Route, expect
 
-from e2e_playwright.conftest import ImageCompareFunction, wait_for_app_run
+from e2e_playwright.conftest import IframedPage, ImageCompareFunction, wait_for_app_run
 
 # This test suite covers all interactions of dataframe & data_editor
 
@@ -265,11 +266,112 @@ def test_data_editor_keeps_state_after_unmounting(
     )
 
 
+def _test_csv_download(
+    page: Page,
+    locator: FrameLocator | Locator,
+    click_enter_on_file_picker: bool = False,
+):
+    dataframe_element = locator.get_by_test_id("stDataFrame").nth(0)
+    dataframe_toolbar = dataframe_element.get_by_test_id("stElementToolbar")
+
+    download_csv_toolbar_button = dataframe_toolbar.get_by_test_id(
+        "stElementToolbarButton"
+    ).first
+
+    # Activate toolbar:
+    dataframe_element.hover()
+    # Check that it is visible
+    expect(dataframe_toolbar).to_have_css("opacity", "1")
+
+    with page.expect_download(timeout=10000) as download_info:
+        download_csv_toolbar_button.click()
+
+        # playwright does not support all fileaccess APIs yet (see this issue: https://github.com/microsoft/playwright/issues/8850)
+        # this means we don't know if the system dialog opened to pick a location (expect_file_chooser does not work). So as a workaround, we wait for now and then press enter.
+        if click_enter_on_file_picker:
+            page.wait_for_timeout(1000)
+            page.keyboard.press("Enter")
+
+    download = download_info.value
+    download_path = download.path()
+    with open(download_path, "r", encoding="UTF-8") as f:
+        content = f.read()
+        # the app uses a fixed seed, so the data is always the same. This is the reason why we can check it here.
+        some_row = "1,-0.977277879876411,0.9500884175255894,-0.1513572082976979,-0.10321885179355784,0.41059850193837233"
+        # we usually try to avoid assert in playwright tests, but since we don't have to wait for any UI interaction or DOM state, it's ok here
+        assert some_row in content
+
+
+def test_csv_download_button(
+    app: Page, browser_name: str, browser_type_launch_args: dict
+):
+    """Test that the csv download button works.
+
+    Note that the library we are using calls the file picker API to download the file. This is not supported in headless mode. Hence, the test
+    triggers different code paths in the app depending on the browser and the launch arguments.
+    """
+
+    click_enter_on_file_picker = False
+
+    # right now the filechooser will only be opened on Chrome. Maybe this will change in the future and the
+    # check has to be updated; or maybe playwright will support the file-access APIs better.
+    # In headless mode, the file-access API our csv-download button uses under-the-hood does not work. So we monkey-patch it to throw an error and trigger our alternative download logic.
+    if browser_name == "chromium":
+        if browser_type_launch_args.get("headless", False):
+            click_enter_on_file_picker = True
+        else:
+            app.evaluate(
+                "() => window.showSaveFilePicker = () => {throw new Error('Monkey-patched showOpenFilePicker')}",
+            )
+    _test_csv_download(app, app.locator("body"), click_enter_on_file_picker)
+
+
+def test_csv_download_button_in_iframe(iframed_app: IframedPage):
+    """Test that the csv download button works in an iframe.
+
+    Based on the test behavior and the fact that we don't have to patch the 'window.showSaveFilePicker' as in the test above,
+    it seems that the fallback download method is used.
+    """
+
+    page: Page = iframed_app.page
+    frame_locator: FrameLocator = iframed_app.open_app()
+
+    _test_csv_download(page, frame_locator)
+
+
+def test_csv_download_button_in_iframe_with_new_tab_host_config(
+    iframed_app: IframedPage,
+):
+    """Test that the csv download button works in an iframe and the host-config enforced download in new tab.
+
+    Based on the test behavior and the fact that we don't have to patch the 'window.showSaveFilePicker' as in the test above,
+    it seems that the fallback download method is used.
+    If this ever changes, the host-config[enforceDownloadInNewTab] might not take any effect as it is only used in the fallback mechanism.
+    """
+    page: Page = iframed_app.page
+
+    def fulfill_host_config_request(route: Route):
+        response = route.fetch()
+        result = response.json()
+        result["enforceDownloadInNewTab"] = True
+        route.fulfill(json=result)
+
+    page.route("**/_stcore/host-config", fulfill_host_config_request)
+
+    # ensure that the route interception works and we get the correct enforceDownloadInNewTab config
+    with page.expect_event(
+        "response",
+        lambda response: response.url.endswith("_stcore/host-config")
+        and response.json()["enforceDownloadInNewTab"] == True,
+        timeout=10000,
+    ):
+        frame_locator: FrameLocator = iframed_app.open_app()
+        _test_csv_download(page, frame_locator)
+
+
 # TODO(lukasmasuch): Add additional interactive tests:
 # - Selecting a cell
 # - Opening a cell
 # - Applying a cell edit
 # - Copy data to clipboard
 # - Paste in data
-# - Download data via toolbar: I wasn't able to find out how to detect the
-#   showSaveFilePicker the filechooser doesn't work.
diff --git a/frontend/lib/package.json b/frontend/lib/package.json
@@ -69,7 +69,7 @@
     "moment": "^2.29.4",
     "moment-duration-format": "^2.3.2",
     "moment-timezone": "^0.5.40",
-    "native-file-system-adapter": "^3.0.0",
+    "native-file-system-adapter": "^3.0.1",
     "node-emoji": "^1.11.0",
     "numbro": "^2.3.6",
     "plotly.js": "^2.30.1",

diff --git a/frontend/lib/src/components/widgets/DataFrame/DataFrame.tsx b/frontend/lib/src/components/widgets/DataFrame/DataFrame.tsx
@@ -45,6 +45,7 @@ import { debounce, isNullOrUndefined } from "@streamlit/lib/src/util/utils"
 import Toolbar, {
   ToolbarAction,
 } from "@streamlit/lib/src/components/shared/Toolbar"
+import { LibContext } from "@streamlit/lib/src/components/core/LibContext"
 
 import EditingState, { getColumnName } from "./EditingState"
 import {
@@ -145,6 +146,10 @@ function DataFrame({
 
   const { theme, headerIcons, tableBorderRadius } = useCustomTheme()
 
+  const {
+    libConfig: { enforceDownloadInNewTab = false }, // Default to false, if no libConfig, e.g. for tests
+  } = React.useContext(LibContext)
+
   const [isFocused, setIsFocused] = React.useState<boolean>(true)
   const [showSearch, setShowSearch] = React.useState(false)
   const [hasVerticalScroll, setHasVerticalScroll] =
@@ -475,7 +480,12 @@ function DataFrame({
     ]
   )
 
-  const { exportToCsv } = useDataExporter(getCellContent, columns, numRows)
+  const { exportToCsv } = useDataExporter(
+    getCellContent,
+    columns,
+    numRows,
+    enforceDownloadInNewTab
+  )
 
   const { onCellEdited, onPaste, onRowAppended, onDelete, validateCell } =
     useDataEditor(

diff --git a/frontend/lib/src/components/widgets/DataFrame/hooks/useDataExporter.test.ts b/frontend/lib/src/components/widgets/DataFrame/hooks/useDataExporter.test.ts
@@ -108,7 +108,7 @@ describe("useDataExporter hook", () => {
 
   it("correctly writes data row-by-row to writable", async () => {
     const { result } = renderHook(() => {
-      return useDataExporter(getCellContentMock, MOCK_COLUMNS, NUM_ROWS)
+      return useDataExporter(getCellContentMock, MOCK_COLUMNS, NUM_ROWS, false)
     })
 
     if (typeof result.current.exportToCsv !== "function") {
@@ -130,7 +130,7 @@ describe("useDataExporter hook", () => {
 
   it("correctly creates a file picker", async () => {
     const { result } = renderHook(() => {
-      return useDataExporter(getCellContentMock, MOCK_COLUMNS, NUM_ROWS)
+      return useDataExporter(getCellContentMock, MOCK_COLUMNS, NUM_ROWS, false)
     })
 
     if (typeof result.current.exportToCsv !== "function") {