Skip to content

Commit

Permalink
Snapshots: allow snapshotting some user code (#4720)
Browse files Browse the repository at this point in the history
This adds some basic ability to snapshot after executing user code. It is pretty
brittle right now:
1. It will crash if the user loads any binary extensions before taking the
snapshot
2. It doesn't track changes to the file system

Snapshots will probably have to be experimental for quite a while.

Regarding 1: I think I have a pretty good solution for this, which I will work on in a
followup.

Regarding 2: one possibility is to serialize the entire filesystem state into
the memory snapshot. This would be hard and would make the snapshot big, but we
wouldn't have to load python_stdlib.zip when restoring from a snapshot, so it
probably wouldn't increase the total download size by much.
  • Loading branch information
hoodmane committed May 10, 2024
1 parent afe7215 commit a802179
Show file tree
Hide file tree
Showing 9 changed files with 477 additions and 121 deletions.
93 changes: 47 additions & 46 deletions Makefile.envs
Original file line number Diff line number Diff line change
Expand Up @@ -164,52 +164,53 @@ export MAIN_MODULE_LDFLAGS= $(LDFLAGS_BASE) \
-lsdl.js \
-sGL_WORKAROUND_SAFARI_GETCONTEXT_BUG=0

EXPORTS= _main\
\
,_free \
\
,_hiwire_new \
,_hiwire_intern \
,_hiwire_num_refs \
,_hiwire_get \
,_hiwire_incref \
,_hiwire_decref \
,_hiwire_pop \
,__hiwire_set \
,__hiwire_immortal_add \
,_jslib_init \
,_init_pyodide_proxy \
\
,_PyBuffer_Release \
,_Py_DecRef \
,_PyDict_New \
,_PyDict_SetItem \
,__PyErr_CheckSignals \
,_PyErr_CheckSignals \
,_PyErr_Clear \
,_PyErr_Occurred \
,_PyErr_Print \
,_PyErr_SetRaisedException \
,_PyErr_SetString \
,_PyEval_SaveThread \
,_PyEval_RestoreThread \
,_PyFloat_FromDouble \
,_PyGILState_Check \
,_Py_IncRef \
,_PyList_New \
,_PyList_SetItem \
,__PyLong_FromByteArray \
,_PyLong_FromDouble \
,_PyMem_Free \
,_PyObject_GetAIter \
,_PyObject_GetIter \
,_PyObject_Size \
,_PyRun_SimpleString \
,_PySet_Add \
,_PySet_New \
,__PyTraceback_Add \
,_PyUnicode_Data \
,_PyUnicode_New \
EXPORTS=_main \
,_free \
\
,_hiwire_new \
,_hiwire_intern \
,_hiwire_num_refs \
,_hiwire_get \
,_hiwire_incref \
,_hiwire_decref \
,_hiwire_pop \
,__hiwire_get \
,__hiwire_set \
,__hiwire_immortal_get \
,__hiwire_immortal_add \
,_jslib_init \
,_init_pyodide_proxy \
\
,_PyBuffer_Release \
,_Py_DecRef \
,_PyDict_New \
,_PyDict_SetItem \
,__PyErr_CheckSignals \
,_PyErr_CheckSignals \
,_PyErr_Clear \
,_PyErr_Occurred \
,_PyErr_Print \
,_PyErr_SetRaisedException \
,_PyErr_SetString \
,_PyEval_SaveThread \
,_PyEval_RestoreThread \
,_PyFloat_FromDouble \
,_PyGILState_Check \
,_Py_IncRef \
,_PyList_New \
,_PyList_SetItem \
,__PyLong_FromByteArray \
,_PyLong_FromDouble \
,_PyMem_Free \
,_PyObject_GetAIter \
,_PyObject_GetIter \
,_PyObject_Size \
,_PyRun_SimpleString \
,_PySet_Add \
,_PySet_New \
,__PyTraceback_Add \
,_PyUnicode_Data \
,_PyUnicode_New \


ifeq ($(DISABLE_DYLINK), 1)
Expand Down
11 changes: 5 additions & 6 deletions src/core/_pyodide_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ EM_JS(void, set_pyodide_module, (JsVal mod), {
int
init_pyodide_proxy()
{
EM_ASM({
// sourmash needs open64 to mean the same thing as open.
// Emscripten 3.1.44 seems to have removed it??
wasmImports["open64"] = wasmImports["open"];
});
bool success = false;
// Enable JavaScript access to the _pyodide module.
PyObject* _pyodide = PyImport_ImportModule("_pyodide");
Expand All @@ -83,12 +88,6 @@ EM_JS_DEPS(pyodide_core_deps, "stackAlloc,stackRestore,stackSave");
PyObject*
PyInit__pyodide_core(void)
{
EM_ASM({
// sourmash needs open64 to mean the same thing as open.
// Emscripten 3.1.44 seems to have removed it??
wasmImports["open64"] = wasmImports["open"];
});

bool success = false;
PyObject* _pyodide = NULL;
PyObject* core_module = NULL;
Expand Down
81 changes: 27 additions & 54 deletions src/js/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ import { scheduleCallback } from "./scheduler";
import { TypedArray } from "./types";
import { IN_NODE, detectEnvironment } from "./environments";
import "./literal-map.js";
import {
makeGlobalsProxy,
SnapshotConfig,
syncUpSnapshotLoad1,
syncUpSnapshotLoad2,
} from "./snapshot";

// Exported for micropip
API.loadBinaryFile = loadBinaryFile;
Expand Down Expand Up @@ -645,6 +651,15 @@ export class PyodideAPI {
API.debug_ffi = debug;
return orig;
}

/**
 * Produce a memory snapshot by delegating to `API.makeSnapshot()`.
 *
 * Snapshotting is opt-in: the call is rejected unless `API.config._makeSnapshot`
 * is truthy.
 *
 * @returns The snapshot bytes returned by `API.makeSnapshot()`.
 * @throws Error if the `_makeSnapshot` option was not passed to `loadPyodide`.
 */
static makeMemorySnapshot(): Uint8Array {
// Guard first so that a missing opt-in fails with an explanatory message
// instead of reaching API.makeSnapshot().
if (!API.config._makeSnapshot) {
throw new Error(
"Can only use pyodide.makeMemorySnapshot if the _makeSnapshot option is passed to loadPyodide",
);
}
return API.makeSnapshot();
}
}

/** @hidden */
Expand Down Expand Up @@ -699,7 +714,7 @@ API.bootstrapFinalizedPromise = new Promise<void>(
(r) => (bootstrapFinalized = r),
);

function jsFinderHook(o: object) {
export function jsFinderHook(o: object) {
if ("__all__" in o) {
return;
}
Expand All @@ -713,63 +728,17 @@ function jsFinderHook(o: object) {
});
}

/**
 * Set up some of the JavaScript state that is normally set up by C initialization code.
 * TODO: adjust C code to simplify.
 *
 * This is divided up into two parts: syncUpSnapshotLoad1 has to happen at the beginning of
 * finalizeBootstrap before the public API is set up, syncUpSnapshotLoad2 happens near the end.
 *
 * This code is quite sensitive to the details of our setup, so it might break if we move stuff
 * around far away in the code base. Ideally over time we can structure the code to make it less
 * brittle.
 *
 * NOTE(review): the calls below presumably must stay in this exact order, since each
 * `__hiwire_immortal_add` appends to the immortal table and the resulting positions are
 * expected to match what the C initialization would have produced. Confirm before reordering.
 */
function syncUpSnapshotLoad1() {
// hiwire init puts a null at the beginning of both the mortal and immortal tables.
Module.__hiwire_set(0, null);
Module.__hiwire_immortal_add(null);
// Usually importing _pyodide_core would trigger jslib_init but we need to manually call it.
Module._jslib_init();
// Puts deduplication map into the immortal table.
// TODO: Add support for snapshots to hiwire and move this to a hiwire_snapshot_init function.
Module.__hiwire_immortal_add(new Map());
// An interned JS string.
// TODO: Better system for handling interned strings.
Module.__hiwire_immortal_add(
"This borrowed proxy was automatically destroyed at the end of a function call. Try using create_proxy or create_once_callable.",
);
// Set API._pyodide to a proxy of the _pyodide module.
// Normally called by import _pyodide.
Module._init_pyodide_proxy();
}

/**
 * Fill in the JsRef table.
 *
 * Each entry of the array literal below is written with
 * `Module.__hiwire_set(idx, value)`, where `idx` is the entry's position in the
 * array. The order of the entries is therefore significant: inserting, removing
 * or reordering an entry changes which slot every later value lands in.
 *
 * NOTE(review): the slots presumably have to match the references held by the
 * restored snapshot memory. Confirm against the code that creates the snapshot.
 */
function syncUpSnapshotLoad2() {
[
null,
jsFinderHook,
API.config.jsglobals,
API.public_api,
Module.API,
scheduleCallback,
// Module.API appears a second time here (it is also at index 4).
Module.API,
{},
null,
null,
].forEach((v, idx) => Module.__hiwire_set(idx, v));
}

/**
* This function is called after the emscripten module is finished initializing,
* so eval_code is newly available.
* It finishes the bootstrap so that once it is complete, it is possible to use
* the core `pyodide` apis. (But package loading is not ready quite yet.)
* @private
*/
API.finalizeBootstrap = function (fromSnapshot?: boolean): PyodideInterface {
if (fromSnapshot) {
API.finalizeBootstrap = function (
snapshotConfig?: SnapshotConfig,
): PyodideInterface {
if (snapshotConfig) {
syncUpSnapshotLoad1();
}
let [err, captured_stderr] = API.rawRun("import _pyodide_core");
Expand Down Expand Up @@ -802,11 +771,15 @@ API.finalizeBootstrap = function (fromSnapshot?: boolean): PyodideInterface {
// Set up key Javascript modules.
let importhook = API._pyodide._importhook;
let pyodide = makePublicAPI();
if (fromSnapshot) {
syncUpSnapshotLoad2();
if (API.config._makeSnapshot) {
API.config.jsglobals = makeGlobalsProxy(API.config.jsglobals);
}
const jsglobals = API.config.jsglobals;
if (snapshotConfig) {
syncUpSnapshotLoad2(jsglobals, snapshotConfig);
} else {
importhook.register_js_finder.callKwargs({ hook: jsFinderHook });
importhook.register_js_module("js", API.config.jsglobals);
importhook.register_js_module("js", jsglobals);
importhook.register_js_module("pyodide_js", pyodide);
}

Expand Down
23 changes: 11 additions & 12 deletions src/js/pyodide.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import type { PyodideInterface } from "./api.js";
import type { TypedArray, Module } from "./types";
import type { EmscriptenSettings } from "./emscripten-settings";
import type { PackageData } from "./load-package";
import { SnapshotConfig } from "./snapshot";
export type { PyodideInterface, TypedArray };

export { version, type PackageData };
Expand All @@ -42,6 +43,7 @@ export type ConfigType = {
_node_mounts: string[];
env: { [key: string]: string };
packages: string[];
_makeSnapshot: boolean;
};

/**
Expand Down Expand Up @@ -213,11 +215,13 @@ export async function loadPyodide(
await loadScript(scriptSrc);
}

let snapshot;
let snapshot: Uint8Array | undefined = undefined;
if (options._loadSnapshot) {
snapshot = await options._loadSnapshot;
if (snapshot?.constructor?.name === "ArrayBuffer") {
snapshot = new Uint8Array(snapshot);
const snp = await options._loadSnapshot;
if (ArrayBuffer.isView(snp)) {
snapshot = snp;
} else {
snapshot = new Uint8Array(snp);
}
emscriptenSettings.noInitialRun = true;
// @ts-ignore
Expand Down Expand Up @@ -248,17 +252,12 @@ If you updated the Pyodide version, make sure you also updated the 'indexURL' pa
throw new Error("Didn't expect to load any more file_packager files!");
};

let snapshotConfig: SnapshotConfig | undefined = undefined;
if (snapshot) {
// @ts-ignore
Module.HEAP8.set(snapshot);
snapshotConfig = API.restoreSnapshot(snapshot);
}
// runPython works starting after the call to finalizeBootstrap.
const pyodide = API.finalizeBootstrap(!!snapshot);

if (options._makeSnapshot) {
// @ts-ignore
pyodide._snapshot = Module.HEAP8.slice();
}
const pyodide = API.finalizeBootstrap(snapshotConfig);
API.sys.path.insert(0, API.config.env.HOME);

if (!pyodide.version.includes("dev")) {
Expand Down

0 comments on commit a802179

Please sign in to comment.