Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CONTENT_FALLBACK_DIRS option along with cherrypy/devserver support. #6865

Merged
merged 11 commits into from
May 26, 2020
Merged
42 changes: 38 additions & 4 deletions kolibri/core/content/utils/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,26 @@ def get_content_dir_path(datafolder=None):
)


def get_content_fallback_paths():
    """
    Returns the list of directories configured in the CONTENT_FALLBACK_DIRS option.
    Entries may be separated by ";" or ","; surrounding whitespace is trimmed
    and empty entries are dropped, so an unset option yields an empty list.
    """
    raw_value = conf.OPTIONS["Paths"]["CONTENT_FALLBACK_DIRS"]
    # fold commas into semicolons so that a single split handles both separators
    entries = (entry.strip() for entry in raw_value.replace(",", ";").split(";"))
    return [entry for entry in entries if entry]


def existing_file_path_in_content_fallback_dirs(subpath):
    """
    Looks for subpath under each content fallback directory, in configured order.
    Returns the first joined path that exists on disk, or None if none of them do.
    """
    candidates = (
        os.path.join(fallback_dir, subpath)
        for fallback_dir in get_content_fallback_paths()
    )
    # the generator is lazy, so directories after the first hit are never checked
    return next(
        (candidate for candidate in candidates if os.path.exists(candidate)), None
    )


def get_content_database_dir_path(datafolder=None):
"""
Returns the path to the content sqlite databases
Expand All @@ -78,9 +98,16 @@ def get_content_database_file_path(channel_id, datafolder=None):
Given a channel_id, returns the path to the sqlite3 file
($HOME/.kolibri/content/databases/<channel_id>.sqlite3 on POSIX systems, by default)
"""
return os.path.join(
get_content_database_dir_path(datafolder), "{}.sqlite3".format(channel_id)
suffix = "{}.sqlite3".format(channel_id)
primary_path = os.path.join(get_content_database_dir_path(datafolder), suffix)
# if the primary path already exists, or an explicit datafolder was passed in, use the primary path
if os.path.exists(primary_path) or datafolder is not None:
return primary_path
backup_path = existing_file_path_in_content_fallback_dirs(
os.path.join("databases", suffix)
)
# return backup path if one exists; otherwise, return the primary path (even though it doesn't exist yet)
return backup_path or primary_path


def get_upgrade_content_database_file_path(channel_id, datafolder=None):
Expand Down Expand Up @@ -109,9 +136,16 @@ def get_content_storage_file_path(filename, datafolder=None):
raise InvalidStorageFilenameError(
"'{}' is not a valid content storage filename".format(filename)
)
return os.path.join(
get_content_storage_dir_path(datafolder), filename[0], filename[1], filename
suffix = os.path.join(filename[0], filename[1], filename)
primary_path = os.path.join(get_content_storage_dir_path(datafolder), suffix)
# if the primary path already exists, or an explicit datafolder was passed in, use the primary path
if os.path.exists(primary_path) or datafolder is not None:
return primary_path
backup_path = existing_file_path_in_content_fallback_dirs(
os.path.join("storage", suffix)
)
# return backup path if one exists; otherwise, return the primary path (even though it doesn't exist yet)
return backup_path or primary_path


# URL PATHS
Expand Down
8 changes: 4 additions & 4 deletions kolibri/core/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@
from django.conf.urls.static import static

from .views import GuestRedirectView
from .views import StatusCheckView
from .views import logout_view
from .views import RootURLRedirectView
from .views import set_language
from .views import static_serve_with_fallbacks
from .views import StatusCheckView
from .views import UnsupportedBrowserView
from kolibri.core.content.utils import paths
from kolibri.core.device.translation import i18n_patterns
Expand Down Expand Up @@ -75,6 +76,5 @@

urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

urlpatterns += static(
paths.get_content_url("/"), document_root=paths.get_content_dir_path()
)
content_dirs = [paths.get_content_dir_path()] + paths.get_content_fallback_paths()
urlpatterns += static(paths.get_content_url("/"), view=static_serve_with_fallbacks(content_dirs))
21 changes: 21 additions & 0 deletions kolibri/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from django.views.generic.base import TemplateView
from django.views.generic.base import View
from django.views.i18n import LANGUAGE_QUERY_PARAMETER
from django.views.static import serve

from kolibri.core.auth.constants import user_kinds
from kolibri.core.auth.models import Role
Expand Down Expand Up @@ -187,3 +188,23 @@ def get(self, request):
Confirms that the server is up
"""
return HttpResponse()


def static_serve_with_fallbacks(search_paths):
    """
    Build a view that serves a static file by iterating over search_paths until a
    matching file is found. If a matching file is not found on any of the paths,
    a 404 will be raised.

    :param search_paths: ordered sequence of directories to look in; the first
        directory that can serve the requested path wins
    :returns: a view function taking (request, path, document_root=None)
    """

    def serve_func(request, path, document_root=None):
        # document_root is accepted only so the signature matches what callers of a
        # static serve view pass in; the directories in search_paths are used instead
        first_error = None

        for search_path in search_paths:
            try:
                return serve(request, path, document_root=search_path)
            except Http404 as error:
                # remember the miss from the highest-priority directory so it can be
                # re-raised below without hitting the filesystem a second time
                if first_error is None:
                    first_error = error

        if first_error is None:
            # no directories were configured at all; report a 404 rather than
            # failing with an IndexError (which would surface as a 500)
            raise Http404("No content directories are configured")

        # we couldn't find the file anywhere, so surface the original 404
        raise first_error

    return serve_func
12 changes: 9 additions & 3 deletions kolibri/utils/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,12 @@ def language_list(value):
"type": "string",
"default": "content",
"envvars": ("KOLIBRI_CONTENT_DIR",),
}
},
"CONTENT_FALLBACK_DIRS": {
"type": "string",
rtibbles marked this conversation as resolved.
Show resolved Hide resolved
"default": "",
"envvars": ("KOLIBRI_CONTENT_FALLBACK_DIRS",),
},
},
"Urls": {
"CENTRAL_CONTENT_BASE_URL": {
Expand Down Expand Up @@ -239,7 +244,7 @@ def language_list(value):
"type": "integer",
"default": 2,
"envvars": ("KOLIBRI_PICKLE_PROTOCOL",),
},
rtibbles marked this conversation as resolved.
Show resolved Hide resolved
}
},
}

Expand Down Expand Up @@ -396,7 +401,8 @@ def _expand_paths(basepath, pathdict):
Resolve all paths in a dict, relative to a base path, and after expanding "~" into the user's home directory.
"""
for key, path in pathdict.items():
pathdict[key] = os.path.join(basepath, os.path.expanduser(path))
if path:
pathdict[key] = os.path.join(basepath, os.path.expanduser(path))


def update_options_file(section, key, value, KOLIBRI_HOME, ini_filename="options.ini"):
Expand Down
65 changes: 56 additions & 9 deletions kolibri/utils/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,54 @@ def calculate_cache_size():
return MIN_CACHE


class MultiStaticDispatcher(cherrypy._cpdispatch.Dispatcher):
    """
    A special cherrypy Dispatcher extension to dispatch static content from a series
    of directories on a search path. The first directory in which a file is found for
    the path is used, and if it's not found in any of them, then the handler for the
    first one is used, which will then likely return a NotFound error.
    """

    def __init__(self, search_paths, *args, **kwargs):
        """
        :param search_paths: non-empty sequence of directories, highest priority first
        Remaining positional and keyword arguments are passed to the base Dispatcher.
        """

        assert len(search_paths) >= 1, "Must provide at least one path in search_paths"

        self.static_handlers = []

        # build a cherrypy static file handler for each of the directories in search path
        for search_path in search_paths:

            search_path = os.path.normpath(os.path.expanduser(search_path))

            content_files_handler = cherrypy.tools.staticdir.handler(
                section="/", dir=search_path
            )

            # keep the normalized directory on the handler so that find_handler can
            # test for the requested file without re-deriving it
            content_files_handler.search_path = search_path

            self.static_handlers.append(content_files_handler)

        super(MultiStaticDispatcher, self).__init__(*args, **kwargs)

    @staticmethod
    def _is_inside(search_path, filepath):
        """
        Return True if filepath, once normalized, is search_path itself or lives
        beneath it. A bare startswith(search_path) is not enough: it would also
        accept sibling directories that merely share the prefix (for example
        "/data/content_private" when search_path is "/data/content").
        """
        normalized = os.path.normpath(filepath)
        if normalized == search_path:
            return True
        # rstrip first so that a filesystem root ("/") doesn't become "//"
        return normalized.startswith(search_path.rstrip(os.sep) + os.sep)

    def find_handler(self, path):
        """
        Return a (handler, vpath) tuple for the static handler whose directory
        contains the requested path, falling back to the first handler.
        """

        # the return value is deliberately discarded; presumably this is called for
        # the base dispatcher's per-request setup - confirm against cherrypy
        super(MultiStaticDispatcher, self).find_handler(path)

        default_handler = self.static_handlers[0]

        # with a single directory every outcome below is the same handler, so skip
        # the extra filesystem check on each request
        if len(self.static_handlers) == 1:
            return (default_handler, [])

        relative_path = path.strip("/")

        # loop over all the static handlers to see if they have the file we want
        for handler in self.static_handlers:

            filepath = os.path.join(handler.search_path, relative_path)

            # ensure the user-provided path doesn't try to jump up levels
            if not self._is_inside(handler.search_path, filepath):
                continue

            if os.path.exists(filepath):
                return (handler, [])

        return (default_handler, [])


def configure_http_server(port):
# Mount the application
from kolibri.deployment.default.wsgi import application
Expand All @@ -227,16 +275,15 @@ def configure_http_server(port):
)

# Mount content files
content_files_handler = cherrypy.tools.staticdir.handler(
section="/", dir=paths.get_content_dir_path()
)

url_path_prefix = conf.OPTIONS["Deployment"]["URL_PATH_PREFIX"]

CONTENT_ROOT = "/" + paths.get_content_url(
conf.OPTIONS["Deployment"]["URL_PATH_PREFIX"]
).lstrip("/")
content_dirs = [paths.get_content_dir_path()] + paths.get_content_fallback_paths()
dispatcher = MultiStaticDispatcher(content_dirs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case where the fallback paths are not used, how much overhead does the multistatic dispatcher add compared to how cherrypy normally handles static files?

One way to be conservative about this would be to only use the MultiStaticDispatcher in the case where there are fallback content paths.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's still using the same cherrypy static handlers. The additional code that's executed per file request is:
https://github.com/learningequality/kolibri/pull/6865/files/78ae3149620cc3e98743ca87968e6557529b95c0#diff-601cde52caf17fa9d77c64ca9e73e927R239-R248

In the case of no fallback content paths, this loop will only be executed once, but it does do an extra os.path.exists call in there, which wouldn't be needed in the "single handler" case. Actually, the simplest way to "handle" that might be to just short-circuit the find_handler method if there's only one candidate handler anyway.

cherrypy.tree.mount(
content_files_handler,
"/{}".format(paths.get_content_url(url_path_prefix).lstrip("/")),
config={"/": {"tools.caching.on": False}},
None,
CONTENT_ROOT,
config={"/": {"tools.caching.on": False, "request.dispatch": dispatcher}},
)

# Instantiate a new server object
Expand Down