Skip to content

Commit

Permalink
Merge pull request #6865 from jamalex/content_fallback_paths
Browse files Browse the repository at this point in the history
CONTENT_FALLBACK_DIRS option along with cherrypy/devserver support.
  • Loading branch information
jamalex committed May 26, 2020
2 parents 64d0ea0 + 5d05c61 commit aa4e9d5
Show file tree
Hide file tree
Showing 9 changed files with 334 additions and 72 deletions.
112 changes: 90 additions & 22 deletions kolibri/core/content/management/commands/scanforcontent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
from ...utils.channel_import import FutureSchemaError
from ...utils.channel_import import import_channel_from_local_db
from ...utils.channel_import import InvalidSchemaVersionError
from ...utils.channels import get_channel_ids_for_content_database_dir
from ...utils.channels import get_channel_ids_for_content_dirs
from ...utils.channels import read_channel_metadata_from_db_file
from ...utils.paths import get_content_database_file_path
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.utils.paths import get_content_database_dir_path
from kolibri.core.content.utils.paths import get_all_content_dir_paths

logger = logging.getLogger(__name__)

Expand All @@ -23,30 +25,96 @@ class Command(BaseCommand):

help = "Scan content and databases in Kolibri folder and updates the database to show if available"

def add_arguments(self, parser):
    """
    Register the command line options understood by this command.

    :param parser: the argparse-compatible parser to add options to

    Options added:
    - ``--channel-import-mode``: one of "newer" (default), "missing", "none"
    - ``--channels``: comma separated channel IDs, parsed into a list of strings
    """

    def split_channel_ids(value):
        # Split the comma separated string we get into a list of strings.
        # Whitespace around each ID is stripped so that "id1, id2" selects
        # both channels instead of silently failing to match " id2".
        return [channel_id.strip() for channel_id in value.split(",")]

    channel_import_mode_help_text = """
    Specify the desired behavior for import of channel metadata databases. Value must be one of:
    - newer: only import if database version is higher than what we already have (default)
    - missing: only import if we do not yet have the channel at all in the primary database
    - none: do not import new channel databases, and only annotate for channels we already have
    """
    parser.add_argument(
        "--channel-import-mode",
        type=str,
        default="newer",
        choices=["newer", "missing", "none"],
        required=False,
        dest="channel_import_mode",
        help=channel_import_mode_help_text,
    )

    channels_help_text = """
    Constrain the content scan to a particular set of channels. Other channels will not be imported
    or annotated. Separate multiple channel IDs with commas.
    """
    parser.add_argument(
        "--channels",
        type=split_channel_ids,
        default=None,
        required=False,
        dest="channels",
        help=channels_help_text,
    )

def handle(self, *args, **options):
    """
    Scan the content directories for channel databases, import them into the
    primary database according to the import mode, and annotate availability.

    Options read:
    - ``channel_import_mode``: "newer", "missing" or "none"
    - ``channels``: optional list of channel IDs; when given, only those
      channels are imported or annotated
    """
    channel_import_mode = options["channel_import_mode"]
    channels_to_include = options["channels"]

    # sets give constant-time membership checks inside the loop below
    storage_channel_ids = set(
        get_channel_ids_for_content_dirs(get_all_content_dir_paths())
    )
    database_channel_ids = set(
        ChannelMetadata.objects.all().values_list("id", flat=True)
    )

    if channel_import_mode == "none":
        # if told not to import any channel databases, constrain to ones we already have
        all_channel_ids = set(database_channel_ids)
    else:
        all_channel_ids = storage_channel_ids | database_channel_ids

    # if an explicit set of channels was specified, filter out anything not included in that
    if channels_to_include:
        all_channel_ids = all_channel_ids.intersection(channels_to_include)

    for channel_id in all_channel_ids:

        disk_path = get_content_database_file_path(channel_id)

        # The branches are mutually exclusive: a channel without a database
        # file in any content directory must never be imported, whatever the
        # mode, so the "newer" check may not override the first branch.
        if channel_id not in storage_channel_ids or channel_import_mode == "none":
            import_database = False
        elif channel_import_mode == "missing":
            import_database = channel_id not in database_channel_ids
        elif channel_import_mode == "newer":
            import_database = self.database_file_is_newer(channel_id, disk_path)
        else:
            # unrecognized mode: do not import anything, only annotate
            import_database = False

        if import_database:
            self.import_channel_database(channel_id, disk_path)

        self.annotate_channel(channel_id)

def database_file_is_newer(self, channel_id, disk_path):
    """
    Tell whether the channel database file on disk should be considered newer
    than the copy of the channel held in the primary database.

    :param channel_id: ID of the channel to look up in the primary database
    :param disk_path: path of the channel database file to read
    :return: True when the on-disk version is higher, or when the channel is
        not in the primary database at all; False when the on-disk version is
        not higher or reading raised DatabaseError
    """
    try:
        on_disk = read_channel_metadata_from_db_file(disk_path)
        imported = ChannelMetadata.objects.get(id=channel_id)
        # compare the version on disk against the one already imported
        is_newer = on_disk.version > imported.version
    except DatabaseError:
        # the file on disk could not be read, so it cannot count as newer
        is_newer = False
    except ChannelMetadata.DoesNotExist:
        # nothing imported yet for this channel, so the file is newer by default
        is_newer = True
    return is_newer

def import_channel_database(self, channel_id, disk_path):
    """
    Import the channel's database into the primary database, logging a
    warning instead of failing when the file is incompatible or corrupted.

    :param channel_id: ID of the channel to import
    :param disk_path: path of the database file; used only in the log message
    """
    logger.info("Attempting import of channel database at: {}".format(disk_path))
    problem = None
    try:
        import_channel_from_local_db(channel_id)
    except (InvalidSchemaVersionError, FutureSchemaError):
        problem = "Database file was incompatible; skipping."
    except DatabaseError:
        problem = "Database file was corrupted; skipping."
    if problem is not None:
        logger.warning(problem)

def annotate_channel(self, channel_id):
    """
    Log the annotation and update the channel's content visibility from disk.

    :param channel_id: ID of the channel to annotate
    """
    message = "Annotating availability for channel: {}".format(channel_id)
    logger.info(message)
    set_content_visibility_from_disk(channel_id)
8 changes: 3 additions & 5 deletions kolibri/core/content/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
from kolibri.core.content.utils.channel_import import FutureSchemaError
from kolibri.core.content.utils.channel_import import import_channel_from_local_db
from kolibri.core.content.utils.channel_import import InvalidSchemaVersionError
from kolibri.core.content.utils.channels import get_channel_ids_for_content_database_dir
from kolibri.core.content.utils.paths import get_content_database_dir_path
from kolibri.core.content.utils.channels import get_channel_ids_for_content_dirs
from kolibri.core.content.utils.paths import get_all_content_dir_paths
from kolibri.core.content.utils.paths import get_content_database_file_path
from kolibri.core.content.utils.sqlalchemybridge import Bridge
from kolibri.core.upgrade import version_upgrade
Expand All @@ -40,9 +40,7 @@ def import_external_content_dbs():
scan through the content database folder for all channel content databases,
and pull the data from each database if we have not already imported it.
"""
channel_ids = get_channel_ids_for_content_database_dir(
get_content_database_dir_path()
)
channel_ids = get_channel_ids_for_content_dirs(get_all_content_dir_paths())
for channel_id in channel_ids:
if not ChannelMetadata.objects.filter(id=channel_id).exists():
try:
Expand Down
10 changes: 10 additions & 0 deletions kolibri/core/content/utils/channels.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@
logger = logging.getLogger(__name__)


def get_channel_ids_for_content_dirs(content_dirs):
    """
    Collect the channel IDs found in the "databases" folder of each content directory.

    :param content_dirs: iterable of content directory paths
    :return: list of unique channel IDs, in no particular order
    """
    # resolve every database directory first, then scan them
    database_dirs = []
    for content_dir in content_dirs:
        database_dirs.append(get_content_database_dir_path(contentfolder=content_dir))
    found = set().union(
        *[get_channel_ids_for_content_database_dir(db_dir) for db_dir in database_dirs]
    )
    return list(found)


def get_channel_ids_for_content_database_dir(content_database_dir):
"""
Returns a list of channel IDs for the channel databases that exist in a content database directory.
Expand Down
133 changes: 104 additions & 29 deletions kolibri/core/content/utils/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@
# TODO: add ".epub" and ".epub3" if epub-equivalent of ZipContentView implemented


def _maybe_makedirs(path):
    """
    Create the directory ``path`` (with parents) if it is not already a
    directory, ignoring any OSError raised while doing so.
    """
    if os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # When importing from USB etc, it does not need to create
        # directories under external drives that are not writable.
        pass


def get_attribute(obj, key):
"""
Get an attribute from an object, regardless of whether it is a dict or an object
Expand Down Expand Up @@ -49,69 +59,134 @@ def get_local_content_storage_file_url(obj):
# DISK PATHS


def get_content_dir_path(datafolder=None, contentfolder=None):
    """
    Return the path of the content directory.

    :param datafolder: optional data folder; the content directory is its
        "content" subfolder
    :param contentfolder: optional explicit content directory; takes
        precedence over ``datafolder``
    :return: ``contentfolder`` if given, else ``<datafolder>/content`` if
        ``datafolder`` is given, else the configured CONTENT_DIR option
    """
    if contentfolder:
        return contentfolder
    if datafolder:
        return os.path.join(datafolder, "content")
    return conf.OPTIONS["Paths"]["CONTENT_DIR"]


def get_content_database_dir_path(datafolder=None):
def get_content_fallback_paths():
    """
    Return the configured CONTENT_FALLBACK_DIRS entries with surrounding
    whitespace stripped and empty entries dropped.
    """
    configured = conf.OPTIONS["Paths"]["CONTENT_FALLBACK_DIRS"]
    stripped = (entry.strip() for entry in configured)
    return [entry for entry in stripped if entry]


def get_all_content_dir_paths():
    """
    Return the primary content directory followed by every fallback directory.
    """
    all_paths = [get_content_dir_path()]
    all_paths.extend(get_content_fallback_paths())
    return all_paths


def existing_file_path_in_content_fallback_dirs(subpath):
    """
    Look for ``subpath`` inside each content fallback directory, in order.

    :param subpath: path relative to a content directory
    :return: the first joined path that exists, or None if there is none
    """
    candidates = (
        os.path.join(fallback_dir, subpath)
        for fallback_dir in get_content_fallback_paths()
    )
    # first existing candidate wins; None when no fallback dir has the file
    return next(
        (candidate for candidate in candidates if os.path.exists(candidate)), None
    )


def get_content_database_dir_path(datafolder=None, contentfolder=None):
    """
    Returns the path to the content sqlite databases
    ($HOME/.kolibri/content/databases on POSIX systems, by default)

    :param datafolder: optional data folder override, passed on to get_content_dir_path
    :param contentfolder: optional content folder override, passed on to get_content_dir_path
    :return: path of the "databases" folder inside the resolved content directory
    """
    path = os.path.join(
        get_content_dir_path(datafolder=datafolder, contentfolder=contentfolder),
        "databases",
    )
    # best-effort creation: the directory may sit on a drive that is not writable
    _maybe_makedirs(path)
    return path


def get_content_database_file_path(channel_id, datafolder=None, contentfolder=None):
    """
    Given a channel_id, returns the path to the sqlite3 file
    ($HOME/.kolibri/content/databases/<channel_id>.sqlite3 on POSIX systems, by default)

    When neither ``datafolder`` nor ``contentfolder`` is given and the file is
    missing from the primary location, the content fallback directories are
    searched for an existing copy.

    :param channel_id: ID of the channel whose database file is wanted
    :param datafolder: optional data folder override
    :param contentfolder: optional content folder override
    :return: path of the database file; it may not exist yet
    """
    suffix = "{}.sqlite3".format(channel_id)
    primary_path = os.path.join(
        get_content_database_dir_path(
            datafolder=datafolder, contentfolder=contentfolder
        ),
        suffix,
    )
    # if the primary path already exists, or the datafolder/contentfolder is overridden, use the primary path
    if (
        os.path.exists(primary_path)
        or datafolder is not None
        or contentfolder is not None
    ):
        return primary_path
    backup_path = existing_file_path_in_content_fallback_dirs(
        os.path.join("databases", suffix)
    )
    # return backup path if one exists; otherwise, return primary path (even though it doesn't exist yet)
    return backup_path or primary_path


def get_upgrade_content_database_file_path(
    channel_id, datafolder=None, contentfolder=None
):
    """
    Return the path of the "<channel_id>-upgrade.sqlite3" file inside the
    content databases directory.

    :param channel_id: ID of the channel
    :param datafolder: optional data folder override
    :param contentfolder: optional content folder override
    """
    return os.path.join(
        get_content_database_dir_path(
            datafolder=datafolder, contentfolder=contentfolder
        ),
        "{}-upgrade.sqlite3".format(channel_id),
    )


def get_annotated_content_database_file_path(
    channel_id, datafolder=None, contentfolder=None
):
    """
    Return the path of the "<channel_id>-annotated.sqlite3" file inside the
    content databases directory.

    :param channel_id: ID of the channel
    :param datafolder: optional data folder override
    :param contentfolder: optional content folder override
    """
    return os.path.join(
        get_content_database_dir_path(
            datafolder=datafolder, contentfolder=contentfolder
        ),
        "{}-annotated.sqlite3".format(channel_id),
    )


def get_content_storage_dir_path(datafolder=None, contentfolder=None):
    """
    Return the path of the "storage" folder inside the resolved content
    directory, attempting to create it if it does not exist.

    :param datafolder: optional data folder override
    :param contentfolder: optional content folder override
    """
    path = os.path.join(
        get_content_dir_path(datafolder=datafolder, contentfolder=contentfolder),
        "storage",
    )
    # best-effort creation: the directory may sit on a drive that is not writable
    _maybe_makedirs(path)
    return path


def get_content_storage_file_path(filename, datafolder=None, contentfolder=None):
    """
    Return the path of a content storage file, which lives under
    ``storage/<first char>/<second char>/<filename>`` in a content directory.

    When neither ``datafolder`` nor ``contentfolder`` is given and the file is
    missing from the primary location, the content fallback directories are
    searched for an existing copy.

    :param filename: storage filename; must match VALID_STORAGE_FILENAME
    :param datafolder: optional data folder override
    :param contentfolder: optional content folder override
    :return: path of the storage file; it may not exist yet
    :raises InvalidStorageFilenameError: if ``filename`` is not a valid storage filename
    """
    if not VALID_STORAGE_FILENAME.match(filename):
        raise InvalidStorageFilenameError(
            "'{}' is not a valid content storage filename".format(filename)
        )
    suffix = os.path.join(filename[0], filename[1], filename)
    primary_path = os.path.join(
        get_content_storage_dir_path(
            datafolder=datafolder, contentfolder=contentfolder
        ),
        suffix,
    )
    # if the primary path already exists, or the datafolder/contentfolder is overridden, use the primary path
    if (
        os.path.exists(primary_path)
        or datafolder is not None
        or contentfolder is not None
    ):
        return primary_path
    backup_path = existing_file_path_in_content_fallback_dirs(
        os.path.join("storage", suffix)
    )
    # return backup path if one exists; otherwise, return the primary path (even though it doesn't exist yet)
    return backup_path or primary_path


# URL PATHS
Expand Down
6 changes: 4 additions & 2 deletions kolibri/core/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@
from django.conf.urls.static import static

from .views import GuestRedirectView
from .views import StatusCheckView
from .views import logout_view
from .views import RootURLRedirectView
from .views import set_language
from .views import static_serve_with_fallbacks
from .views import StatusCheckView
from .views import UnsupportedBrowserView
from kolibri.core.content.utils import paths
from kolibri.core.device.translation import i18n_patterns
Expand Down Expand Up @@ -75,6 +76,7 @@

urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

content_dirs = [paths.get_content_dir_path()] + paths.get_content_fallback_paths()
urlpatterns += static(
paths.get_content_url("/"), document_root=paths.get_content_dir_path()
paths.get_content_url("/"), view=static_serve_with_fallbacks(content_dirs)
)

0 comments on commit aa4e9d5

Please sign in to comment.