Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CONTENT_FALLBACK_DIRS option along with cherrypy/devserver support. #6865

Merged
merged 11 commits into from
May 26, 2020
Merged
112 changes: 90 additions & 22 deletions kolibri/core/content/management/commands/scanforcontent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
from ...utils.channel_import import FutureSchemaError
from ...utils.channel_import import import_channel_from_local_db
from ...utils.channel_import import InvalidSchemaVersionError
from ...utils.channels import get_channel_ids_for_content_database_dir
from ...utils.channels import get_channel_ids_for_content_dirs
from ...utils.channels import read_channel_metadata_from_db_file
from ...utils.paths import get_content_database_file_path
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.utils.paths import get_content_database_dir_path
from kolibri.core.content.utils.paths import get_all_content_dir_paths

logger = logging.getLogger(__name__)

Expand All @@ -23,30 +25,96 @@ class Command(BaseCommand):

help = "Scan content and databases in Kolibri folder and updates the database to show if available"

def add_arguments(self, parser):
    """
    Register this command's optional arguments on ``parser``.

    --channel-import-mode: one of "newer" (default), "missing" or "none".
    --channels: comma separated channel IDs, parsed into a list of strings
    (``None`` when the option is not given).
    """

    def split_channel_ids(value):
        # Split the comma separated string we get, into a list of strings.
        # Whitespace around each ID is stripped so that "a, b" does not yield
        # " b", which could never match a real channel ID.
        return [channel_id.strip() for channel_id in value.split(",")]

    channel_import_mode_help_text = """
Specify the desired behavior for import of channel metadata databases. Value must be one of:
- newer: only import if database version is higher than what we already have (default)
- missing: only import if we do not yet have the channel at all in the primary database
- none: do not import new channel databases, and only annotate for channels we already have
"""
    parser.add_argument(
        "--channel-import-mode",
        type=str,
        default="newer",
        choices=["newer", "missing", "none"],
        required=False,
        dest="channel_import_mode",
        help=channel_import_mode_help_text,
    )

    channels_help_text = """
Constrain the content scan to a particular set of channels. Other channels will not be imported
or annotated. Separate multiple channel IDs with commas.
"""
    parser.add_argument(
        "--channels",
        type=split_channel_ids,
        default=None,
        required=False,
        dest="channels",
        help=channels_help_text,
    )

def handle(self, *args, **options):
    """
    Import channel databases found on disk, as permitted by the import mode,
    then annotate availability for every channel under consideration.

    Reads ``options["channel_import_mode"]`` ("newer", "missing" or "none")
    and ``options["channels"]`` (an optional list of channel IDs that
    constrains the scan).
    """
    channel_import_mode = options["channel_import_mode"]
    channels_to_include = options["channels"]

    storage_channel_ids = get_channel_ids_for_content_dirs(
        get_all_content_dir_paths()
    )
    database_channel_ids = list(
        ChannelMetadata.objects.all().values_list("id", flat=True)
    )
    all_channel_ids = set(storage_channel_ids + database_channel_ids)

    # if told not to import any channel databases, constrain to ones we already have
    if channel_import_mode == "none":
        all_channel_ids = set(database_channel_ids)

    # if an explicit set of channels was specified, filter out anything not included in that
    if channels_to_include:
        all_channel_ids = all_channel_ids.intersection(channels_to_include)

    # build the membership sets once rather than scanning lists per channel
    ids_on_disk = set(storage_channel_ids)
    ids_in_database = set(database_channel_ids)

    for channel_id in all_channel_ids:

        disk_path = get_content_database_file_path(channel_id)

        # A single if/elif chain: once we know there is no database file on
        # disk (or importing is disabled), the "newer" check must not run and
        # overwrite that decision.
        import_database = False
        if channel_id not in ids_on_disk or channel_import_mode == "none":
            import_database = False
        elif channel_import_mode == "missing":
            import_database = channel_id not in ids_in_database
        elif channel_import_mode == "newer":
            import_database = self.database_file_is_newer(channel_id, disk_path)

        if import_database:
            self.import_channel_database(channel_id, disk_path)

        self.annotate_channel(channel_id)

def database_file_is_newer(self, channel_id, disk_path):
    """
    Report whether the channel database file at ``disk_path`` should be
    treated as newer than the copy of ``channel_id`` in the primary database.

    Returns False when reading raises DatabaseError, and True when the
    channel is not present in the primary database at all.
    """
    try:
        on_disk = read_channel_metadata_from_db_file(disk_path)
        in_primary = ChannelMetadata.objects.get(id=channel_id)
        # newer only if the disk version is strictly higher than the primary one
        disk_is_ahead = on_disk.version > in_primary.version
        return disk_is_ahead
    except DatabaseError:
        # the database on disk could not be read, so it cannot count as newer
        return False
    except ChannelMetadata.DoesNotExist:
        # nothing to compare against in the primary database: newer by default
        return True

def import_channel_database(self, channel_id, disk_path):
    """
    Attempt to import the channel database for ``channel_id``.

    ``disk_path`` is only used for the log message. Incompatible or
    corrupted database files are logged as warnings and skipped.
    """
    start_message = "Attempting import of channel database at: {}".format(disk_path)
    logger.info(start_message)
    try:
        import_channel_from_local_db(channel_id)
    except (InvalidSchemaVersionError, FutureSchemaError):
        logger.warning("Database file was incompatible; skipping.")
    except DatabaseError:
        logger.warning("Database file was corrupted; skipping.")

def annotate_channel(self, channel_id):
    """
    Log the annotation step, then set content visibility for ``channel_id``
    from what is on disk.
    """
    message = "Annotating availability for channel: {}".format(channel_id)
    logger.info(message)
    set_content_visibility_from_disk(channel_id)
8 changes: 3 additions & 5 deletions kolibri/core/content/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
from kolibri.core.content.utils.channel_import import FutureSchemaError
from kolibri.core.content.utils.channel_import import import_channel_from_local_db
from kolibri.core.content.utils.channel_import import InvalidSchemaVersionError
from kolibri.core.content.utils.channels import get_channel_ids_for_content_database_dir
from kolibri.core.content.utils.paths import get_content_database_dir_path
from kolibri.core.content.utils.channels import get_channel_ids_for_content_dirs
from kolibri.core.content.utils.paths import get_all_content_dir_paths
from kolibri.core.content.utils.paths import get_content_database_file_path
from kolibri.core.content.utils.sqlalchemybridge import Bridge
from kolibri.core.upgrade import version_upgrade
Expand All @@ -40,9 +40,7 @@ def import_external_content_dbs():
scan through the content database folder for all channel content databases,
and pull the data from each database if we have not already imported it.
"""
channel_ids = get_channel_ids_for_content_database_dir(
get_content_database_dir_path()
)
channel_ids = get_channel_ids_for_content_dirs(get_all_content_dir_paths())
for channel_id in channel_ids:
if not ChannelMetadata.objects.filter(id=channel_id).exists():
try:
Expand Down
10 changes: 10 additions & 0 deletions kolibri/core/content/utils/channels.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@
logger = logging.getLogger(__name__)


def get_channel_ids_for_content_dirs(content_dirs):
    """
    Returns a de-duplicated list of the channel IDs found in the database
    directory of each content directory in ``content_dirs``.
    """
    # resolve every database directory first, then scan them in order
    database_dirs = [
        get_content_database_dir_path(contentfolder=content_dir)
        for content_dir in content_dirs
    ]
    found_ids = set()
    for database_dir in database_dirs:
        for channel_id in get_channel_ids_for_content_database_dir(database_dir):
            found_ids.add(channel_id)
    return list(found_ids)


def get_channel_ids_for_content_database_dir(content_database_dir):
"""
Returns a list of channel IDs for the channel databases that exist in a content database directory.
Expand Down
133 changes: 104 additions & 29 deletions kolibri/core/content/utils/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@
# TODO: add ".epub" and ".epub3" if epub-equivalent of ZipContentView implemented


def _maybe_makedirs(path):
    """
    Create ``path`` (and any missing parents) unless it is already a directory.
    Failure to create it is deliberately ignored.
    """
    if os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # When importing from USB etc, it does not need to create
        # directories under external drives that are not writable.
        pass


def get_attribute(obj, key):
"""
Get an attribute from an object, regardless of whether it is a dict or an object
Expand Down Expand Up @@ -49,69 +59,134 @@ def get_local_content_storage_file_url(obj):
# DISK PATHS


def get_content_dir_path(datafolder=None, contentfolder=None):
    """
    Returns the content directory path.

    An explicit ``contentfolder`` wins; otherwise ``<datafolder>/content`` is
    used when ``datafolder`` is given; otherwise the configured
    ``CONTENT_DIR`` path option is returned.
    """
    if contentfolder:
        return contentfolder
    elif datafolder:
        return os.path.join(datafolder, "content")
    else:
        return conf.OPTIONS["Paths"]["CONTENT_DIR"]


def get_content_database_dir_path(datafolder=None):
def get_content_fallback_paths():
    """
    Returns the configured ``CONTENT_FALLBACK_DIRS`` entries with surrounding
    whitespace stripped, omitting entries that are empty after stripping.
    """
    configured_dirs = conf.OPTIONS["Paths"]["CONTENT_FALLBACK_DIRS"]
    stripped_dirs = (entry.strip() for entry in configured_dirs)
    return [entry for entry in stripped_dirs if entry]


def get_all_content_dir_paths():
    """
    Returns the primary content directory followed by the fallback content
    directories, as a single list.
    """
    primary_dir = get_content_dir_path()
    fallback_dirs = get_content_fallback_paths()
    return [primary_dir] + fallback_dirs


def existing_file_path_in_content_fallback_dirs(subpath):
    """
    Returns the first ``<fallback dir>/<subpath>`` that exists on disk, checking
    the fallback directories in order, or None when none of them has it.
    """
    candidates = (
        os.path.join(fallback_dir, subpath)
        for fallback_dir in get_content_fallback_paths()
    )
    # the generators are lazy, so checking stops at the first existing path
    return next(
        (candidate for candidate in candidates if os.path.exists(candidate)), None
    )


def get_content_database_dir_path(datafolder=None, contentfolder=None):
    """
    Returns the path to the content sqlite databases
    ($HOME/.kolibri/content/databases on POSIX systems, by default)

    The directory is created if it is missing; a failure to create it is
    ignored by ``_maybe_makedirs``.
    """
    path = os.path.join(
        get_content_dir_path(datafolder=datafolder, contentfolder=contentfolder),
        "databases",
    )
    _maybe_makedirs(path)
    return path


def get_content_database_file_path(channel_id, datafolder=None, contentfolder=None):
    """
    Given a channel_id, returns the path to the sqlite3 file
    ($HOME/.kolibri/content/databases/<channel_id>.sqlite3 on POSIX systems, by default)

    When neither ``datafolder`` nor ``contentfolder`` is given and the file is
    missing from the primary location, the content fallback directories are
    searched and the first existing copy is returned instead.
    """
    suffix = "{}.sqlite3".format(channel_id)
    primary_path = os.path.join(
        get_content_database_dir_path(
            datafolder=datafolder, contentfolder=contentfolder
        ),
        suffix,
    )
    # if the primary path already exists, or the datafolder/contentfolder is overridden, use the primary path
    if (
        os.path.exists(primary_path)
        or datafolder is not None
        or contentfolder is not None
    ):
        return primary_path
    backup_path = existing_file_path_in_content_fallback_dirs(
        os.path.join("databases", suffix)
    )
    # return backup path if one exists; otherwise, return primary path (even though it doesn't exist yet)
    return backup_path or primary_path


def get_upgrade_content_database_file_path(
    channel_id, datafolder=None, contentfolder=None
):
    """
    Returns the path of ``<channel_id>-upgrade.sqlite3`` inside the content
    database directory selected by ``datafolder`` / ``contentfolder``.
    """
    return os.path.join(
        get_content_database_dir_path(
            datafolder=datafolder, contentfolder=contentfolder
        ),
        "{}-upgrade.sqlite3".format(channel_id),
    )


def get_annotated_content_database_file_path(
    channel_id, datafolder=None, contentfolder=None
):
    """
    Returns the path of ``<channel_id>-annotated.sqlite3`` inside the content
    database directory selected by ``datafolder`` / ``contentfolder``.
    """
    return os.path.join(
        get_content_database_dir_path(
            datafolder=datafolder, contentfolder=contentfolder
        ),
        "{}-annotated.sqlite3".format(channel_id),
    )


def get_content_storage_dir_path(datafolder=None, contentfolder=None):
    """
    Returns the path to the "storage" directory inside the content directory
    selected by ``datafolder`` / ``contentfolder``.

    The directory is created if it is missing; a failure to create it is
    ignored by ``_maybe_makedirs``.
    """
    path = os.path.join(
        get_content_dir_path(datafolder=datafolder, contentfolder=contentfolder),
        "storage",
    )
    _maybe_makedirs(path)
    return path


def get_content_storage_file_path(filename, datafolder=None, contentfolder=None):
    """
    Returns the on-disk path for a content storage file, laid out as
    ``storage/<filename[0]>/<filename[1]>/<filename>``.

    When neither ``datafolder`` nor ``contentfolder`` is given and the file is
    missing from the primary location, the content fallback directories are
    searched and the first existing copy is returned instead.

    Raises InvalidStorageFilenameError if ``filename`` does not match
    VALID_STORAGE_FILENAME.
    """
    if not VALID_STORAGE_FILENAME.match(filename):
        raise InvalidStorageFilenameError(
            "'{}' is not a valid content storage filename".format(filename)
        )
    suffix = os.path.join(filename[0], filename[1], filename)
    primary_path = os.path.join(
        get_content_storage_dir_path(
            datafolder=datafolder, contentfolder=contentfolder
        ),
        suffix,
    )
    # if the primary path already exists, or the datapath is overridden, use the primary path
    if (
        os.path.exists(primary_path)
        or datafolder is not None
        or contentfolder is not None
    ):
        return primary_path
    backup_path = existing_file_path_in_content_fallback_dirs(
        os.path.join("storage", suffix)
    )
    # return backup path if one exists; otherwise, return the primary path (even though it doesn't exist yet)
    return backup_path or primary_path


# URL PATHS
Expand Down
6 changes: 4 additions & 2 deletions kolibri/core/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@
from django.conf.urls.static import static

from .views import GuestRedirectView
from .views import StatusCheckView
from .views import logout_view
from .views import RootURLRedirectView
from .views import set_language
from .views import static_serve_with_fallbacks
from .views import StatusCheckView
from .views import UnsupportedBrowserView
from kolibri.core.content.utils import paths
from kolibri.core.device.translation import i18n_patterns
Expand Down Expand Up @@ -75,6 +76,7 @@

urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

# Serve content files from the primary content directory first, then from each
# configured fallback directory.
content_dirs = paths.get_all_content_dir_paths()
urlpatterns += static(
    paths.get_content_url("/"), view=static_serve_with_fallbacks(content_dirs)
)