diff --git a/api/rbdigital.py b/api/rbdigital.py index 496d60c38..ee9214a64 100644 --- a/api/rbdigital.py +++ b/api/rbdigital.py @@ -1608,9 +1608,22 @@ def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats Identifier.RB_DIGITAL_ID, rbdigital_id ) + # medium is both bibliographic and format information. + + # options are: "eBook", "eAudio" + rbdigital_medium = book.get('mediaType', None) + if rbdigital_medium and rbdigital_medium not in cls.rbdigital_medium_to_simplified_medium: + cls.log.error( + "Could not process medium %s for %s", rbdigital_medium, rbdigital_id) + + medium = cls.rbdigital_medium_to_simplified_medium.get( + rbdigital_medium, Edition.BOOK_MEDIUM + ) + metadata = Metadata( data_source=DataSource.RB_DIGITAL, primary_identifier=primary_identifier, + medium=medium, ) if include_bibliographic: @@ -1706,16 +1719,6 @@ def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats ) subjects.append(subject) - # options are: "eBook", "eAudio" - rbdigital_medium = book.get('mediaType', None) - if rbdigital_medium and rbdigital_medium not in cls.rbdigital_medium_to_simplified_medium: - cls.log.error( - "Could not process medium %s for %s", rbdigital_medium, rbdigital_id) - - medium = cls.rbdigital_medium_to_simplified_medium.get( - rbdigital_medium, Edition.BOOK_MEDIUM - ) - # passed to metadata.apply, the isbn_identifier will create an equivalency # between the RBDigital-labeled and the ISBN-labeled identifier rows, which # will in turn allow us to ask the MetadataWrangler for more info about the book. 
@@ -1760,7 +1763,6 @@ def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats metadata.title = title metadata.language = language - metadata.medium = medium metadata.series = series_name metadata.series_position = series_position metadata.publisher = publisher @@ -1781,7 +1783,7 @@ def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats drm_scheme = DeliveryMechanism.NO_DRM formats.append(FormatData(content_type, drm_scheme)) else: - cls.log.warn("Unfamiliar format: %s", format_id) + cls.log.warn("Unfamiliar format: %s", metadata.medium) # Make a CirculationData so we can write the formats, circulationdata = CirculationData( diff --git a/core b/core index 328e9e97c..ed64447a2 160000 --- a/core +++ b/core @@ -1 +1 @@ -Subproject commit 328e9e97c45289a494397196ca99eff867ea2ec3 +Subproject commit ed64447a2133ccc6e19d1a33d57be682b02b8bcd diff --git a/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py b/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py new file mode 100755 index 000000000..69f5c65d3 --- /dev/null +++ b/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py @@ -0,0 +1,51 @@ +import os +import sys +from nose.tools import set_trace +from sqlalchemy.sql import select +from sqlalchemy.sql.expression import ( + join, + and_, +) +bin_dir = os.path.split(__file__)[0] +package_dir = os.path.join(bin_dir, "..") +sys.path.append(os.path.abspath(package_dir)) +from core.model import ( + dump_query, + production_session, + LicensePool, + DataSource, + Edition, + PresentationCalculationPolicy, +) + +# Find all books where the edition associated with the LicensePool has a +# different medium from the presentation edition. +_db = production_session() + +# Find all the LicensePools that aren't books. 
+subq = select([LicensePool.id]).select_from(
+    join(LicensePool, Edition,
+         and_(LicensePool.data_source_id == Edition.data_source_id,
+              LicensePool.identifier_id == Edition.primary_identifier_id)
+    )
+).where(Edition.medium != Edition.BOOK_MEDIUM)
+
+# Of those LicensePools, find every LicensePool whose presentation
+# edition says it _is_ a book.
+qu = _db.query(LicensePool).join(
+    Edition, LicensePool.presentation_edition_id == Edition.id
+).filter(LicensePool.id.in_(subq)).filter(Edition.medium == Edition.BOOK_MEDIUM)
+
+print("Recalculating presentation edition for %d LicensePools." % qu.count())
+
+# Every pool is recalculated under the same policy, so build it once.
+policy = PresentationCalculationPolicy(
+    regenerate_opds_entries=True, update_search_index=True
+)
+for lp in qu:
+    # Refresh the pool's presentation edition, then its work's presentation.
+    lp.set_presentation_edition()
+    work, is_new = lp.calculate_work()
+    work.calculate_presentation(policy)
+    print("New medium: %s" % lp.presentation_edition.medium)
+    _db.commit()  # one commit per LicensePool
diff --git a/requirements.txt b/requirements.txt
index 066de535a..35ed7c5f4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,7 @@ elasticsearch-dsl<2.0.0
 pillow
 psycopg2
 requests==2.18.4
+urllib3<1.24 # Travis problem introduced 20181016 - check to see when we can remove
 sqlalchemy==1.2.0
 flask-sqlalchemy-session
 lxml