From 29b8b97bc5b65c2f9b4c536a73a049e04cad56a5 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Tue, 16 Oct 2018 14:23:42 -0400 Subject: [PATCH 1/5] Moved migration script in from core. --- core | 2 +- ...ion-for-audiobooks-believed-to-be-books.py | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100755 migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py diff --git a/core b/core index 328e9e97c..ed64447a2 160000 --- a/core +++ b/core @@ -1 +1 @@ -Subproject commit 328e9e97c45289a494397196ca99eff867ea2ec3 +Subproject commit ed64447a2133ccc6e19d1a33d57be682b02b8bcd diff --git a/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py b/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py new file mode 100755 index 000000000..25ac40462 --- /dev/null +++ b/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py @@ -0,0 +1,49 @@ +import os +import sys +from nose.tools import set_trace +from sqlalchemy.sql import select +from sqlalchemy.sql.expression import ( + join, + and_, +) +bin_dir = os.path.split(__file__)[0] +package_dir = os.path.join(bin_dir, "..") +sys.path.append(os.path.abspath(package_dir)) +from core.model import ( + dump_query, + production_session, + LicensePool, + DataSource, + Edition, + PresentationCalculationPolicy, +) + +# Find all books where the edition associated with the LicensePool has a +# different medium from the presentation edition. +_db = production_session() + +# Find all the LicensePools that aren't books. +subq = select([LicensePool.id]).select_from( + join(LicensePool, Edition, + and_(LicensePool.data_source_id==Edition.data_source_id, + LicensePool.identifier_id==Edition.primary_identifier_id) + ) +).where(Edition.medium != Edition.BOOK_MEDIUM) + +# Of those LicensePools, find every LicensePool whose presentation +# edition says it _is_ a book. 
+qu = _db.query(LicensePool).join( + Edition, LicensePool.presentation_edition_id==Edition.id +).filter(LicensePool.id.in_(subq)).filter(Edition.medium == Edition.BOOK_MEDIUM) + +print "Recalculating presentation edition for %d LicensePools." % qu.count() + +for lp in qu: + # Recalculate that LicensePool's presentation edition, and then its + # work presentation. + lp.set_presentation_edition() + policy = PresentationCalculationPolicy(regenerate_opds_entries=True) + work, is_new = lp.calculate_work() + work.calculate_presentation(policy) + print "New medium: %s" % lp.presentation_edition.medium + _db.commit() From ee8016118d0889a12dac8a8e9e292f3d8cdeda1a Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Tue, 16 Oct 2018 15:06:24 -0400 Subject: [PATCH 2/5] Exclude a just-released version of urllib3 which causes problems. --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 066de535a..e31018284 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ elasticsearch-dsl<2.0.0 pillow psycopg2 requests==2.18.4 +urllib3!=1.24 sqlalchemy==1.2.0 flask-sqlalchemy-session lxml From 1acb3eca2136d23306b30016cd74a77c4cbc8373 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Tue, 16 Oct 2018 15:11:49 -0400 Subject: [PATCH 3/5] Change urllib3 pin and add note to check when we can remove it. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e31018284..35ed7c5f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ elasticsearch-dsl<2.0.0 pillow psycopg2 requests==2.18.4 -urllib3!=1.24 +urllib3<1.24 # Travis problem introduced 20181016 - check to see when we can remove sqlalchemy==1.2.0 flask-sqlalchemy-session lxml From 3725fc13d990ea96a81b82cb5d1886441851c42b Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Tue, 16 Oct 2018 15:21:08 -0400 Subject: [PATCH 4/5] Update the search index when migrating. 
--- ...culate-presentation-for-audiobooks-believed-to-be-books.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py b/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py index 25ac40462..69f5c65d3 100755 --- a/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py +++ b/migration/20181016-recalculate-presentation-for-audiobooks-believed-to-be-books.py @@ -42,7 +42,9 @@ # Recalculate that LicensePool's presentation edition, and then its # work presentation. lp.set_presentation_edition() - policy = PresentationCalculationPolicy(regenerate_opds_entries=True) + policy = PresentationCalculationPolicy( + regenerate_opds_entries=True, update_search_index=True + ) work, is_new = lp.calculate_work() work.calculate_presentation(policy) print "New medium: %s" % lp.presentation_edition.medium From 2edfbf8fcf47b4fe39e2153d03c5d685de0a15c0 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Tue, 16 Oct 2018 15:41:52 -0400 Subject: [PATCH 5/5] Make sure that a Metadata object created for RBdigital always has a .medium. --- api/rbdigital.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/api/rbdigital.py b/api/rbdigital.py index 496d60c38..ee9214a64 100644 --- a/api/rbdigital.py +++ b/api/rbdigital.py @@ -1608,9 +1608,22 @@ def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats Identifier.RB_DIGITAL_ID, rbdigital_id ) + # medium is both bibliographic and format information. 
+ + # options are: "eBook", "eAudio" + rbdigital_medium = book.get('mediaType', None) + if rbdigital_medium and rbdigital_medium not in cls.rbdigital_medium_to_simplified_medium: + cls.log.error( + "Could not process medium %s for %s", rbdigital_medium, rbdigital_id) + + medium = cls.rbdigital_medium_to_simplified_medium.get( + rbdigital_medium, Edition.BOOK_MEDIUM + ) + metadata = Metadata( data_source=DataSource.RB_DIGITAL, primary_identifier=primary_identifier, + medium=medium, ) if include_bibliographic: @@ -1706,16 +1719,6 @@ def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats ) subjects.append(subject) - # options are: "eBook", "eAudio" - rbdigital_medium = book.get('mediaType', None) - if rbdigital_medium and rbdigital_medium not in cls.rbdigital_medium_to_simplified_medium: - cls.log.error( - "Could not process medium %s for %s", rbdigital_medium, rbdigital_id) - - medium = cls.rbdigital_medium_to_simplified_medium.get( - rbdigital_medium, Edition.BOOK_MEDIUM - ) - # passed to metadata.apply, the isbn_identifier will create an equivalency # between the RBDigital-labeled and the ISBN-labeled identifier rows, which # will in turn allow us to ask the MetadataWrangler for more info about the book. @@ -1760,7 +1763,6 @@ def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats metadata.title = title metadata.language = language - metadata.medium = medium metadata.series = series_name metadata.series_position = series_position metadata.publisher = publisher @@ -1781,7 +1783,7 @@ def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats drm_scheme = DeliveryMechanism.NO_DRM formats.append(FormatData(content_type, drm_scheme)) else: - cls.log.warn("Unfamiliar format: %s", format_id) + cls.log.warn("Unfamiliar format: %s", metadata.medium) # Make a CirculationData so we can write the formats, circulationdata = CirculationData(