From 3f5f6225fe82e0c2e0241e4f66bfb861cdf4fe9d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 13 Dec 2022 16:08:08 +0100 Subject: [PATCH 1/3] GPKG: fix issue with StartTransaction() causing features to be omitted when creating background RTree (fixes https://github.com/qgis/QGIS/issues/51188) --- ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp index bfcb9281071c..ef9697c0df7c 100644 --- a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp @@ -2868,6 +2868,9 @@ OGRErr OGRGeoPackageTableLayer::DeleteFeature(GIntBig nFID) bool OGRGeoPackageTableLayer::DoJobAtTransactionCommit() { + if( m_bAllowedRTreeThread ) + return true; + bool ret = RunDeferredCreationIfNecessary() == OGRERR_NONE && RunDeferredSpatialIndexUpdate(); m_nCountInsertInTransaction = 0; From 8be83d7b544d9096cae8f106800dff103fbaa7b3 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 13 Dec 2022 16:08:41 +0100 Subject: [PATCH 2/3] GPKG: add debug traces and testing for issue of https://github.com/qgis/QGIS/issues/51188 --- autotest/ogr/ogr_gpkg.py | 5 +++++ .../gpkg/ogrgeopackagetablelayer.cpp | 18 +++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/autotest/ogr/ogr_gpkg.py b/autotest/ogr/ogr_gpkg.py index c1b68f26390c..039e450b4d03 100755 --- a/autotest/ogr/ogr_gpkg.py +++ b/autotest/ogr/ogr_gpkg.py @@ -7853,10 +7853,15 @@ def test_ogr_gpkg_background_rtree_build(filename): ds = gdaltest.gpkg_dr.CreateDataSource(filename) with gdaltest.config_option("OGR_GPKG_THREADED_RTREE_AT_FIRST_FEATURE", "YES"): lyr = ds.CreateLayer("foo") + assert lyr.StartTransaction() == ogr.OGRERR_NONE for i in range(1000): f = ogr.Feature(lyr.GetLayerDefn()) f.SetGeometryDirectly(ogr.CreateGeometryFromWkt("POINT(%d %d)" % (i, i))) assert lyr.CreateFeature(f) == ogr.OGRERR_NONE + if i == 500: + assert lyr.CommitTransaction() == ogr.OGRERR_NONE + assert lyr.StartTransaction() == ogr.OGRERR_NONE + assert lyr.CommitTransaction() == ogr.OGRERR_NONE ds = None assert gdal.VSIStatL(filename + ".tmp_rtree_foo.db") is None diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp index ef9697c0df7c..8c708a8b9f02 100644 --- a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp @@ -2205,6 +2205,10 @@ OGRErr OGRGeoPackageTableLayer::CreateOrUpsertFeature( OGRFeature *poFeature, bo else if( !bUpsert && m_bAllowedRTreeThread && !m_bErrorDuringRTreeThread ) { GPKGRTreeEntry sEntry; +#ifdef DEBUG_VERBOSE + if( m_aoRTreeEntries.empty() ) + CPLDebug("GPKG", "Starting to fill m_aoRTreeEntries at FID " CPL_FRMT_GIB, nFID); +#endif sEntry.nId = nFID; sEntry.fMinX = rtreeValueDown(oEnv.MinX); sEntry.fMaxX = rtreeValueUp(oEnv.MaxX); @@ -2258,7 +2262,7 @@ void OGRGeoPackageTableLayer::SetDeferredSpatialIndexCreation( bool bFlag ) // For unit tests if( CPLTestBool(CPLGetConfigOption("OGR_GPKG_THREADED_RTREE_AT_FIRST_FEATURE", "NO")) ) { - m_nRTreeBatchSize = 1; + m_nRTreeBatchSize = 10; m_nRTreeBatchesBeforeStart = 1; } } @@ -2395,11 +2399,21 @@ void OGRGeoPackageTableLayer::AsyncRTreeThreadFunction() } SQLCommand(m_hAsyncDBHandle, "BEGIN"); + GIntBig nCount = 0; while( true ) { const auto aoEntries = m_oQueueRTreeEntries.get_and_pop_front(); if( aoEntries.empty() ) break; +#ifdef DEBUG_VERBOSE + CPLDebug("GPKG", "AsyncRTreeThreadFunction(): " + "Processing batch of %d features, " + "starting at FID " CPL_FRMT_GIB " and ending " + "at FID " CPL_FRMT_GIB, + static_cast(aoEntries.size()), + aoEntries.front().nId, + aoEntries.back().nId); +#endif for( const auto& entry: aoEntries ) { if( (entry.nId % 500000) == 0 ) @@ -2414,6 +2428,7 @@ void OGRGeoPackageTableLayer::AsyncRTreeThreadFunction() } sqlite3_reset(hStmt); + nCount ++; sqlite3_bind_int64(hStmt,1,entry.nId); sqlite3_bind_double(hStmt,2,entry.fMinX); sqlite3_bind_double(hStmt,3,entry.fMaxX); @@ -2440,6 +2455,7 @@ void OGRGeoPackageTableLayer::AsyncRTreeThreadFunction() } sqlite3_finalize(hStmt); + CPLDebug("GPKG", "AsyncRTreeThreadFunction(): " CPL_FRMT_GIB " rows inserted into RTree", nCount); if( m_bErrorDuringRTreeThread ) { From ede707acb11b7210473a30d1a027e15ba4b6e80f Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 13 Dec 2022 18:49:48 +0100 Subject: [PATCH 3/3] GPKG: Add heuristics to try to detect corrupted RTree generated by GDAL 3.6.0, and disable use of the rtree --- autotest/ogr/ogr_gpkg.py | 39 +++++++++++++++++++ .../gpkg/ogrgeopackagetablelayer.cpp | 38 ++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/autotest/ogr/ogr_gpkg.py b/autotest/ogr/ogr_gpkg.py index 039e450b4d03..1232b598d306 100755 --- a/autotest/ogr/ogr_gpkg.py +++ b/autotest/ogr/ogr_gpkg.py @@ -7940,6 +7940,45 @@ def test_ogr_gpkg_background_rtree_build(filename): gdal.Unlink(filename) +############################################################################### + + +def test_ogr_gpkg_detect_broken_rtree_gdal_3_6_0(): + + filename = "/vsimem/test_ogr_gpkg_detect_broken_rtree_gdal_3_6_0.gpkg" + + ds = gdaltest.gpkg_dr.CreateDataSource(filename) + lyr = ds.CreateLayer("foo") + for i in range(100): + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometryDirectly( + ogr.CreateGeometryFromWkt("POINT(%d %d)" % (i % 10, i // 10)) + ) + lyr.CreateFeature(f) + ds = None + + # Voluntary corrupt the RTree by removing the entry for the last feature + ds = ogr.Open(filename, update=1) + sql_lyr = ds.ExecuteSQL("DELETE FROM rtree_foo_geom WHERE id = 100") + ds.ReleaseResultSet(sql_lyr) + ds = None + + with gdaltest.config_option("OGR_GPKG_THRESHOLD_DETECT_BROKEN_RTREE", "100"): + ds = ogr.Open(filename) + lyr = ds.GetLayer(0) + with gdaltest.error_handler(): + gdal.ErrorReset() + lyr.SetSpatialFilterRect(8.5, 8.5, 9.5, 9.5) + assert ( + "Spatial index (perhaps created with GDAL 3.6.0) of table foo is corrupted" + in gdal.GetLastErrorMsg() + ) + assert lyr.GetFeatureCount() == 1 + ds = None + + gdal.Unlink(filename) + + ############################################################################### # Test ST_Area() diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp index 8c708a8b9f02..849e81157668 100644 --- a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp @@ -4202,6 +4202,44 @@ bool OGRGeoPackageTableLayer::HasSpatialIndex() m_osFIDForRTree = m_pszFidColumn; } + // Add heuristics to try to detect corrupted RTree generated by GDAL 3.6.0 + // Cf https://github.com/OSGeo/gdal/pull/6911 + if( m_bHasSpatialIndex ) + { + const auto nFC = GetTotalFeatureCount(); + if( nFC >= atoi(CPLGetConfigOption( + "OGR_GPKG_THRESHOLD_DETECT_BROKEN_RTREE", "100000")) ) + { + CPLString osSQL = "SELECT 1 FROM \""; + osSQL += SQLEscapeName(pszT); + osSQL += "\" WHERE \""; + osSQL += SQLEscapeName(GetFIDColumn()); + osSQL += "\" = "; + osSQL += CPLSPrintf(CPL_FRMT_GIB, nFC); + osSQL += " AND \""; + osSQL += SQLEscapeName(pszC); + osSQL += "\" IS NOT NULL AND NOT ST_IsEmpty(\""; + osSQL += SQLEscapeName(pszC); + osSQL += "\")"; + if( SQLGetInteger(m_poDS->GetDB(), osSQL, nullptr) == 1 ) + { + osSQL = "SELECT 1 FROM \""; + osSQL += SQLEscapeName(m_osRTreeName); + osSQL += "\" WHERE id = "; + osSQL += CPLSPrintf(CPL_FRMT_GIB, nFC); + if( SQLGetInteger(m_poDS->GetDB(), osSQL, nullptr) == 0 ) + { + CPLError(CE_Warning, CPLE_AppDefined, + "Spatial index (perhaps created with GDAL 3.6.0) " + "of table %s is corrupted. Disabling its use. " + "This file should be recreated or its spatial " + "index recreated", m_pszTableName); + m_bHasSpatialIndex = false; + } + } + } + } + return CPL_TO_BOOL(m_bHasSpatialIndex); }