From cb23cca3ecbc6177dbcd91c4f0a2a141166c722a Mon Sep 17 00:00:00 2001 From: oleibman Date: Sat, 27 Feb 2021 06:10:04 -0800 Subject: [PATCH] Avoid Duplicate Titles When Reading Multiple HTML Files (#1829) This issue arose while researching issue #1823. The issue was not a bug; it just required clarification to the author of how to use the software. But, while researching, I discovered that loading html into 2 sheets of a spreadsheet has a problem if the html title tag is the same for the 2 sheets. PhpSpreadsheet would be able to save the resulting file, but Excel would not be able to read it properly because of the duplicate title. The worksheet setTitle method allows for disambiguation in such a circumstance. The html reader passed a parameter indicating "don't disambiguate", but I can't see any harm in changing that to "disambiguate". An extremely simple fix, with tests to back it up. --- src/PhpSpreadsheet/Reader/Html.php | 2 +- .../Reader/Html/HtmlLoadStringTest.php | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index e11390155b..09148d9f4f 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -320,7 +320,7 @@ private function processDomElementTitle(Worksheet $sheet, int &$row, string &$co { if ($child->nodeName === 'title') { $this->processDomElement($child, $sheet, $row, $column, $cellContent); - $sheet->setTitle($cellContent, true, false); + $sheet->setTitle($cellContent, true, true); $cellContent = ''; } else { $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray); diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php index e104150702..bc4c30ffbe 100644 --- a/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php @@ 
-89,4 +89,33 @@ public function testCanLoadFromStringIntoExistingSpreadsheet(): void $spreadsheet = $reader->loadFromString($html, $spreadsheet); self::assertEquals(2, $spreadsheet->getSheetCount()); } + + public function testCanLoadDuplicateTitle(): void + { + $html = <<<'EOF' +<html> +<head> +<title>Sheet</title> +</head> +<body> +<table><tr><td>1</td></tr></table> +</body> +</html> +EOF; + $reader = new \PhpOffice\PhpSpreadsheet\Reader\Html(); + $spreadsheet = $reader->loadFromString($html); + $reader->setSheetIndex(1); + $reader->loadFromString($html, $spreadsheet); + $reader->setSheetIndex(2); + $reader->loadFromString($html, $spreadsheet); + $sheet = $spreadsheet->getSheet(0); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet', $sheet->getTitle()); + $sheet = $spreadsheet->getSheet(1); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet 1', $sheet->getTitle()); + $sheet = $spreadsheet->getSheet(2); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet 2', $sheet->getTitle()); + } }