From 0bbaf5ac97c435106c4f66890d3cfbbe37780442 Mon Sep 17 00:00:00 2001 From: Owen Leibman Date: Sun, 7 Feb 2021 22:42:44 -0800 Subject: [PATCH] Avoid Duplicate Titles When Reading Multiple HTML Files This issue arose while researching issue #1823. The issue was not a bug; it just required clarification to the author of how to use the software. But, while researching, I discovered that loading html into 2 sheets of a spreadsheet has a problem if the html title tag is the same for the 2 sheets. PhpSpreadsheet would be able to save the resulting file, but Excel would not be able to read it properly because of the duplicate title. The worksheet setTitle method allows for disambiguation in such a circumstance. The html reader passed a parameter indicating "don't disambiguate", but I can't see any harm in changing that to "disambiguate". An extremely simple fix, with tests to back it up. --- src/PhpSpreadsheet/Reader/Html.php | 2 +- .../Reader/Html/HtmlLoadStringTest.php | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index e11390155b..09148d9f4f 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -320,7 +320,7 @@ private function processDomElementTitle(Worksheet $sheet, int &$row, string &$co { if ($child->nodeName === 'title') { $this->processDomElement($child, $sheet, $row, $column, $cellContent); - $sheet->setTitle($cellContent, true, false); + $sheet->setTitle($cellContent, true, true); $cellContent = ''; } else { $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray); diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php index e104150702..bc4c30ffbe 100644 --- a/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php @@ -89,4 
+89,33 @@ public function testCanLoadFromStringIntoExistingSpreadsheet(): void $spreadsheet = $reader->loadFromString($html, $spreadsheet); self::assertEquals(2, $spreadsheet->getSheetCount()); } + + public function testCanLoadDuplicateTitle(): void + { + $html = <<<'EOF' + + +Sheet + + +
1
+ + +EOF; + $reader = new \PhpOffice\PhpSpreadsheet\Reader\Html(); + $spreadsheet = $reader->loadFromString($html); + $reader->setSheetIndex(1); + $reader->loadFromString($html, $spreadsheet); + $reader->setSheetIndex(2); + $reader->loadFromString($html, $spreadsheet); + $sheet = $spreadsheet->getSheet(0); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet', $sheet->getTitle()); + $sheet = $spreadsheet->getSheet(1); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet 1', $sheet->getTitle()); + $sheet = $spreadsheet->getSheet(2); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet 2', $sheet->getTitle()); + } }