diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bf7932dba..8d802aaa4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: tools: none # Install dependencies and handle caching in one go. - # @link https://github.com/marketplace/actions/install-composer-dependencies + # @link https://github.com/marketplace/actions/install-php-dependencies-with-composer - name: "Install Composer dependencies (PHP < 8.3)" if: ${{ matrix.php < '8.3' }} uses: "ramsey/composer-install@v2" @@ -46,6 +46,54 @@ jobs: - name: Run unit tests run: composer test + test-with-optional-libraries: + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + php: ['7.2', '8.2'] + with: ['idna-convert-v051', 'idna-convert-v2', 'idna-convert-v3'] + exclude: + # no PHP 8.2 support in idna-convert-v2 + - php: '8.2' + with: 'idna-convert-v2' + + name: "PHP: ${{ matrix.php }} with ${{ matrix.with }}" + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install PHP with latest composer + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php }} + ini-values: error_reporting=-1, display_errors=On, log_errors_max_len=0 + coverage: none + tools: none + + # Install dependencies and handle caching in one go. + # @link https://github.com/marketplace/actions/install-php-dependencies-with-composer + - name: "Install Composer dependencies" + uses: "ramsey/composer-install@v2" + + - name: "Install idna-convert v3" + if: ${{ matrix.with == 'idna-convert-v3' }} + run: "composer require --dev algo26-matthias/idna-convert:^3" + + - name: "Install idna-convert v2" + if: ${{ matrix.with == 'idna-convert-v2' }} + run: "composer require --dev algo26-matthias/idna-convert:^2" + + - name: Run unit tests with idna_convert 0.5.1 + if: ${{ matrix.with == 'idna-convert-v051' }} + run: composer test -- --bootstrap=tests/bootstrap-with-idna-convert-051.php + + - name: Run unit tests + if: ${{ matrix.with != 'idna-convert-v051' }} + run: composer test + test-compiled: runs-on: ubuntu-latest @@ -62,7 +110,7 @@ jobs: tools: none # Install dependencies and handle caching in one go. - # @link https://github.com/marketplace/actions/install-composer-dependencies + # @link https://github.com/marketplace/actions/install-php-dependencies-with-composer - name: "Install Composer dependencies" uses: "ramsey/composer-install@v2" with: diff --git a/composer.json b/composer.json index cfede2b53..cf741a4fe 100644 --- a/composer.json +++ b/composer.json @@ -39,6 +39,7 @@ "ext-iconv": "", "ext-intl": "", "ext-mbstring": "", + "algo26-matthias/idna-convert": "IdnaConvert allows you to convert internationalized domain names", "mf2/mf2": "Microformat module that allows for parsing HTML for microformats" }, "autoload": { diff --git a/idn/idna_convert.class.php b/idn/idna_convert.class.php index 1efeef966..1b3291c5b 100644 --- a/idn/idna_convert.class.php +++ b/idn/idna_convert.class.php @@ -89,6 +89,7 @@ class idna_convert var $_api_encoding = 'utf8'; // Default input charset is UTF-8 var $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden var $_strict_mode = false; // Behave strict or not + var $slast = 0; // The constructor function __construct($options = false) diff --git a/src/Enclosure.php b/src/Enclosure.php index 124d65b6e..ef2292783 100644 --- a/src/Enclosure.php +++ b/src/Enclosure.php @@ -220,8 +220,6 @@ class Enclosure * * For documentation on all the parameters, see the corresponding * properties and their accessors - * - * @uses idna_convert If available, this will convert an IDN */ public function __construct($link = null, $type = null, $length = null, $javascript = null, $bitrate = null, $captions = null, $categories = null, $channels = null, $copyright = null, $credits = null, $description = null, $duration = null, $expression = null, $framerate = null, $hashes = null, $height = null, $keywords = null, $lang = null, $medium = null, $player = null, $ratings = null, $restrictions = null, $samplingrate = null, $thumbnails = null, $title = null, $width = null) { @@ -251,12 +249,8 @@ public function __construct($link = null, $type = null, $length = null, $javascr $this->type = $type; $this->width = $width; - if (class_exists('idna_convert')) { - $idn = new \idna_convert(); - $parsed = \SimplePie\Misc::parse_url($link); - $this->link = \SimplePie\Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']); - } - $this->handler = $this->get_handler(); // Needs to load last + // Needs to load last + $this->handler = $this->get_handler(); } /** diff --git a/src/File.php b/src/File.php index bc18b8d8d..f4c5f7dce 100644 --- a/src/File.php +++ b/src/File.php @@ -68,15 +68,21 @@ class File public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false, $curl_options = []) { - if (class_exists('idna_convert')) { - $idn = new \idna_convert(); - $parsed = \SimplePie\Misc::parse_url($url); - $url = \SimplePie\Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], null); - } $this->url = $url; $this->permanent_url = $url; $this->useragent = $useragent; + if (preg_match('/^http(s)?:\/\//i', $url)) { + // Convert only urls, not paths + if (class_exists('idna_convert')) { + $idn = new \idna_convert(); + $parsed = \SimplePie\Misc::parse_url($url); + $url = \SimplePie\Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], null); + + $this->url = $url; + $this->permanent_url = $url; + } + if ($useragent === null) { $useragent = ini_get('user_agent'); $this->useragent = $useragent; diff --git a/src/Idna/IdnaConverter.php b/src/Idna/IdnaConverter.php new file mode 100644 index 000000000..f731311c5 --- /dev/null +++ b/src/Idna/IdnaConverter.php @@ -0,0 +1,85 @@ +convert($decoded); + } else if (class_exists(IdnaConvert::class)) { + // Support for algo26-matthias/idna-convert:^2 + $idnaConvert = new IdnaConvert(); + + return $idnaConvert->encode($decoded); + } else if (class_exists(idna_convert::class)) { + // Support for idna_convert:0.5.1 + $idnaConvert = new idna_convert(); + + return $idnaConvert->encode($decoded); + } else { + // No idna-convert library is available + return $decoded; + } + } +} diff --git a/src/Idna/IdnaDomainFilter.php b/src/Idna/IdnaDomainFilter.php new file mode 100644 index 000000000..cda8388c8 --- /dev/null +++ b/src/Idna/IdnaDomainFilter.php @@ -0,0 +1,58 @@ +data['enclosures'][] = $this->registry->create(Enclosure::class, [$url, $type, $length, null, $bitrate, $captions, $categories, $channels, $copyrights, $credits, $description, $duration, $expression, $framerate, $hashes, $height, $keywords, $lang, $medium, $player, $ratings, $restrictions, $samplingrate, $thumbnails, $title, $width]); + $this->data['enclosures'][] = $this->create_enclosure($url, $type, $length, $bitrate, $captions, $categories, $channels, $copyrights, $credits, $description, $duration, $expression, $framerate, $hashes, $height, $keywords, $lang, $medium, $player, $ratings, $restrictions, $samplingrate, $thumbnails, $title, $width); } } } @@ -2169,7 +2171,7 @@ public function get_enclosures() $title = $title_parent; } - $this->data['enclosures'][] = $this->registry->create(Enclosure::class, [$url, $type, $length, null, $bitrate, $captions, $categories, $channels, $copyrights, $credits, $description, $duration, $expression, $framerate, $hashes, $height, $keywords, $lang, $medium, $player, $ratings, $restrictions, $samplingrate, $thumbnails, $title, $width]); + $this->data['enclosures'][] = $this->create_enclosure($url, $type, $length, $bitrate, $captions, $categories, $channels, $copyrights, $credits, $description, $duration, $expression, $framerate, $hashes, $height, $keywords, $lang, $medium, $player, $ratings, $restrictions, $samplingrate, $thumbnails, $title, $width); } } } @@ -2206,7 +2208,7 @@ public function get_enclosures() } // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor - $this->data['enclosures'][] = $this->registry->create(Enclosure::class, [$url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title, $width]); + $this->data['enclosures'][] = $this->create_enclosure($url, $type, $length, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title, $width); } } @@ -2237,7 +2239,7 @@ public function get_enclosures() } // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor - $this->data['enclosures'][] = $this->registry->create(Enclosure::class, [$url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width]); + $this->data['enclosures'][] = $this->create_enclosure($url, $type, $length, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width); } } @@ -2269,13 +2271,13 @@ public function get_enclosures() } // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor - $this->data['enclosures'][] = $this->registry->create(Enclosure::class, [$url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width]); + $this->data['enclosures'][] = $this->create_enclosure($url, $type, $length, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width); } } if (sizeof($this->data['enclosures']) === 0 && ($url || $type || $length || $bitrate || $captions_parent || $categories_parent || $channels || $copyrights_parent || $credits_parent || $description_parent || $duration_parent || $expression || $framerate || $hashes_parent || $height || $keywords_parent || $lang || $medium || $player_parent || $ratings_parent || $restrictions_parent || $samplingrate || $thumbnails_parent || $title_parent || $width)) { // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor - $this->data['enclosures'][] = $this->registry->create(Enclosure::class, [$url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width]); + $this->data['enclosures'][] = $this->create_enclosure($url, $type, $length, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width); } $this->data['enclosures'] = array_values(array_unique($this->data['enclosures'])); @@ -2349,6 +2351,81 @@ public function get_source() return null; } + + /** + * @uses idna_convert If available, this will convert an IDN in $url + * @see https://github.com/algo26-matthias/idna-convert + */ + private function create_enclosure( + $url, + $type, + $length, + $bitrate, + $captions_parent, + $categories_parent, + $channels, + $copyrights_parent, + $credits_parent, + $description_parent, + $duration_parent, + $expression, + $framerate, + $hashes_parent, + $height, + $keywords_parent, + $lang, + $medium, + $player_parent, + $ratings_parent, + $restrictions_parent, + $samplingrate, + $thumbnails_parent, + $title_parent, + $width + ): Enclosure { + $idnaConverter = $this->registry->create(IdnaDomainFilter::class); + $parsed = $this->registry->call(Misc::class, 'parse_url', [$url]); + $authority = $idnaConverter->filterDomain($parsed['authority']); + + if ($authority !== $parsed['authority']) { + $url = $this->registry->call(Misc::class, 'compress_parse_url', [ + $parsed['scheme'], + $authority, + $parsed['path'], + $parsed['query'], + $parsed['fragment'], + ]); + } + + return $this->registry->create(Enclosure::class, [ + $url, + $type, + $length, + null, + $bitrate, + $captions_parent, + $categories_parent, + $channels, + $copyrights_parent, + $credits_parent, + $description_parent, + $duration_parent, + $expression, + $framerate, + $hashes_parent, + $height, + $keywords_parent, + $lang, + $medium, + $player_parent, + $ratings_parent, + $restrictions_parent, + $samplingrate, + $thumbnails_parent, + $title_parent, + $width, + ]); + } } class_alias('SimplePie\Item', 'SimplePie_Item'); diff --git a/src/Registry.php b/src/Registry.php index f46c5fb5a..f7c358cd5 100644 --- a/src/Registry.php +++ b/src/Registry.php @@ -45,6 +45,8 @@ namespace SimplePie; use SimplePie\Content\Type\Sniffer; +use SimplePie\Idna\IdnaConverter; +use SimplePie\Idna\IdnaDomainFilter; use SimplePie\Parse\Date; use SimplePie\XML\Declaration\Parser as DeclarationParser; @@ -82,6 +84,7 @@ class Registry Misc::class => Misc::class, DeclarationParser::class => DeclarationParser::class, Date::class => Date::class, + IdnaDomainFilter::class => IdnaConverter::class, ]; /** diff --git a/tests/Integration/Idna/IdnaConverterTest.php b/tests/Integration/Idna/IdnaConverterTest.php new file mode 100644 index 000000000..d5bb4ae9e --- /dev/null +++ b/tests/Integration/Idna/IdnaConverterTest.php @@ -0,0 +1,86 @@ +assertSame($expected, $idnaConverter->filterDomain($decoded)); + } + + public function getIdnaData(): array + { + if ( + !class_exists(ToIdn::class) + && !class_exists(IdnaConvert::class) + && !class_exists(idna_convert::class) + ) { + // No idna-convert library is available + return [ + ['', ''], + ]; + } + + return [ + ['', ''], + ['müller.tld', 'xn--mller-kva.tld'], + ['weißenbach', 'xn--weienbach-i1a'], + ['يوم-جيد', 'xn----9mcj9fole'], + ['יום-טוב', 'xn----2hckbod3a'], + ]; + } +} diff --git a/tests/Unit/RegistryTest.php b/tests/Unit/RegistryTest.php index 5e9913899..66d81c1ff 100644 --- a/tests/Unit/RegistryTest.php +++ b/tests/Unit/RegistryTest.php @@ -95,6 +95,7 @@ public function getDefaultClassDataProvider(): array ['SimplePie\Misc', 'SimplePie\Misc'], ['SimplePie\XML\Declaration\Parser', 'SimplePie\XML\Declaration\Parser'], ['SimplePie\Parse\Date', 'SimplePie\Parse\Date'], + ['SimplePie\Idna\IdnaDomainFilter', 'SimplePie\Idna\IdnaConverter'], // Legacy type names ['Cache', 'SimplePie\Cache'], ['Locator', 'SimplePie\Locator'], diff --git a/tests/bootstrap-with-idna-convert-051.php b/tests/bootstrap-with-idna-convert-051.php new file mode 100644 index 000000000..31741f662 --- /dev/null +++ b/tests/bootstrap-with-idna-convert-051.php @@ -0,0 +1,10 @@ +