From cc32ebcabd578dbc3da8e06e6e005ebc47c00eb2 Mon Sep 17 00:00:00 2001 From: Jordi Boggiano Date: Wed, 8 Dec 2021 11:45:18 +0100 Subject: [PATCH] Search improvements (#10336) * Search performance improvements, add SEARCH_VENDOR type, fixes #10326, fixes #10324, fixes #10325 * Add extra optimization path for autocompletion of ^foo/* whereas the vendor is fully known, refs #10320 --- doc/03-cli.md | 4 +- src/Composer/Cache.php | 17 ++ src/Composer/Command/SearchCommand.php | 24 ++- src/Composer/Repository/ArrayRepository.php | 37 +++-- .../Repository/ComposerRepository.php | 154 ++++++++++++++---- .../Repository/PlatformRepository.php | 10 ++ .../Repository/RepositoryInterface.php | 7 +- 7 files changed, 204 insertions(+), 49 deletions(-) diff --git a/doc/03-cli.md b/doc/03-cli.md index c59045cb1505..23f83c297ab9 100644 --- a/doc/03-cli.md +++ b/doc/03-cli.md @@ -420,7 +420,9 @@ You can also search for more than one term by passing multiple arguments. ### Options -* **--only-name (-N):** Search only in name. +* **--only-name (-N):** Search only in package names. +* **--only-vendor (-O):** Search only for vendor / organization names, returns only "vendor" + as result. * **--type (-t):** Search for a specific package type. * **--format (-f):** Lets you pick between text (default) or json output format. Note that in the json, only the name and description keys are guaranteed to be diff --git a/src/Composer/Cache.php b/src/Composer/Cache.php index 135665d91fd4..c7da5c612734 100644 --- a/src/Composer/Cache.php +++ b/src/Composer/Cache.php @@ -286,6 +286,23 @@ public function clear() return false; } + /** + * @param string $file + * @return int|false + * @phpstan-return int<0, max>|false + */ + public function getAge($file) + { + if ($this->isEnabled()) { + $file = Preg::replace('{[^'.$this->allowlist.']}i', '-', $file); + if (file_exists($this->root . $file) && ($mtime = filemtime($this->root . $file)) !== false) { + return abs(time() - $mtime); + } + } + + return false; + } + /** * @param int $ttl * @param int $maxSize diff --git a/src/Composer/Command/SearchCommand.php b/src/Composer/Command/SearchCommand.php index 78ed6cab0a53..285fd4221ca2 100644 --- a/src/Composer/Command/SearchCommand.php +++ b/src/Composer/Command/SearchCommand.php @@ -38,7 +38,8 @@ protected function configure() ->setName('search') ->setDescription('Searches for packages.') ->setDefinition(array( - new InputOption('only-name', 'N', InputOption::VALUE_NONE, 'Search only in name'), + new InputOption('only-name', 'N', InputOption::VALUE_NONE, 'Search only in package names'), + new InputOption('only-vendor', 'O', InputOption::VALUE_NONE, 'Search only for vendor / organization names, returns only "vendor" as result'), new InputOption('type', 't', InputOption::VALUE_REQUIRED, 'Search for a specific package type'), new InputOption('format', 'f', InputOption::VALUE_REQUIRED, 'Format of the output: text or json', 'text'), new InputArgument('tokens', InputArgument::IS_ARRAY | InputArgument::REQUIRED, 'tokens to search for'), @@ -77,11 +78,24 @@ protected function execute(InputInterface $input, OutputInterface $output) $commandEvent = new CommandEvent(PluginEvents::COMMAND, 'search', $input, $output); $composer->getEventDispatcher()->dispatch($commandEvent->getName(), $commandEvent); - $onlyName = $input->getOption('only-name'); - $type = $input->getOption('type') ?: null; + $mode = RepositoryInterface::SEARCH_FULLTEXT; + if ($input->getOption('only-name') === true) { + if ($input->getOption('only-vendor') === true) { + throw new \InvalidArgumentException('--only-name and --only-vendor cannot be used together'); + } + $mode = RepositoryInterface::SEARCH_NAME; + } elseif ($input->getOption('only-vendor') === true) { + $mode = RepositoryInterface::SEARCH_VENDOR; + } + + $type = $input->getOption('type'); + + $query = implode(' ', $input->getArgument('tokens')); + if ($mode !== RepositoryInterface::SEARCH_FULLTEXT) { + $query = preg_quote($query); + } - $flags = $onlyName ? RepositoryInterface::SEARCH_NAME : RepositoryInterface::SEARCH_FULLTEXT; - $results = $repos->search(implode(' ', $input->getArgument('tokens')), $flags, $type); + $results = $repos->search($query, $mode, $type); if ($results && $format === 'text') { $width = $this->getTerminalWidth(); diff --git a/src/Composer/Repository/ArrayRepository.php b/src/Composer/Repository/ArrayRepository.php index 65e23b86eb5b..3d4e15abda50 100644 --- a/src/Composer/Repository/ArrayRepository.php +++ b/src/Composer/Repository/ArrayRepository.php @@ -148,28 +148,43 @@ public function findPackages($name, $constraint = null) */ public function search($query, $mode = 0, $type = null) { - $regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i'; + if ($mode === self::SEARCH_FULLTEXT) { + $regex = '{(?:'.implode('|', Preg::split('{\s+}', preg_quote($query))).')}i'; + } else { + // vendor/name searches expect the caller to have preg_quoted the query + $regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i'; + } $matches = array(); foreach ($this->getPackages() as $package) { $name = $package->getName(); + if ($mode === self::SEARCH_VENDOR) { + list($name) = explode('/', $name); + } if (isset($matches[$name])) { continue; } + if (null !== $type && $package->getType() !== $type) { + continue; + } + if (Preg::isMatch($regex, $name) || ($mode === self::SEARCH_FULLTEXT && $package instanceof CompletePackageInterface && Preg::isMatch($regex, implode(' ', (array) $package->getKeywords()) . ' ' . $package->getDescription())) ) { - if (null !== $type && $package->getType() !== $type) { - continue; - } - - $matches[$name] = array( - 'name' => $package->getPrettyName(), - 'description' => $package instanceof CompletePackageInterface ? $package->getDescription() : null, - ); + if ($mode === self::SEARCH_VENDOR) { + $matches[$name] = array( + 'name' => $name, + 'description' => null, + ); + } else { + $matches[$name] = array( + 'name' => $package->getPrettyName(), + 'description' => $package instanceof CompletePackageInterface ? $package->getDescription() : null, + ); - if ($package instanceof CompletePackageInterface && $package->isAbandoned()) { - $matches[$name]['abandoned'] = $package->getReplacementPackage() ?: true; + if ($package instanceof CompletePackageInterface && $package->isAbandoned()) { + $matches[$name]['abandoned'] = $package->getReplacementPackage() ?: true; + } } } } diff --git a/src/Composer/Repository/ComposerRepository.php b/src/Composer/Repository/ComposerRepository.php index 3cfc59725ea7..8c30935f3c04 100644 --- a/src/Composer/Repository/ComposerRepository.php +++ b/src/Composer/Repository/ComposerRepository.php @@ -337,7 +337,7 @@ public function getPackages() } /** - * @param string|null $packageFilter + * @param string|null $packageFilter Package pattern filter which can include "*" as a wildcard * * @return string[] */ @@ -345,51 +345,120 @@ public function getPackageNames($packageFilter = null) { $hasProviders = $this->hasProviders(); - $packageFilterCb = function ($name) { - return true; - }; - if (null !== $packageFilter) { - $packageFilterRegex = '{^'.str_replace('\\*', '.*?', preg_quote($packageFilter)).'$}i'; - $packageFilterCb = function ($name) use ($packageFilterRegex) { - return Preg::isMatch($packageFilterRegex, $name); - }; + $filterResults = + /** + * @param list $results + * @return list + */ + function (array $results) { + return $results; + } + ; + if (null !== $packageFilter && '' !== $packageFilter) { + $packageFilterRegex = BasePackage::packageNameToRegexp($packageFilter); + $filterResults = + /** + * @param list $results + * @return list + */ + function (array $results) use ($packageFilterRegex) { + /** @var list $results */ + return Preg::grep($packageFilterRegex, $results); + } + ; } if ($this->lazyProvidersUrl) { if (is_array($this->availablePackages)) { - return array_filter(array_keys($this->availablePackages), $packageFilterCb); + return $filterResults(array_keys($this->availablePackages)); } if ($this->listUrl) { - $url = $this->listUrl; - if ($packageFilter) { - $url .= '?filter='.urlencode($packageFilter); - } - - $result = $this->httpDownloader->get($url, $this->options)->decodeJson(); - - return $result['packageNames']; + // no need to call $filterResults here as the $packageFilter is applied in the function itself + return $this->loadPackageList($packageFilter); } - if ($this->hasPartialPackages()) { - return array_filter(array_keys($this->partialPackagesByName), $packageFilterCb); + if ($this->hasPartialPackages() && $this->partialPackagesByName !== null) { + return $filterResults(array_keys($this->partialPackagesByName)); } return array(); } if ($hasProviders) { - return array_filter($this->getProviderNames(), $packageFilterCb); + return $filterResults($this->getProviderNames()); } $names = array(); foreach ($this->getPackages() as $package) { - if ($packageFilterCb($package->getName())) { - $names[] = $package->getPrettyName(); - } + $names[] = $package->getPrettyName(); } - return $names; + return $filterResults($names); + } + + /** + * @return list + */ + private function getVendorNames() + { + $cacheKey = 'vendor-list.txt'; + $cacheAge = $this->cache->getAge($cacheKey); + if (false !== $cacheAge && $cacheAge < 600 && ($cachedData = $this->cache->read($cacheKey)) !== false) { + $cachedData = explode("\n", $cachedData); + + return $cachedData; + } + + $names = $this->getPackageNames(); + + $uniques = array(); + foreach ($names as $name) { + // @phpstan-ignore-next-line + $uniques[substr($name, 0, strpos($name, '/'))] = true; + } + + $vendors = array_keys($uniques); + + if (!$this->cache->isReadOnly()) { + $this->cache->write($cacheKey, implode("\n", $vendors)); + } + + return $vendors; + } + + /** + * @param string|null $packageFilter + * @return list + */ + private function loadPackageList($packageFilter = null) + { + if (null === $this->listUrl) { + throw new \LogicException('Make sure to call loadRootServerFile before loadPackageList'); + } + + $url = $this->listUrl; + if (is_string($packageFilter) && $packageFilter !== '') { + $url .= '?filter='.urlencode($packageFilter); + $result = $this->httpDownloader->get($url, $this->options)->decodeJson(); + + return $result['packageNames']; + } + + $cacheKey = 'package-list.txt'; + $cacheAge = $this->cache->getAge($cacheKey); + if (false !== $cacheAge && $cacheAge < 600 && ($cachedData = $this->cache->read($cacheKey)) !== false) { + $cachedData = explode("\n", $cachedData); + + return $cachedData; + } + + $result = $this->httpDownloader->get($url, $this->options)->decodeJson(); + if (!$this->cache->isReadOnly()) { + $this->cache->write($cacheKey, implode("\n", $result['packageNames'])); + } + + return $result['packageNames']; } public function loadPackages(array $packageNameMap, array $acceptableStabilities, array $stabilityFlags, array $alreadyLoaded = array()) @@ -465,7 +534,7 @@ public function loadPackages(array $packageNameMap, array $acceptableStabilities */ public function search($query, $mode = 0, $type = null) { - $this->loadRootServerFile(); + $this->loadRootServerFile(600); if ($this->searchUrl && $mode === self::SEARCH_FULLTEXT) { $url = str_replace(array('%query%', '%type%'), array($query, $type), $this->searchUrl); @@ -489,12 +558,36 @@ public function search($query, $mode = 0, $type = null) return $results; } + if ($mode === self::SEARCH_VENDOR) { + $results = array(); + $regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i'; + + $vendorNames = $this->getVendorNames(); + foreach (Preg::grep($regex, $vendorNames) as $name) { + $results[] = array('name' => $name, 'description' => ''); + } + + return $results; + } + if ($this->hasProviders() || $this->lazyProvidersUrl) { + // optimize search for "^foo/bar" where at least "^foo/" is present by loading this directly from the listUrl if present + if (Preg::isMatch('{^\^(?P(?P[a-z0-9_.-]+)/[a-z0-9_.-]*)\*?$}i', $query, $match) && $this->listUrl !== null) { + $url = $this->listUrl . '?vendor='.urlencode($match['vendor']).'&filter='.urlencode($match['query'].'*'); + $result = $this->httpDownloader->get($url, $this->options)->decodeJson(); + + $results = array(); + foreach ($result['packageNames'] as $name) { + $results[] = array('name' => $name, 'description' => ''); + } + + return $results; + } + $results = array(); $regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i'; $packageNames = $this->getPackageNames(); - foreach (Preg::grep($regex, $packageNames) as $name) { $results[] = array('name' => $name, 'description' => ''); } @@ -920,9 +1013,10 @@ private function getPackagesJsonUrl() } /** + * @param int|null $rootMaxAge * @return array */ - protected function loadRootServerFile() + protected function loadRootServerFile($rootMaxAge = null) { if (null !== $this->rootData) { return $this->rootData; @@ -934,7 +1028,9 @@ protected function loadRootServerFile() if ($cachedData = $this->cache->read('packages.json')) { $cachedData = json_decode($cachedData, true); - if (isset($cachedData['last-modified'])) { + if ($rootMaxAge !== null && ($age = $this->cache->getAge('packages.json')) !== false && $age <= $rootMaxAge) { + $data = $cachedData; + } elseif (isset($cachedData['last-modified'])) { $response = $this->fetchFileIfLastModified($this->getPackagesJsonUrl(), 'packages.json', $cachedData['last-modified']); $data = true === $response ? $cachedData : $response; } diff --git a/src/Composer/Repository/PlatformRepository.php b/src/Composer/Repository/PlatformRepository.php index 4bf0a5b101b5..916145ee1b98 100644 --- a/src/Composer/Repository/PlatformRepository.php +++ b/src/Composer/Repository/PlatformRepository.php @@ -727,4 +727,14 @@ public static function getPlatformPhpVersion() { return self::$lastSeenPlatformPhp; } + + public function search($query, $mode = 0, $type = null) + { + // suppress vendor search as there are no vendors to match in platform packages + if ($mode === self::SEARCH_VENDOR) { + return array(); + } + + return parent::search($query, $mode, $type); + } } diff --git a/src/Composer/Repository/RepositoryInterface.php b/src/Composer/Repository/RepositoryInterface.php index e8be2a07c8f5..7972e9417c47 100644 --- a/src/Composer/Repository/RepositoryInterface.php +++ b/src/Composer/Repository/RepositoryInterface.php @@ -27,6 +27,7 @@ interface RepositoryInterface extends \Countable { const SEARCH_FULLTEXT = 0; const SEARCH_NAME = 1; + const SEARCH_VENDOR = 2; /** * Checks if specified package registered (installed). @@ -85,11 +86,11 @@ public function loadPackages(array $packageNameMap, array $acceptableStabilities /** * Searches the repository for packages containing the query * - * @param string $query search query - * @param int $mode a set of SEARCH_* constants to search on, implementations should do a best effort only + * @param string $query search query, for SEARCH_NAME and SEARCH_VENDOR regular expressions metacharacters are supported by implementations, and user input should be escaped through preg_quote by callers + * @param int $mode a set of SEARCH_* constants to search on, implementations should do a best effort only, default is SEARCH_FULLTEXT * @param string $type The type of package to search for. Defaults to all types of packages * - * @return array[] an array of array('name' => '...', 'description' => '...'|null) + * @return array[] an array of array('name' => '...', 'description' => '...'|null, 'abandoned' => 'string'|true|unset) For SEARCH_VENDOR the name will be in "vendor" form * @phpstan-return list */ public function search($query, $mode = 0, $type = null);