Skip to content

Commit

Permalink
Search improvements (#10336)
Browse files Browse the repository at this point in the history
* Search performance improvements, add SEARCH_VENDOR type, fixes #10326, fixes #10324, fixes #10325

* Add extra optimization path for autocompletion of ^foo/* where the vendor is fully known, refs #10320
  • Loading branch information
Seldaek committed Dec 8, 2021
1 parent 8c8d9ef commit cc32ebc
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 49 deletions.
4 changes: 3 additions & 1 deletion doc/03-cli.md
Expand Up @@ -420,7 +420,9 @@ You can also search for more than one term by passing multiple arguments.

### Options

* **--only-name (-N):** Search only in name.
* **--only-name (-N):** Search only in package names.
* **--only-vendor (-O):** Search only for vendor / organization names, returns only "vendor"
as result.
* **--type (-t):** Search for a specific package type.
* **--format (-f):** Lets you pick between text (default) or json output format.
Note that in the json, only the name and description keys are guaranteed to be
Expand Down
17 changes: 17 additions & 0 deletions src/Composer/Cache.php
Expand Up @@ -286,6 +286,23 @@ public function clear()
return false;
}

/**
 * Reports how many seconds lie between now and the last modification of a cache entry.
 *
 * The file name is sanitised against the cache allowlist before it is looked up
 * below the cache root.
 *
 * @param string $file
 * @return int|false the absolute age in seconds, or false when the cache is disabled,
 *                   the entry does not exist or its mtime cannot be read
 * @phpstan-return int<0, max>|false
 */
public function getAge($file)
{
    if (!$this->isEnabled()) {
        return false;
    }

    $path = $this->root . Preg::replace('{[^'.$this->allowlist.']}i', '-', $file);
    if (!file_exists($path)) {
        return false;
    }

    $modifiedAt = filemtime($path);
    if (false === $modifiedAt) {
        return false;
    }

    // abs() keeps the result non-negative even if the mtime lies in the future
    return abs(time() - $modifiedAt);
}

/**
* @param int $ttl
* @param int $maxSize
Expand Down
24 changes: 19 additions & 5 deletions src/Composer/Command/SearchCommand.php
Expand Up @@ -38,7 +38,8 @@ protected function configure()
->setName('search')
->setDescription('Searches for packages.')
->setDefinition(array(
new InputOption('only-name', 'N', InputOption::VALUE_NONE, 'Search only in name'),
new InputOption('only-name', 'N', InputOption::VALUE_NONE, 'Search only in package names'),
new InputOption('only-vendor', 'O', InputOption::VALUE_NONE, 'Search only for vendor / organization names, returns only "vendor" as result'),
new InputOption('type', 't', InputOption::VALUE_REQUIRED, 'Search for a specific package type'),
new InputOption('format', 'f', InputOption::VALUE_REQUIRED, 'Format of the output: text or json', 'text'),
new InputArgument('tokens', InputArgument::IS_ARRAY | InputArgument::REQUIRED, 'tokens to search for'),
Expand Down Expand Up @@ -77,11 +78,24 @@ protected function execute(InputInterface $input, OutputInterface $output)
$commandEvent = new CommandEvent(PluginEvents::COMMAND, 'search', $input, $output);
$composer->getEventDispatcher()->dispatch($commandEvent->getName(), $commandEvent);

$onlyName = $input->getOption('only-name');
$type = $input->getOption('type') ?: null;
$mode = RepositoryInterface::SEARCH_FULLTEXT;
if ($input->getOption('only-name') === true) {
if ($input->getOption('only-vendor') === true) {
throw new \InvalidArgumentException('--only-name and --only-vendor cannot be used together');
}
$mode = RepositoryInterface::SEARCH_NAME;
} elseif ($input->getOption('only-vendor') === true) {
$mode = RepositoryInterface::SEARCH_VENDOR;
}

$type = $input->getOption('type');

$query = implode(' ', $input->getArgument('tokens'));
if ($mode !== RepositoryInterface::SEARCH_FULLTEXT) {
$query = preg_quote($query);
}

$flags = $onlyName ? RepositoryInterface::SEARCH_NAME : RepositoryInterface::SEARCH_FULLTEXT;
$results = $repos->search(implode(' ', $input->getArgument('tokens')), $flags, $type);
$results = $repos->search($query, $mode, $type);

if ($results && $format === 'text') {
$width = $this->getTerminalWidth();
Expand Down
37 changes: 26 additions & 11 deletions src/Composer/Repository/ArrayRepository.php
Expand Up @@ -148,28 +148,43 @@ public function findPackages($name, $constraint = null)
*/
public function search($query, $mode = 0, $type = null)
{
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';
if ($mode === self::SEARCH_FULLTEXT) {
$regex = '{(?:'.implode('|', Preg::split('{\s+}', preg_quote($query))).')}i';
} else {
// vendor/name searches expect the caller to have preg_quoted the query
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';
}

$matches = array();
foreach ($this->getPackages() as $package) {
$name = $package->getName();
if ($mode === self::SEARCH_VENDOR) {
list($name) = explode('/', $name);
}
if (isset($matches[$name])) {
continue;
}
if (null !== $type && $package->getType() !== $type) {
continue;
}

if (Preg::isMatch($regex, $name)
|| ($mode === self::SEARCH_FULLTEXT && $package instanceof CompletePackageInterface && Preg::isMatch($regex, implode(' ', (array) $package->getKeywords()) . ' ' . $package->getDescription()))
) {
if (null !== $type && $package->getType() !== $type) {
continue;
}

$matches[$name] = array(
'name' => $package->getPrettyName(),
'description' => $package instanceof CompletePackageInterface ? $package->getDescription() : null,
);
if ($mode === self::SEARCH_VENDOR) {
$matches[$name] = array(
'name' => $name,
'description' => null,
);
} else {
$matches[$name] = array(
'name' => $package->getPrettyName(),
'description' => $package instanceof CompletePackageInterface ? $package->getDescription() : null,
);

if ($package instanceof CompletePackageInterface && $package->isAbandoned()) {
$matches[$name]['abandoned'] = $package->getReplacementPackage() ?: true;
if ($package instanceof CompletePackageInterface && $package->isAbandoned()) {
$matches[$name]['abandoned'] = $package->getReplacementPackage() ?: true;
}
}
}
}
Expand Down
154 changes: 125 additions & 29 deletions src/Composer/Repository/ComposerRepository.php
Expand Up @@ -337,59 +337,128 @@ public function getPackages()
}

/**
* @param string|null $packageFilter
* @param string|null $packageFilter Package pattern filter which can include "*" as a wildcard
*
* @return string[]
*/
public function getPackageNames($packageFilter = null)
{
$hasProviders = $this->hasProviders();

$packageFilterCb = function ($name) {
return true;
};
if (null !== $packageFilter) {
$packageFilterRegex = '{^'.str_replace('\\*', '.*?', preg_quote($packageFilter)).'$}i';
$packageFilterCb = function ($name) use ($packageFilterRegex) {
return Preg::isMatch($packageFilterRegex, $name);
};
$filterResults =
/**
* @param list<string> $results
* @return list<string>
*/
function (array $results) {
return $results;
}
;
if (null !== $packageFilter && '' !== $packageFilter) {
$packageFilterRegex = BasePackage::packageNameToRegexp($packageFilter);
$filterResults =
/**
* @param list<string> $results
* @return list<string>
*/
function (array $results) use ($packageFilterRegex) {
/** @var list<string> $results */
return Preg::grep($packageFilterRegex, $results);
}
;
}

if ($this->lazyProvidersUrl) {
if (is_array($this->availablePackages)) {
return array_filter(array_keys($this->availablePackages), $packageFilterCb);
return $filterResults(array_keys($this->availablePackages));
}

if ($this->listUrl) {
$url = $this->listUrl;
if ($packageFilter) {
$url .= '?filter='.urlencode($packageFilter);
}

$result = $this->httpDownloader->get($url, $this->options)->decodeJson();

return $result['packageNames'];
// no need to call $filterResults here as the $packageFilter is applied in the function itself
return $this->loadPackageList($packageFilter);
}

if ($this->hasPartialPackages()) {
return array_filter(array_keys($this->partialPackagesByName), $packageFilterCb);
if ($this->hasPartialPackages() && $this->partialPackagesByName !== null) {
return $filterResults(array_keys($this->partialPackagesByName));
}

return array();
}

if ($hasProviders) {
return array_filter($this->getProviderNames(), $packageFilterCb);
return $filterResults($this->getProviderNames());
}

$names = array();
foreach ($this->getPackages() as $package) {
if ($packageFilterCb($package->getName())) {
$names[] = $package->getPrettyName();
}
$names[] = $package->getPrettyName();
}

return $names;
return $filterResults($names);
}

/**
 * Returns the unique vendor prefixes of all package names known to this repository.
 *
 * The result is stored in the cache under "vendor-list.txt" (one vendor per line) and a
 * cached copy younger than 600 seconds is returned instead of recomputing the list.
 *
 * @return list<string>
 */
private function getVendorNames()
{
    $cacheKey = 'vendor-list.txt';
    $cacheAge = $this->cache->getAge($cacheKey);
    if (false !== $cacheAge && $cacheAge < 600 && ($cachedData = $this->cache->read($cacheKey)) !== false) {
        // an empty cache file means "no vendors"; explode() on '' would yield array('')
        return '' === $cachedData ? array() : explode("\n", $cachedData);
    }

    $uniques = array();
    foreach ($this->getPackageNames() as $name) {
        // explode() instead of substr()/strpos(): for a name without "/" strpos() returns
        // false, which made substr() produce an empty-string vendor. A slash-less name is
        // now used as-is, the same way ArrayRepository::search() derives the vendor.
        list($vendor) = explode('/', $name, 2);
        $uniques[$vendor] = true;
    }

    // PHP casts all-digit array keys to int, so cast back to honour list<string>
    $vendors = array_map('strval', array_keys($uniques));

    if (!$this->cache->isReadOnly()) {
        $this->cache->write($cacheKey, implode("\n", $vendors));
    }

    return $vendors;
}

/**
 * Retrieves package names from the repository's list URL.
 *
 * With a non-empty filter the request is always sent to the server (with the filter
 * passed as the "filter" query parameter) and the response is not cached. Without a
 * filter, the full list is served from the "package-list.txt" cache entry when that
 * entry is younger than 600 seconds, otherwise it is downloaded and written back to
 * the cache unless the cache is read-only.
 *
 * @param string|null $packageFilter
 * @return list<string>
 * @throws \LogicException when no list URL is known yet
 */
private function loadPackageList($packageFilter = null)
{
    if (null === $this->listUrl) {
        throw new \LogicException('Make sure to call loadRootServerFile before loadPackageList');
    }

    $hasFilter = is_string($packageFilter) && $packageFilter !== '';
    if ($hasFilter) {
        $filteredUrl = $this->listUrl.'?filter='.urlencode($packageFilter);
        $response = $this->httpDownloader->get($filteredUrl, $this->options)->decodeJson();

        return $response['packageNames'];
    }

    $cacheKey = 'package-list.txt';
    $age = $this->cache->getAge($cacheKey);
    if (false !== $age && $age < 600) {
        $cached = $this->cache->read($cacheKey);
        if (false !== $cached) {
            return explode("\n", $cached);
        }
    }

    // cache miss or stale entry: fetch the complete list and refresh the cache
    $response = $this->httpDownloader->get($this->listUrl, $this->options)->decodeJson();
    $packageNames = $response['packageNames'];
    if (!$this->cache->isReadOnly()) {
        $this->cache->write($cacheKey, implode("\n", $packageNames));
    }

    return $packageNames;
}

public function loadPackages(array $packageNameMap, array $acceptableStabilities, array $stabilityFlags, array $alreadyLoaded = array())
Expand Down Expand Up @@ -465,7 +534,7 @@ public function loadPackages(array $packageNameMap, array $acceptableStabilities
*/
public function search($query, $mode = 0, $type = null)
{
$this->loadRootServerFile();
$this->loadRootServerFile(600);

if ($this->searchUrl && $mode === self::SEARCH_FULLTEXT) {
$url = str_replace(array('%query%', '%type%'), array($query, $type), $this->searchUrl);
Expand All @@ -489,12 +558,36 @@ public function search($query, $mode = 0, $type = null)
return $results;
}

if ($mode === self::SEARCH_VENDOR) {
$results = array();
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';

$vendorNames = $this->getVendorNames();
foreach (Preg::grep($regex, $vendorNames) as $name) {
$results[] = array('name' => $name, 'description' => '');
}

return $results;
}

if ($this->hasProviders() || $this->lazyProvidersUrl) {
// optimize search for "^foo/bar" where at least "^foo/" is present by loading this directly from the listUrl if present
if (Preg::isMatch('{^\^(?P<query>(?P<vendor>[a-z0-9_.-]+)/[a-z0-9_.-]*)\*?$}i', $query, $match) && $this->listUrl !== null) {
$url = $this->listUrl . '?vendor='.urlencode($match['vendor']).'&filter='.urlencode($match['query'].'*');
$result = $this->httpDownloader->get($url, $this->options)->decodeJson();

$results = array();
foreach ($result['packageNames'] as $name) {
$results[] = array('name' => $name, 'description' => '');
}

return $results;
}

$results = array();
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';

$packageNames = $this->getPackageNames();

foreach (Preg::grep($regex, $packageNames) as $name) {
$results[] = array('name' => $name, 'description' => '');
}
Expand Down Expand Up @@ -920,9 +1013,10 @@ private function getPackagesJsonUrl()
}

/**
* @param int|null $rootMaxAge
* @return array<string, mixed>
*/
protected function loadRootServerFile()
protected function loadRootServerFile($rootMaxAge = null)
{
if (null !== $this->rootData) {
return $this->rootData;
Expand All @@ -934,7 +1028,9 @@ protected function loadRootServerFile()

if ($cachedData = $this->cache->read('packages.json')) {
$cachedData = json_decode($cachedData, true);
if (isset($cachedData['last-modified'])) {
if ($rootMaxAge !== null && ($age = $this->cache->getAge('packages.json')) !== false && $age <= $rootMaxAge) {
$data = $cachedData;
} elseif (isset($cachedData['last-modified'])) {
$response = $this->fetchFileIfLastModified($this->getPackagesJsonUrl(), 'packages.json', $cachedData['last-modified']);
$data = true === $response ? $cachedData : $response;
}
Expand Down
10 changes: 10 additions & 0 deletions src/Composer/Repository/PlatformRepository.php
Expand Up @@ -727,4 +727,14 @@ public static function getPlatformPhpVersion()
{
return self::$lastSeenPlatformPhp;
}

/**
 * Searches the platform packages, except for vendor searches which always come back empty.
 *
 * @param string      $query
 * @param int         $mode one of the SEARCH_* constants
 * @param string|null $type
 * @return array[] empty for SEARCH_VENDOR, otherwise whatever the parent implementation returns
 */
public function search($query, $mode = 0, $type = null)
{
    // platform packages have no vendors to match, so a vendor search is suppressed entirely
    return $mode === self::SEARCH_VENDOR
        ? array()
        : parent::search($query, $mode, $type);
}
}
7 changes: 4 additions & 3 deletions src/Composer/Repository/RepositoryInterface.php
Expand Up @@ -27,6 +27,7 @@ interface RepositoryInterface extends \Countable
{
const SEARCH_FULLTEXT = 0;
const SEARCH_NAME = 1;
const SEARCH_VENDOR = 2;

/**
* Checks if specified package registered (installed).
Expand Down Expand Up @@ -85,11 +86,11 @@ public function loadPackages(array $packageNameMap, array $acceptableStabilities
/**
* Searches the repository for packages containing the query
*
* @param string $query search query
* @param int $mode a set of SEARCH_* constants to search on, implementations should do a best effort only
* @param string $query search query; for SEARCH_NAME and SEARCH_VENDOR, regular expression metacharacters are supported by implementations, so callers should escape user input through preg_quote
* @param int $mode a set of SEARCH_* constants to search on, implementations should do a best effort only, default is SEARCH_FULLTEXT
* @param string $type The type of package to search for. Defaults to all types of packages
*
* @return array[] an array of array('name' => '...', 'description' => '...'|null)
* @return array[] an array of array('name' => '...', 'description' => '...'|null, 'abandoned' => 'string'|true|unset) For SEARCH_VENDOR the name will be in "vendor" form
* @phpstan-return list<array{name: string, description: ?string, abandoned?: string|true}>
*/
public function search($query, $mode = 0, $type = null);
Expand Down

0 comments on commit cc32ebc

Please sign in to comment.