New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support hfh 0.10 implicit auth #5031
Changes from 5 commits
3c95246
49bb38f
99dde63
536510d
e4472cf
f45116e
136a09a
82554fb
07d0b0a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
from typing import Optional | ||
from typing import List, Optional, Union | ||
|
||
import huggingface_hub | ||
from huggingface_hub import HfApi | ||
from huggingface_hub import HfApi, HfFolder | ||
from huggingface_hub.hf_api import DatasetInfo | ||
from packaging import version | ||
|
||
|
||
|
@@ -99,3 +100,75 @@ def delete_repo( | |
token=token, | ||
repo_type=repo_type, | ||
) | ||
|
||
|
||
def dataset_info( | ||
hf_api: HfApi, | ||
repo_id: str, | ||
*, | ||
revision: Optional[str] = None, | ||
timeout: Optional[float] = None, | ||
use_auth_token: Optional[Union[bool, str]] = None, | ||
) -> DatasetInfo: | ||
""" | ||
Get info on one specific dataset on huggingface.co. | ||
Dataset can be private if you pass an acceptable token. | ||
Args: | ||
hf_api (`huggingface_hub.HfApi`): Hub client | ||
repo_id (`str`): | ||
A namespace (user or an organization) and a repo name separated | ||
by a `/`. | ||
revision (`str`, *optional*): | ||
The revision of the dataset repository from which to get the | ||
information. | ||
timeout (`float`, *optional*): | ||
Whether to set a timeout for the request to the Hub. | ||
use_auth_token (`bool` or `str`, *optional*): | ||
Whether to use the `auth_token` provided from the | ||
`huggingface_hub` cli. If not logged in, a valid `auth_token` | ||
can be passed in as a string. | ||
Returns: | ||
[`hf_api.DatasetInfo`]: The dataset repository information. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess this is a copy-paste from the |
||
<Tip> | ||
Raises the following errors: | ||
- [`~utils.RepositoryNotFoundError`] | ||
If the repository to download from cannot be found. This may be because it doesn't exist, | ||
or because it is set to `private` and you do not have access. | ||
- [`~utils.RevisionNotFoundError`] | ||
If the revision to download from cannot be found. | ||
</Tip> | ||
""" | ||
if version.parse(huggingface_hub.__version__) < version.parse("0.10.0"): | ||
if use_auth_token is False: | ||
token = "no-token" | ||
elif isinstance(use_auth_token, str): | ||
token = use_auth_token | ||
else: | ||
token = HfFolder.get_token() or "no-token" | ||
return hf_api.dataset_info( | ||
repo_id, | ||
revision=revision, | ||
token=token, | ||
timeout=timeout, | ||
) | ||
else: # the `token` parameter is deprecated in huggingface_hub>=0.10.0 | ||
return hf_api.dataset_info(repo_id, revision=revision, timeout=timeout, use_auth_token=use_auth_token) | ||
|
||
|
||
def list_repo_files( | ||
hf_api: HfApi, | ||
repo_id: str, | ||
revision: Optional[str] = None, | ||
repo_type: Optional[str] = None, | ||
token: Optional[str] = None, | ||
timeout: Optional[float] = None, | ||
) -> List[str]: | ||
""" | ||
Get the list of files in a given repo. | ||
""" | ||
if version.parse(huggingface_hub.__version__) < version.parse("0.10.0"): | ||
return hf_api.list_repo_files(repo_id, revision=revision, repo_type=repo_type, token=token, timeout=timeout) | ||
else: # the `token` parameter is deprecated in huggingface_hub>=0.10.0 | ||
return hf_api.list_repo_files( | ||
repo_id, revision=revision, repo_type=repo_type, use_auth_token=token, timeout=timeout | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,9 @@ | |
from typing import List, Optional, Type, TypeVar, Union | ||
from urllib.parse import urljoin, urlparse | ||
|
||
import huggingface_hub | ||
import requests | ||
from huggingface_hub import HfFolder | ||
|
||
from .. import __version__, config | ||
from ..download.download_config import DownloadConfig | ||
|
@@ -218,7 +220,9 @@ def cached_path( | |
|
||
|
||
def get_datasets_user_agent(user_agent: Optional[Union[str, dict]] = None) -> str: | ||
ua = f"datasets/{__version__}; python/{config.PY_VERSION}" | ||
ua = f"datasets/{__version__}" | ||
ua += f"; python/{config.PY_VERSION}" | ||
ua += f"; huggingface_hub/{huggingface_hub.__version__}" | ||
Comment on lines 222 to +225
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would love to hear if you have ideas on how to make this easy to configure. We have the same logic in (not a suggestion to change something here, more as a general question) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since it's only a few lines and pretty easy to modify, I don't think we can make something significantly better than that. |
||
ua += f"; pyarrow/{config.PYARROW_VERSION}" | ||
if config.TORCH_AVAILABLE: | ||
ua += f"; torch/{config.TORCH_VERSION}" | ||
|
@@ -239,13 +243,13 @@ def get_authentication_headers_for_url(url: str, use_auth_token: Optional[Union[ | |
"""Handle the HF authentication""" | ||
headers = {} | ||
if url.startswith(config.HF_ENDPOINT): | ||
token = None | ||
if isinstance(use_auth_token, str): | ||
if use_auth_token is False: | ||
token = None | ||
elif isinstance(use_auth_token, str): | ||
token = use_auth_token | ||
elif bool(use_auth_token): | ||
from huggingface_hub import hf_api | ||
else: | ||
token = HfFolder.get_token() | ||
|
||
token = hf_api.HfFolder.get_token() | ||
if token: | ||
headers["authorization"] = f"Bearer {token}" | ||
return headers | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can remove this line because `hf_api` is no longer used in this method?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's still used in `hf_api_dataset_info(hf_api, ...)` a few lines later ;)