diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 4219fab8c7e..00000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,102 +0,0 @@ -version: 2.1 - -orbs: - win: circleci/windows@2.2.0 - -jobs: - run_dataset_script_tests_pyarrow_latest: - working_directory: ~/datasets - docker: - - image: cimg/python:3.6 - resource_class: medium - steps: - - checkout - - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev - - run: pip install --upgrade pip - - run: python -m venv venv - - run: source venv/bin/activate - - run: pip install .[tests] - - run: pip install -r additional-tests-requirements.txt --no-deps - - run: pip install pyarrow --upgrade - - run: HF_SCRIPTS_VERSION=master HF_ALLOW_CODE_EVAL=1 python -m pytest -d --tx 2*popen//python=python3.6 --dist loadfile -sv ./tests/ - - run_dataset_script_tests_pyarrow_5: - working_directory: ~/datasets - docker: - - image: cimg/python:3.6 - resource_class: medium - steps: - - checkout - - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev - - run: pip install --upgrade pip - - run: python -m venv venv - - run: source venv/bin/activate - - run: pip install .[tests] - - run: pip install -r additional-tests-requirements.txt --no-deps - - run: pip install pyarrow==5.0.0 - - run: HF_SCRIPTS_VERSION=master HF_ALLOW_CODE_EVAL=1 python -m pytest -d --tx 2*popen//python=python3.6 --dist loadfile -sv ./tests/ - - run_dataset_script_tests_pyarrow_latest_WIN: - working_directory: ~/datasets - executor: - name: win/default - shell: powershell - steps: - - checkout - - run: | - conda init powershell - conda update conda - conda create -n py37 python=3.7 pytorch --yes - - run: | - conda activate py37 - pip install .[tests] - pip install -r additional-tests-requirements.txt --no-deps - pip install pyarrow --upgrade - - run: | - conda activate py37 - $env:HF_SCRIPTS_VERSION="master" - python -m pytest -n 2 --dist loadfile -sv ./tests/ - - 
run_dataset_script_tests_pyarrow_5_WIN: - working_directory: ~/datasets - executor: - name: win/default - shell: powershell - steps: - - checkout - - run: | - conda init powershell - conda update conda - conda create -n py37 python=3.7 pytorch --yes - - run: | - conda activate py37 - pip install .[tests] - pip install -r additional-tests-requirements.txt --no-deps - pip install pyarrow==5.0.0 - - run: | - conda activate py37 - $env:HF_SCRIPTS_VERSION="master" - python -m pytest -n 2 --dist loadfile -sv ./tests/ - - check_code_quality: - working_directory: ~/datasets - docker: - - image: circleci/python:3.6 - resource_class: medium - parallelism: 1 - steps: - - checkout - - run: sudo pip install .[quality] - - run: black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics - - run: isort --check-only tests src benchmarks datasets metrics - - run: flake8 tests src benchmarks datasets metrics - -workflows: - version: 2 - build_and_test: - jobs: - - check_code_quality - - run_dataset_script_tests_pyarrow_latest - - run_dataset_script_tests_pyarrow_5 - - run_dataset_script_tests_pyarrow_latest_WIN - - run_dataset_script_tests_pyarrow_5_WIN diff --git a/.circleci/deploy.sh b/.circleci/deploy.sh deleted file mode 100755 index 10929052252..00000000000 --- a/.circleci/deploy.sh +++ /dev/null @@ -1,81 +0,0 @@ -cd docs - -function deploy_doc(){ - echo "Creating doc at commit $1 and pushing to folder $2" - git checkout $1 - if [ ! -z "$2" ] - then - if [ "$2" == "master" ]; then - echo "Pushing master" - make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir/$2/ - cp -r _build/html/_static . 
- elif ssh -oStrictHostKeyChecking=no $doc "[ -d $dir/$2 ]"; then - echo "Directory" $2 "already exists" - scp -r -oStrictHostKeyChecking=no _static/* $doc:$dir/$2/_static/ - else - echo "Pushing version" $2 - make clean && make html - rm -rf _build/html/_static - cp -r _static _build/html - scp -r -oStrictHostKeyChecking=no _build/html $doc:$dir/$2 - fi - else - echo "Pushing stable" - make clean && make html - rm -rf _build/html/_static - cp -r _static _build/html - scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir - fi -} - -# You can find the commit for each tag on https://github.com/huggingface/datasets/tags -# Deploys the master documentation on huggingface.co/docs/datasets/master -deploy_doc "master" master - -# Example of how to deploy a doc on a certain commit (the commit doesn't have to be on the master branch). -# The following commit would live on huggingface.co/docs/datasets/v1.0.0 -deploy_doc "faf3d79" v1.18.4 -deploy_doc "c6bc52a" v1.18.3 -deploy_doc "ba00b25" v1.18.2 -deploy_doc "218e496" v1.18.1 -deploy_doc "c0aea8d" v1.18.0 -deploy_doc "dff6c92" v1.17.0 -deploy_doc "acca8f4" v1.16.1 -deploy_doc "d50f5f9" v1.16.0 -deploy_doc "0181006" v1.15.1 -deploy_doc "dcaa3c0" v1.15.0 -deploy_doc "ec82422" v1.14.0 -deploy_doc "10dc68c" v1.13.3 -deploy_doc "e82164f" v1.13.2 -deploy_doc "2ed762b" v1.13.1 -deploy_doc "38ec259" v1.13.0 -deploy_doc "2c1fc9c" v1.12.1 -deploy_doc "c65dccc" v1.12.0 -deploy_doc "ea7f0b8" v1.11.0 -deploy_doc "cea1a29" v1.10.2 -deploy_doc "6b7b227" v1.10.1 -deploy_doc "3aabafb" v1.10.0 -deploy_doc "5bc064d" v1.9.0 -deploy_doc "bcf0543" v1.8.0 -deploy_doc "448c177" v1.7.0 -deploy_doc "b0d7ae1" v1.6.2 -deploy_doc "e8fc41f" v1.6.1 -deploy_doc "40bb9e6" v1.6.0 -deploy_doc "f256b77" v1.5.0 -deploy_doc "ca41320" v1.4.1 -deploy_doc "f42658e" v1.4.0 -deploy_doc "ef633da" v1.3.0 -deploy_doc "a59580b" v1.2.1 -deploy_doc "dae6880" v1.2.0 -deploy_doc "000b584" v1.1.3 -deploy_doc "2256521" v1.1.2 -deploy_doc "8029965" v1.1.1 -deploy_doc 
"fe52b67" v1.1.0 -deploy_doc "af7cd94" v1.0.2 -deploy_doc "7c9d2b5" v1.0.1 -deploy_doc "322ba0e" v1.0.0 -deploy_doc "99e0ee6" v0.3.0 -deploy_doc "21e8091" v0.4.0 - -# Replace this by the latest stable commit. It is recommended to pin on a version release rather than master. -deploy_doc "master" diff --git a/.github/ISSUE_TEMPLATE/add-dataset.md b/.github/ISSUE_TEMPLATE/add-dataset.md index dd5038bd456..23505dc3359 100644 --- a/.github/ISSUE_TEMPLATE/add-dataset.md +++ b/.github/ISSUE_TEMPLATE/add-dataset.md @@ -14,4 +14,4 @@ assignees: '' - **Data:** *link to the Github repository or current dataset location* - **Motivation:** *what are some good reasons to have this dataset* -Instructions to add a new dataset can be found [here](https://github.com/huggingface/datasets/blob/master/ADD_NEW_DATASET.md). +Instructions to add a new dataset can be found [here](https://github.com/huggingface/datasets/blob/main/ADD_NEW_DATASET.md). diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index f71970b51ac..8d778801a30 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,7 @@ contact_links: + - name: Datasets on the Hugging Face Hub + url: https://huggingface.co/datasets + about: Open a Pull request / Discussion related to a specific dataset on the Hugging Face Hub (PRs for datasets with no namespace still have to be on GitHub though) - name: Forum url: https://discuss.huggingface.co/c/datasets/10 about: Please ask and answer questions here, and engage with other community members diff --git a/.github/ISSUE_TEMPLATE/dataset-viewer.yml b/.github/ISSUE_TEMPLATE/dataset-viewer.yml new file mode 100644 index 00000000000..b9a9943a8d0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/dataset-viewer.yml @@ -0,0 +1,27 @@ +name: Dataset Viewer Issue +description: Issue related to the Dataset Viewer on huggingface.co/datasets +title: "Dataset Viewer issue for [dataset name]" +labels: ["dataset-viewer"] +assignees: + - 
severo +body: + - type: input + id: url + attributes: + label: Link + description: Link to the dataset viewer page + placeholder: ex. https://huggingface.co/datasets/glue/viewer/cola/test + - type: textarea + id: description + attributes: + label: Description + description: Short description of the issue + placeholder: Tell us what the issue is and which error you get + - type: dropdown + id: owner + attributes: + label: Owner + description: Is it you who added this dataset? + options: + - "Yes" + - "No" diff --git a/.github/ISSUE_TEMPLATE/dataset_viewer.md b/.github/ISSUE_TEMPLATE/dataset_viewer.md deleted file mode 100644 index 9545c629f27..00000000000 --- a/.github/ISSUE_TEMPLATE/dataset_viewer.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -name: "Dataset viewer issue" -about: Issue related to the dataset viewer on huggingface.co. -title: '' -labels: 'dataset-viewer' -assignees: '' ---- - -## Dataset viewer issue for '*name of the dataset*' - -**Link:** *link to the dataset viewer page* - -*short description of the issue* - -Am I the one who added this dataset ? 
Yes-No diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml index 229f0cb8265..b8ecbcc99f2 100644 --- a/.github/conda/meta.yaml +++ b/.github/conda/meta.yaml @@ -15,7 +15,7 @@ requirements: - python - pip - numpy >=1.17 - - pyarrow >=5.0.0 + - pyarrow >=6.0.0 - python-xxhash - dill - pandas @@ -32,7 +32,7 @@ requirements: - python - pip - numpy >=1.17 - - pyarrow >=5.0.0 + - pyarrow >=6.0.0 - python-xxhash - dill - pandas diff --git a/.github/hub/requirements.txt b/.github/hub/requirements.txt index bf6c92fadc0..369ed61b776 100644 --- a/.github/hub/requirements.txt +++ b/.github/hub/requirements.txt @@ -1,4 +1,4 @@ -GitPython==3.1.11 +GitPython==3.1.27 python-dotenv==0.19.2 requests==2.25.1 tqdm==4.62.3 \ No newline at end of file diff --git a/.github/hub/update_hub_repositories.py b/.github/hub/update_hub_repositories.py index 9ce0fdf7a1d..c923583ba7f 100644 --- a/.github/hub/update_hub_repositories.py +++ b/.github/hub/update_hub_repositories.py @@ -29,7 +29,9 @@ HUB_CANONICAL_WHOAMI = HUB_ENDPOINT + "/api/whoami-v2" HUB_CANONICAL_CREATE_URL = HUB_ENDPOINT + "/api/repos/create" HUB_CANONICAL_INFO_URL = HUB_ENDPOINT + "/api/datasets/{dataset_name}" -HUB_CANONICAL_DATASET_GIT_URL = HUB_ENDPOINT.replace("https://", "https://user:{token}@") + "/datasets/{dataset_name}.git" +HUB_CANONICAL_DATASET_GIT_URL = ( + HUB_ENDPOINT.replace("https://", "https://user:{token}@") + "/datasets/{dataset_name}.git" +) HUB_API_GH_TO_HF = HUB_ENDPOINT + "/api/gh-to-hf/{github_username}" DATASETS_LIB_CATALOG_DIR_NAME = "datasets" DATASETS_LIB_COMMIT_URL = "https://github.com/huggingface/datasets/commit/{hexsha}" @@ -102,17 +104,6 @@ def check_authorizations(user_info: dict): ) -def apply_hacks_for_moon_landing(dataset_repo_path: Path): - if (dataset_repo_path / "README.md").is_file(): - with (dataset_repo_path / "README.md").open() as f: - readme_content = f.read() - if readme_content.count("---\n") > 1: - _, tags, content = readme_content.split("---\n", 2) - tags = 
tags.replace("\nlicense:", "\nlicenses:").replace(".", "-").replace("$", "%") - with (dataset_repo_path / "README.md").open("w") as f: - f.write("---\n".join(["", tags, content])) - - class update_main: def __init__( self, @@ -136,7 +127,8 @@ def __call__(self, dataset_name: str) -> bool: logger.warning(f"[{dataset_name}] " + repr(e)) if not canonical_dataset_path(dataset_name).is_dir(): repo = Repo.clone_from( - canonical_dataset_git_url(dataset_name, self.token), to_path=canonical_dataset_path(dataset_name) + canonical_dataset_git_url(dataset_name, token=self.token), + to_path=canonical_dataset_path(dataset_name), ) else: repo = Repo(canonical_dataset_path(dataset_name)) @@ -145,7 +137,11 @@ def __call__(self, dataset_name: str) -> bool: logs.append(repo.git.reset("--hard")) logs.append(repo.git.clean("-f", "-d")) logs.append(repo.git.checkout(CANONICAL_DATASET_REPO_MAIN_BRANCH)) - logs.append(repo.remote().pull()) + try: + logs.append(repo.remote().pull()) + except Exception as e: + logs.append("pull failed !") + logs.append(repr(e)) # Copy the changes and commit distutils.dir_util.copy_tree( str(src_canonical_dataset_path(datasets_lib_path, dataset_name)), str(canonical_dataset_path(dataset_name)) @@ -155,7 +151,6 @@ def __call__(self, dataset_name: str) -> bool: (canonical_dataset_path(dataset_name) / filepath_to_delete).unlink() except Exception as e: logger.warning(f"[{dataset_name}] Couldn't delete file at {filepath_to_delete}: {repr(e)}") - apply_hacks_for_moon_landing(canonical_dataset_path(dataset_name)) logs.append(repo.git.add(".")) if "Changes to be committed:" in repo.git.status(): logs.append(repo.git.commit(*self.commit_args)) @@ -168,6 +163,7 @@ def __call__(self, dataset_name: str) -> bool: logs.append(repo.git.tag(self.tag_name, f"-m Add tag from datasets {self.tag_name}")) logs.append(repo.git.push("--tags")) except Exception as e: + logs.append("push failed !") logs.append(repr(e)) if "Your branch is up to date with" not in repo.git.status(): 
logs.append(repo.git.status()) @@ -210,31 +206,29 @@ def __call__(self, dataset_name: str) -> bool: path for diff in datasets_lib_repo.index.diff(prev_commit) for path in [diff.a_path, diff.b_path] - if path.startswith(DATASETS_LIB_CATALOG_DIR_NAME) - and path.count("/") >= 2 + if path.startswith(DATASETS_LIB_CATALOG_DIR_NAME) and path.count("/") >= 2 ] changed_datasets_names_since_last_commit = {path.split("/")[1] for path in changed_files_since_last_commit} # ignore json, csv etc. changed_datasets_names_since_last_commit = { - dataset_name for dataset_name in changed_datasets_names_since_last_commit + dataset_name + for dataset_name in changed_datasets_names_since_last_commit if (datasets_lib_path / DATASETS_LIB_CATALOG_DIR_NAME / dataset_name / (dataset_name + ".py")).is_file() } deleted_files = {dataset_name: set() for dataset_name in changed_datasets_names_since_last_commit} for path in changed_files_since_last_commit: _, dataset_name, rel_path = path.split("/", 2) - if ( - dataset_name in changed_datasets_names_since_last_commit - and not (datasets_lib_path / path).is_file() - ): + if dataset_name in changed_datasets_names_since_last_commit and not (datasets_lib_path / path).is_file(): deleted_files[dataset_name].add(rel_path) dataset_names = sys.argv[1:] if dataset_names: if dataset_names[0] == "--all": dataset_names = sorted( - d.name for d in (datasets_lib_path / DATASETS_LIB_CATALOG_DIR_NAME).glob("*") + d.name + for d in (datasets_lib_path / DATASETS_LIB_CATALOG_DIR_NAME).glob("*") if d.is_dir() and (d / (d.name + ".py")).is_file() # ignore json, csv etc. 
) if dataset_names[0] == "--auto": @@ -245,7 +239,8 @@ def __call__(self, dataset_name: str) -> bool: ) dataset_names = sorted(d.name for d in (ROOT / HUB_DIR_NAME).glob("*") if d.is_dir()) dataset_names = sorted( - d.name for d in (datasets_lib_path / DATASETS_LIB_CATALOG_DIR_NAME).glob("*") + d.name + for d in (datasets_lib_path / DATASETS_LIB_CATALOG_DIR_NAME).glob("*") if d.is_dir() and (d / (d.name + ".py")).is_file() # ignore json, csv etc. ) else: @@ -268,7 +263,9 @@ def __call__(self, dataset_name: str) -> bool: ), dataset_names, ) - datasets_with_errors = [dataset_name for success, dataset_name in zip(successes, dataset_names) if not success] + datasets_with_errors = [ + dataset_name for success, dataset_name in zip(successes, dataset_names) if not success + ] if datasets_with_errors: raise UpdateFailed( f"Those datasets couldn't be updated: {' '.join(datasets_with_errors)}\n" diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml index cc03b887ba7..c926a708b5d 100644 --- a/.github/workflows/benchmarks.yaml +++ b/.github/workflows/benchmarks.yaml @@ -3,30 +3,33 @@ on: [push] jobs: run: runs-on: [ubuntu-latest] - container: docker://dvcorg/cml-py3:latest + container: docker://dvcorg/cml:latest steps: - uses: actions/checkout@v2 - name: cml_run env: repo_token: ${{ secrets.GITHUB_TOKEN }} run: | + # See https://github.com/actions/checkout/issues/760 + git config --global --add safe.directory /__w/datasets/datasets + # Your ML workflow goes here pip install --upgrade pip pip install setuptools wheel pip install -e .[benchmarks] - # pyarrow==5.0.0 - pip install pyarrow==5.0.0 + # pyarrow==6.0.0 + pip install pyarrow==6.0.0 dvc repro --force git fetch --prune - dvc metrics diff --show-json master > report.json + dvc metrics diff --show-json main > report.json python ./benchmarks/format.py report.json report.md - echo "
\nShow benchmarks\n\nPyArrow==5.0.0\n" > final_report.md + echo "
\nShow benchmarks\n\nPyArrow==6.0.0\n" > final_report.md cat report.md >> final_report.md # pyarrow @@ -35,7 +38,7 @@ jobs: dvc repro --force git fetch --prune - dvc metrics diff --show-json master > report.json + dvc metrics diff --show-json main > report.json python ./benchmarks/format.py report.json report.md diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml index 37ac93e3730..8dac4dc0d51 100644 --- a/.github/workflows/build_documentation.yml +++ b/.github/workflows/build_documentation.yml @@ -3,7 +3,7 @@ name: Build documentation on: push: branches: - - master + - main - doc-builder* - v*-release diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000000..e2f18e0b50e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,74 @@ +name: CI + +on: + pull_request: + branches: + - main + push: + branches: + - main + +env: + HF_SCRIPTS_VERSION: main + HF_ALLOW_CODE_EVAL: 1 + +jobs: + + check_code_quality: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.7" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[quality] + - name: Check quality + run: | + black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics + isort --check-only tests src benchmarks datasets metrics + flake8 tests src benchmarks datasets metrics + + test: + needs: check_code_quality + strategy: + matrix: + test: ['unit', 'integration'] + os: [ubuntu-latest, windows-latest] + pyarrow_version: [latest, 6.0.1] + continue-on-error: ${{ matrix.test == 'integration' }} + runs-on: ${{ matrix.os }} + steps: + - name: Install OS dependencies + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + sudo apt-get -y update + sudo apt-get -y install libsndfile1 sox + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python 3.7 + 
uses: actions/setup-python@v4 + with: + python-version: 3.7 + - name: Upgrade pip + run: python -m pip install --upgrade pip + - name: Pin setuptools-scm + if: ${{ matrix.os == 'ubuntu-latest' }} + run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2" + - name: Install dependencies + run: | + pip install .[tests] + pip install -r additional-tests-requirements.txt --no-deps + - name: Install latest PyArrow + if: ${{ matrix.pyarrow_version == 'latest' }} + run: pip install pyarrow --upgrade + - name: Install PyArrow ${{ matrix.pyarrow_version }} + if: ${{ matrix.pyarrow_version != 'latest' }} + run: pip install pyarrow==${{ matrix.pyarrow_version }} + - name: Test with pytest + run: | + python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/ diff --git a/.github/workflows/test-audio.yml b/.github/workflows/test-audio.yml deleted file mode 100644 index 68e0b8f0b3b..00000000000 --- a/.github/workflows/test-audio.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Test audio - -on: - pull_request: - branches: - - master - -jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Install OS dependencies - run: | - sudo apt-get update - sudo apt-get install libsndfile1 sox - - uses: actions/checkout@v2 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: "3.6" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install .[tests,audio] - pip install pyarrow --upgrade - - name: Test audio with pytest - run: | - HF_SCRIPTS_VERSION=master python -m pytest -n 2 -sv ./tests/features/test_audio.py diff --git a/.github/workflows/update-hub-repositories.yaml b/.github/workflows/update-hub-repositories.yaml index 6b39d9c6537..3132d16c9f1 100644 --- a/.github/workflows/update-hub-repositories.yaml +++ b/.github/workflows/update-hub-repositories.yaml @@ -3,7 +3,7 @@ name: Update 
Hub repositories on: push: branches: - - master + - main jobs: update-hub-repositories: @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: "3.6" + python-version: "3.7" - name: Set up default Git config run: | git config --global user.name system @@ -32,4 +32,5 @@ jobs: export HF_TOKEN=${{ secrets.HUB_TRUSTED_COMMITER_TOKEN }} export DATASETS_LIB_PATH=$GITHUB_WORKSPACE export HF_USE_PROD=1 + export GIT_LFS_SKIP_SMUDGE=1 python update_hub_repositories.py --auto diff --git a/ADD_NEW_DATASET.md b/ADD_NEW_DATASET.md index bc78ac67a4a..fd4a22f4eb4 100644 --- a/ADD_NEW_DATASET.md +++ b/ADD_NEW_DATASET.md @@ -1,6 +1,27 @@ # How to add one (or several) new datasets to 🤗 Datasets -## Start by preparing your environment +ADD DATASETS DIRECTLY ON THE 🤗 HUGGING FACE HUB ! + +You can share your dataset on https://huggingface.co/datasets directly using your account, see the documentation: + +* [Create a dataset and upload files](https://huggingface.co/docs/datasets/upload_dataset) +* [Advanced guide using dataset scripts](https://huggingface.co/docs/datasets/share) + +## What about the datasets scripts in this GitHub repository then ? + +Datasets used to be hosted in this GitHub repository, but all datasets have now been migrated to the Hugging Face Hub. +The legacy GitHub datasets were added originally on the GitHub repository and therefore don't have a namespace: "squad", "glue", etc. unlike the other datasets that are named "username/dataset_name" or "org/dataset_name". +Those datasets are still maintained on GitHub, and if you'd like to edit them, please open a Pull Request on the huggingface/datasets repository. + +Sharing your dataset to the Hub is the recommended way of adding a dataset. + +In some rare cases it makes more sense to open a PR on GitHub. For example when you are not the author of the dataset and there is no clear organization / namespace that you can put the dataset under. 
+ +The following presents how to open a Pull Request on GitHub to add a new dataset to this repository. + +## Add a new dataset to this repository (legacy) + +### Start by preparing your environment 1. Fork the [repository](https://github.com/huggingface/datasets) by clicking on the 'Fork' button on the repository's page. This creates a copy of the code under your GitHub user account. @@ -29,9 +50,9 @@ This creates a copy of the code under your GitHub user account. Now you are ready, each time you want to add a new dataset, follow the steps in the following section: -## Adding a new dataset +### Adding a new dataset -### Understand the structure of the dataset +#### Understand the structure of the dataset 1. Find a short-name for the dataset: @@ -49,11 +70,11 @@ You are now ready to start the process of adding the dataset. We will create the ```bash git fetch upstream - git rebase upstream/master + git rebase upstream/main git checkout -b a-descriptive-name-for-my-changes ``` - **Do not** work on the `master` branch. + **Do not** work on the `main` branch. 3. Create your dataset folder under `datasets/`: @@ -75,44 +96,46 @@ You are now ready to start the process of adding the dataset. We will create the - Download/open the data to see how it looks like - While you explore and read about the dataset, you can complete some sections of the dataset card (the online form or the one you have just created at `./datasets//README.md`). You can just copy the information you meet in your readings in the relevant sections of the dataset card (typically in `Dataset Description`, `Dataset Structure` and `Dataset Creation`). - If you need more information on a section of the dataset card, a detailed guide is in the `README_guide.md` here: https://github.com/huggingface/datasets/blob/master/templates/README_guide.md. 
+ If you need more information on a section of the dataset card, a detailed guide is in the `README_guide.md` here: https://github.com/huggingface/datasets/blob/main/templates/README_guide.md. - There is a also a (very detailed) example here: https://github.com/huggingface/datasets/tree/master/datasets/eli5. + There is a also a (very detailed) example here: https://github.com/huggingface/datasets/tree/main/datasets/eli5. Don't spend too much time completing the dataset card, just copy what you find when exploring the dataset documentation. If you can't find all the information it's ok. You can always spend more time completing the dataset card while we are reviewing your PR (see below) and the dataset card will be open for everybody to complete them afterwards. If you don't know what to write in a section, just leave the `[More Information Needed]` text. -### Write the loading/processing code +#### Write the loading/processing code Now let's get coding :-) The dataset script is the main entry point to load and process the data. It is a python script under `datasets//.py`. -There is a detailed explanation on how the library and scripts are organized [here](https://huggingface.co/docs/datasets/master/about_dataset_load.html). +There is a detailed explanation on how the library and scripts are organized [here](https://huggingface.co/docs/datasets/main/about_dataset_load.html). Note on naming: the dataset class should be camel case, while the dataset short_name is its snake case equivalent (ex: `class BookCorpus` for the dataset `book_corpus`). 
-To add a new dataset, you can start from the empty template which is [in the `templates` folder](https://github.com/huggingface/datasets/blob/master/templates/new_dataset_script.py): +To add a new dataset, you can start from the empty template which is [in the `templates` folder](https://github.com/huggingface/datasets/blob/main/templates/new_dataset_script.py): ```bash cp ./templates/new_dataset_script.py ./datasets//.py ``` -And then go progressively through all the `TODO` in the template 🙂. If it's your first dataset addition and you are a bit lost among the information to fill in, you can take some time to read the [detailed explanation here](https://huggingface.co/docs/datasets/master/dataset_script.html). +And then go progressively through all the `TODO` in the template 🙂. If it's your first dataset addition and you are a bit lost among the information to fill in, you can take some time to read the [detailed explanation here](https://huggingface.co/docs/datasets/main/dataset_script.html). You can also start (or copy any part) from one of the datasets of reference listed below. The main criteria for choosing among these reference dataset is the format of the data files (JSON/JSONL/CSV/TSV/text) and whether you need or don't need several configurations (see above explanations on configurations). 
Feel free to reuse any parts of the following examples and adapt them to your case: -- question-answering: [squad](https://github.com/huggingface/datasets/blob/master/datasets/squad/squad.py) (original data are in json) -- natural language inference: [snli](https://github.com/huggingface/datasets/blob/master/datasets/snli/snli.py) (original data are in text files with tab separated columns) -- POS/NER: [conll2003](https://github.com/huggingface/datasets/blob/master/datasets/conll2003/conll2003.py) (original data are in text files with one token per line) -- sentiment analysis: [allocine](https://github.com/huggingface/datasets/blob/master/datasets/allocine/allocine.py) (original data are in jsonl files) -- text classification: [ag_news](https://github.com/huggingface/datasets/blob/master/datasets/ag_news/ag_news.py) (original data are in csv files) -- translation: [flores](https://github.com/huggingface/datasets/blob/master/datasets/flores/flores.py) (original data come from text files - one per language) -- summarization: [billsum](https://github.com/huggingface/datasets/blob/master/datasets/billsum/billsum.py) (original data are in json files) -- benchmark: [glue](https://github.com/huggingface/datasets/blob/master/datasets/glue/glue.py) (original data are various formats) -- multilingual: [xquad](https://github.com/huggingface/datasets/blob/master/datasets/xquad/xquad.py) (original data are in json) -- multitask: [matinf](https://github.com/huggingface/datasets/blob/master/datasets/matinf/matinf.py) (original data need to be downloaded by the user because it requires authentication) -- speech recognition: [librispeech_asr](https://github.com/huggingface/datasets/blob/master/datasets/librispeech_asr/librispeech_asr.py) (original data is in .flac format) +- question-answering: [squad](https://github.com/huggingface/datasets/blob/main/datasets/squad/squad.py) (original data are in json) +- natural language inference: 
[snli](https://github.com/huggingface/datasets/blob/main/datasets/snli/snli.py) (original data are in text files with tab separated columns) +- POS/NER: [conll2003](https://github.com/huggingface/datasets/blob/main/datasets/conll2003/conll2003.py) (original data are in text files with one token per line) +- sentiment analysis: [allocine](https://github.com/huggingface/datasets/blob/main/datasets/allocine/allocine.py) (original data are in jsonl files) +- text classification: [ag_news](https://github.com/huggingface/datasets/blob/main/datasets/ag_news/ag_news.py) (original data are in csv files) +- translation: [flores](https://github.com/huggingface/datasets/blob/main/datasets/flores/flores.py) (original data come from text files - one per language) +- summarization: [billsum](https://github.com/huggingface/datasets/blob/main/datasets/billsum/billsum.py) (original data are in json files) +- benchmark: [glue](https://github.com/huggingface/datasets/blob/main/datasets/glue/glue.py) (original data are various formats) +- multilingual: [xquad](https://github.com/huggingface/datasets/blob/main/datasets/xquad/xquad.py) (original data are in json) +- multitask: [matinf](https://github.com/huggingface/datasets/blob/main/datasets/matinf/matinf.py) (original data need to be downloaded by the user because it requires authentication) +- speech recognition: [librispeech_asr](https://github.com/huggingface/datasets/blob/main/datasets/librispeech_asr/librispeech_asr.py) (original data is in .flac format) +- image classification: [beans](https://github.com/huggingface/datasets/blob/main/datasets/beans/beans.py) (original data are in .jpg format) +- object detection: [wider_face](https://github.com/huggingface/datasets/blob/main/datasets/wider_face/wider_face.py) (image files are in .jpg format and metadata come from text files) While you are developing the dataset script you can list test it by opening a python interpreter and running the script (the script is dynamically updated 
each time you modify it): @@ -155,7 +178,7 @@ datasets-cli test datasets/ --save_infos --all_configs --da ``` To have the configs use the path from `--data_dir` when generating them. -### Automatically add code metadata +#### Automatically add code metadata Now that your dataset script runs and create a dataset with the format you expected, you can add the JSON metadata and test data. @@ -222,7 +245,7 @@ Note: You can use the CLI tool from the root of the repository with the followin python src/datasets/commands/datasets_cli.py ``` -### Open a Pull Request on the main HuggingFace repo and share your work!! +#### Open a Pull Request on the main HuggingFace repo and share your work!! Here are the step to open the Pull-Request on the main repo. @@ -265,18 +288,18 @@ Here are the step to open the Pull-Request on the main repo. It is a good idea to sync your copy of the code with the original repository regularly. This way you can quickly account for changes: - - If you haven't pushed your branch yet, you can rebase on upstream/master: + - If you haven't pushed your branch yet, you can rebase on upstream/main: ```bash git fetch upstream - git rebase upstream/master + git rebase upstream/main ``` - If you have already pushed your branch, do not rebase but merge instead: ```bash git fetch upstream - git merge upstream/master + git merge upstream/main ``` Push the changes to your account using: @@ -291,7 +314,7 @@ Congratulation you have open a PR to add a new dataset 🙏 **Important note:** In order to merge your Pull Request the maintainers will require you to tag and add a dataset card. Here is now how to do this last step: -### Tag the dataset and write the dataset card +#### Tag the dataset and write the dataset card Each dataset is provided with a dataset card. @@ -313,7 +336,7 @@ Creating the dataset card goes in two steps: - **Very important as well:** On the right side of the tagging app, you will also find an expandable section called **Show Markdown Data Fields**. 
This gives you a starting point for the description of the fields in your dataset: you should paste it into the **Data Fields** section of the [online form](https://huggingface.co/datasets/card-creator/) (or your local README.md), then modify the description as needed. Briefly describe each of the fields and indicate if they have a default value (e.g. when there is no label). If the data has span indices, describe their attributes (character level or word level, contiguous or not, etc). If the datasets contains example IDs, state whether they have an inherent meaning, such as a mapping to other datasets or pointing to relationships between data points. - Example from the [ELI5 card](https://github.com/huggingface/datasets/tree/master/datasets/eli5#data-fields): + Example from the [ELI5 card](https://github.com/huggingface/datasets/tree/main/datasets/eli5#data-fields): Data Fields: - q_id: a string question identifier for each example, corresponding to its ID in the Pushshift.io Reddit submission dumps. @@ -322,9 +345,9 @@ Creating the dataset card goes in two steps: - title_urls: list of the extracted URLs, the nth element of the list was replaced by URL_n - - **Very nice to have but optional for now:** Complete all you can find in the dataset card using the detailed instructions for completed it which are in the `README_guide.md` here: https://github.com/huggingface/datasets/blob/master/templates/README_guide.md. + - **Very nice to have but optional for now:** Complete all you can find in the dataset card using the detailed instructions for completed it which are in the `README_guide.md` here: https://github.com/huggingface/datasets/blob/main/templates/README_guide.md. 
- Here is a completed example: https://github.com/huggingface/datasets/tree/master/datasets/eli5 for inspiration + Here is a completed example: https://github.com/huggingface/datasets/tree/main/datasets/eli5 for inspiration If you don't know what to write in a field and can find it, write: `[More Information Needed]` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 05b483e8919..89e787256bf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -41,7 +41,7 @@ If you would like to work on any of the open Issues: git checkout -b a-descriptive-name-for-my-changes ``` - **do not** work on the `master` branch. + **do not** work on the `main` branch. 4. Set up a development environment by running the following command in a virtual environment: @@ -73,7 +73,7 @@ If you would like to work on any of the open Issues: ```bash git fetch upstream - git rebase upstream/master + git rebase upstream/main ``` Push the changes to your account using: @@ -86,83 +86,26 @@ If you would like to work on any of the open Issues: ## How to add a dataset -A [more complete guide](https://github.com/huggingface/datasets/blob/master/ADD_NEW_DATASET.md) to adding a dataset was written for our December 2020 `datasets` sprint, we recommend reading through it before you start the process. Here is a summary of the steps described there: +You can share your dataset on https://huggingface.co/datasets directly using your account, see the documentation: -1. Make sure you followed steps 1-4 of the section [*How to contribute to datasets?*](#how-to-contribute-to-datasets). - -2. Create your dataset folder under `datasets/` and create your dataset script under `datasets//.py`. You can check out other dataset scripts under `datasets` for some inspiration. Note on naming: the dataset class should be camel case, while the dataset name is its snake case equivalent (ex: `class BookCorpus(datasets.GeneratorBasedBuilder)` for the dataset `book_corpus`). - -3. 
**Make sure you run all of the following commands from the root of your `datasets` git clone.** To check that your dataset works correctly and to create its `dataset_infos.json` file run the command: - - ```bash - datasets-cli test datasets/ --save_infos --all_configs - ``` - -4. If the command was succesful, you should now create some dummy data. Use the following command to get in-detail instructions on how to create the dummy data: - - ```bash - datasets-cli dummy_data datasets/ - ``` - - There is a tool that automatically generates dummy data for you. At the moment it supports data files in the following format: txt, csv, tsv, jsonl, json, xml. - If the extensions of the raw data files of your dataset are in this list, then you can automatically generate your dummy data with: - - ```bash - datasets-cli dummy_data datasets/ --auto_generate - ``` - -5. Now test that both the real data and the dummy data work correctly using the following commands: - - *For the real data*: - ```bash - RUN_SLOW=1 pytest tests/test_dataset_common.py::LocalDatasetTest::test_load_real_dataset_ - ``` - and - - *For the dummy data*: - ```bash - RUN_SLOW=1 pytest tests/test_dataset_common.py::LocalDatasetTest::test_load_dataset_all_configs_ - ``` - -6. Finally, take some time to document your dataset for other users. Each dataset should be accompanied by a `README.md` dataset card in its directory which describes the data and contains tags representing languages and tasks supported to be easily discoverable. You can find information on how to fill out the card either manually or by using our [web app](https://huggingface.co/datasets/card-creator/) in the following [guide](https://github.com/huggingface/datasets/blob/master/templates/README_guide.md). - -7. If all tests pass, your dataset works correctly. Awesome! You can now follow steps 6, 7 and 8 of the section [*How to contribute to 🤗 Datasets?*](#how-to-contribute-to-Datasets). 
If you experience problems with the dummy data tests, you might want to take a look at the section *Help for dummy data tests* below. - - - -### Help for dummy data tests - -Follow these steps in case the dummy data test keeps failing: - -- Verify that all filenames are spelled correctly. Rerun the command - ```bash - datasets-cli dummy_data datasets/ - ``` - and make sure you follow the exact instructions provided by the command of step 5). - -- Your datascript might require a difficult dummy data structure. In this case make sure you fully understand the data folder logit created by the function `_split_generators(...)` and expected by the function `_generate_examples(...)` of your dataset script. Also take a look at `tests/README.md` which lists different possible cases of how the dummy data should be created. - -- If the dummy data tests still fail, open a PR in the repo anyways and make a remark in the description that you need help creating the dummy data. - -If you're looking for more details about dataset scripts creation, please refer to the [documentation](https://huggingface.co/docs/datasets/master/dataset_script). - -Note: You can use the CLI tool from the root of the repository with the following command: -```bash -python src/datasets/commands/datasets_cli.py -``` +* [Create a dataset and upload files](https://huggingface.co/docs/datasets/upload_dataset) +* [Advanced guide using dataset scripts](https://huggingface.co/docs/datasets/share) ## How to contribute to the dataset cards Improving the documentation of datasets is an ever increasing effort and we invite users to contribute by sharing their insights with the community in the `README.md` dataset cards provided for each dataset. -If you see that a dataset card is missing information that you are in a position to provide (as an author of the dataset or as an experienced user), the best thing you can do is to open a Pull Request with the updated `README.md` file. 
We provide: -- a [template](https://github.com/huggingface/datasets/blob/master/templates/README.md) -- a [guide](https://github.com/huggingface/datasets/blob/master/templates/README_guide.md) describing what information should go into each of the paragraphs -- and if you need inspiration, we recommend looking through a [completed example](https://github.com/huggingface/datasets/blob/master/datasets/eli5/README.md) +If you see that a dataset card is missing information that you are in a position to provide (as an author of the dataset or as an experienced user), the best thing you can do is to open a Pull Request on the Hugging Face Hub. To do so, go to the "Files and versions" tab of the dataset page and edit the `README.md` file. We provide: + +* a [template](https://github.com/huggingface/datasets/blob/main/templates/README.md) +* a [guide](https://github.com/huggingface/datasets/blob/main/templates/README_guide.md) describing what information should go into each of the paragraphs +* and if you need inspiration, we recommend looking through a [completed example](https://github.com/huggingface/datasets/blob/main/datasets/eli5/README.md) + +Note that datasets that are outside of a namespace (`squad`, `imagenet-1k`, etc.) are maintained on GitHub. In this case you have to open a Pull Request on GitHub to edit the file at `datasets/<dataset-name>/README.md`. If you are a **dataset author**... you know what to do, it is your dataset after all ;) ! We would especially appreciate if you could help us fill in information about the process of creating the dataset, and take a moment to reflect on its social impact and possible limitations if you haven't already done so in the dataset paper or in another data statement. -If you are a **user of a dataset**, the main source of information should be the dataset paper if it is available: we recommend pulling information from there into the relevant paragraphs of the template. 
We also eagerly welcome discussions on the [Considerations for Using the Data](https://github.com/huggingface/datasets/blob/master/templates/README_guide.md#considerations-for-using-the-data) based on existing scholarship or personal experience that would benefit the whole community. +If you are a **user of a dataset**, the main source of information should be the dataset paper if it is available: we recommend pulling information from there into the relevant paragraphs of the template. We also eagerly welcome discussions on the [Considerations for Using the Data](https://github.com/huggingface/datasets/blob/main/templates/README_guide.md#considerations-for-using-the-data) based on existing scholarship or personal experience that would benefit the whole community. Finally, if you want more information on the how and why of dataset cards, we strongly recommend reading the foundational works [Datasheets for Datasets](https://arxiv.org/abs/1803.09010) and [Data Statements for NLP](https://www.aclweb.org/anthology/Q18-1041/). @@ -170,5 +113,5 @@ Thank you for your contribution! ## Code of conduct -This project adheres to the HuggingFace [code of conduct](CODE_OF_CONDUCT.md). +This project adheres to the HuggingFace [code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. 
diff --git a/Makefile b/Makefile index e3615d44ed0..b7936753dba 100644 --- a/Makefile +++ b/Makefile @@ -3,14 +3,14 @@ # Check that source code meets quality standards quality: - black --check --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics + black --check --line-length 119 --target-version py37 tests src benchmarks datasets/**/*.py metrics isort --check-only tests src benchmarks datasets/**/*.py metrics flake8 tests src benchmarks datasets/**/*.py metrics # Format source code automatically style: - black --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics + black --line-length 119 --target-version py37 tests src benchmarks datasets/**/*.py metrics isort tests src benchmarks datasets/**/*.py metrics # Run tests for the library diff --git a/README.md b/README.md index c7aba5d3ce7..348c7bc5464 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@


- +

- - Build + + Build - + GitHub @@ -27,20 +27,21 @@ 🤗 Datasets is a lightweight library providing **two** main features: -- **one-line dataloaders for many public datasets**: one-liners to download and pre-process any of the ![number of datasets](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/datasets&color=brightgreen) major public datasets (in 467 languages and dialects!) provided on the [HuggingFace Datasets Hub](https://huggingface.co/datasets). With a simple command like `squad_dataset = load_dataset("squad")`, get any of these datasets ready to use in a dataloader for training/evaluating a ML model (Numpy/Pandas/PyTorch/TensorFlow/JAX), -- **efficient data pre-processing**: simple, fast and reproducible data pre-processing for the above public datasets as well as your own local datasets in CSV/JSON/text. With simple commands like `tokenized_dataset = dataset.map(tokenize_example)`, efficiently prepare the dataset for inspection and ML model evaluation and training. +- **one-line dataloaders for many public datasets**: one-liners to download and pre-process any of the ![number of datasets](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/datasets&color=brightgreen) major public datasets (text datasets in 467 languages and dialects, image datasets, audio datasets, etc.) provided on the [HuggingFace Datasets Hub](https://huggingface.co/datasets). With a simple command like `squad_dataset = load_dataset("squad")`, get any of these datasets ready to use in a dataloader for training/evaluating a ML model (Numpy/Pandas/PyTorch/TensorFlow/JAX), +- **efficient data pre-processing**: simple, fast and reproducible data pre-processing for the above public datasets as well as your own local datasets in CSV/JSON/text/PNG/JPEG/etc. With simple commands like `processed_dataset = dataset.map(process_example)`, efficiently prepare the dataset for inspection and ML model evaluation and training. 
-[🎓 **Documentation**](https://huggingface.co/docs/datasets/) [🕹 **Colab tutorial**](https://colab.research.google.com/github/huggingface/datasets/blob/master/notebooks/Overview.ipynb) +[🎓 **Documentation**](https://huggingface.co/docs/datasets/) [🕹 **Colab tutorial**](https://colab.research.google.com/github/huggingface/datasets/blob/main/notebooks/Overview.ipynb) -[🔎 **Find a dataset in the Hub**](https://huggingface.co/datasets) [🌟 **Add a new dataset to the Hub**](https://github.com/huggingface/datasets/blob/master/ADD_NEW_DATASET.md) +[🔎 **Find a dataset in the Hub**](https://huggingface.co/datasets) [🌟 **Add a new dataset to the Hub**](https://github.com/huggingface/datasets/blob/main/ADD_NEW_DATASET.md)

- +

-🤗 Datasets also provides access to +15 evaluation metrics and is designed to let the community easily add and share new datasets and evaluation metrics. +🤗 Datasets is designed to let the community easily add and share new datasets. 🤗 Datasets has many additional interesting features: + - Thrive on large datasets: 🤗 Datasets naturally frees the user from RAM memory limitation, all datasets are memory-mapped using an efficient zero-serialization cost backend (Apache Arrow). - Smart caching: never wait for your data to process several times. - Lightweight and fast with a transparent and pythonic API (multi-processing/caching/memory-mapping). @@ -68,13 +69,13 @@ conda install -c huggingface -c conda-forge datasets Follow the installation pages of TensorFlow and PyTorch to see how to install them with conda. -For more details on installation, check the installation page in the documentation: https://huggingface.co/docs/datasets/installation.html +For more details on installation, check the installation page in the documentation: https://huggingface.co/docs/datasets/installation ## Installation to use with PyTorch/TensorFlow/pandas If you plan to use 🤗 Datasets with PyTorch (1.0+), TensorFlow (2.2+) or pandas, you should also install PyTorch, TensorFlow or pandas. 
-For more details on using the library with NumPy, pandas, PyTorch or TensorFlow, check the quick start page in the documentation: https://huggingface.co/docs/datasets/quickstart.html +For more details on using the library with NumPy, pandas, PyTorch or TensorFlow, check the quick start page in the documentation: https://huggingface.co/docs/datasets/quickstart # Usage @@ -82,13 +83,13 @@ For more details on using the library with NumPy, pandas, PyTorch or TensorFlow, - `datasets.list_datasets()` to list the available datasets - `datasets.load_dataset(dataset_name, **kwargs)` to instantiate a dataset -- `datasets.list_metrics()` to list the available metrics -- `datasets.load_metric(metric_name, **kwargs)` to instantiate a metric + +This library can be used for text/image/audio/etc. datasets. Here is an example to load a text dataset: Here is a quick example: ```python -from datasets import list_datasets, load_dataset, list_metrics, load_metric +from datasets import list_datasets, load_dataset # Print all the available datasets print(list_datasets()) @@ -97,12 +98,6 @@ print(list_datasets()) squad_dataset = load_dataset('squad') print(squad_dataset['train'][0]) -# List all the available metrics -print(list_metrics()) - -# Load a metric -squad_metric = load_metric('squad') - # Process the dataset - add a column with the length of the context texts dataset_with_length = squad_dataset.map(lambda x: {"length": len(x["context"])}) @@ -115,26 +110,29 @@ tokenized_dataset = squad_dataset.map(lambda x: tokenizer(x['context']), batched For more details on using the library, check the quick start page in the documentation: https://huggingface.co/docs/datasets/quickstart.html and the specific pages on: -- Loading a dataset https://huggingface.co/docs/datasets/loading.html -- What's in a Dataset: https://huggingface.co/docs/datasets/access.html -- Processing data with 🤗 Datasets: https://huggingface.co/docs/datasets/process.html -- Writing your own dataset loading script: 
https://huggingface.co/docs/datasets/dataset_script.html +- Loading a dataset: https://huggingface.co/docs/datasets/loading +- What's in a Dataset: https://huggingface.co/docs/datasets/access +- Processing data with 🤗 Datasets: https://huggingface.co/docs/datasets/process +- Processing audio data: https://huggingface.co/docs/datasets/audio_process +- Processing image data: https://huggingface.co/docs/datasets/image_process +- Writing your own dataset loading script: https://huggingface.co/docs/datasets/dataset_script - etc. Another introduction to 🤗 Datasets is the tutorial on Google Colab here: -[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/datasets/blob/master/notebooks/Overview.ipynb) +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/datasets/blob/main/notebooks/Overview.ipynb) # Add a new dataset to the Hub We have a very detailed step-by-step guide to add a new dataset to the ![number of datasets](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/datasets&color=brightgreen) datasets already provided on the [HuggingFace Datasets Hub](https://huggingface.co/datasets). -You will find [the step-by-step guide here](https://github.com/huggingface/datasets/blob/master/ADD_NEW_DATASET.md) to add a dataset to this repository. +You will find [the step-by-step guide here](https://huggingface.co/docs/datasets/share.html) to add a dataset on the Hub. -You can also have your own repository for your dataset on the Hub under your or your organization's namespace and share it with the community. More information in [the documentation section about dataset sharing](https://huggingface.co/docs/datasets/share.html). +However, if you prefer to add your dataset in this repository, you can find the guide [here](https://github.com/huggingface/datasets/blob/main/ADD_NEW_DATASET.md). 
# Main differences between 🤗 Datasets and `tfds` If you are familiar with the great TensorFlow Datasets, here are the main differences between 🤗 Datasets and `tfds`: + - the scripts in 🤗 Datasets are not provided within the library but are queried, downloaded/cached and dynamically loaded upon request - 🤗 Datasets also provides evaluation metrics in a similar fashion to the datasets, i.e. as dynamically installed scripts with a unified API. This gives access to the pair of a benchmark dataset and a benchmark metric for instance for benchmarks like [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) or [GLUE](https://gluebenchmark.com/). - the backend serialization of 🤗 Datasets is based on [Apache Arrow](https://arrow.apache.org/) instead of TF Records and leverage python dataclasses for info and features with some diverging features (we mostly don't do encoding and store the raw data as much as possible in the backend serialization cache). @@ -142,12 +140,13 @@ If you are familiar with the great TensorFlow Datasets, here are the main differ # Disclaimers -Similar to TensorFlow Datasets, 🤗 Datasets is a utility library that downloads and prepares public datasets. We do not host or distribute these datasets, vouch for their quality or fairness, or claim that you have license to use them. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license. +Similar to TensorFlow Datasets, 🤗 Datasets is a utility library that downloads and prepares public datasets. We do not host or distribute most of these datasets, vouch for their quality or fairness, or claim that you have license to use them. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license. 
If you're a dataset owner and wish to update any part of it (description, citation, etc.), or do not want your dataset to be included in this library, please get in touch through a [GitHub issue](https://github.com/huggingface/datasets/issues/new). Thanks for your contribution to the ML community! ## BibTeX -If you want to cite our 🤗 Datasets [paper](https://arxiv.org/abs/2109.02846) and library, you can use these: + +If you want to cite our 🤗 Datasets library, you can use our [paper](https://arxiv.org/abs/2109.02846): ```bibtex @inproceedings{lhoest-etal-2021-datasets, @@ -197,45 +196,5 @@ If you want to cite our 🤗 Datasets [paper](https://arxiv.org/abs/2109.02846) primaryClass={cs.CL}, } ``` -```bibtex -@software{quentin_lhoest_2021_5639822, - author = {Quentin Lhoest and - Albert Villanova del Moral and - Patrick von Platen and - Thomas Wolf and - Mario Šaško and - Yacine Jernite and - Abhishek Thakur and - Lewis Tunstall and - Suraj Patil and - Mariama Drame and - Julien Chaumond and - Julien Plu and - Joe Davison and - Simon Brandeis and - Victor Sanh and - Teven Le Scao and - Kevin Canwen Xu and - Nicolas Patry and - Steven Liu and - Angelina McMillan-Major and - Philipp Schmid and - Sylvain Gugger and - Nathan Raw and - Sylvain Lesage and - Anton Lozhkov and - Matthew Carrigan and - Théo Matussière and - Leandro von Werra and - Lysandre Debut and - Stas Bekman and - Clément Delangue}, - title = {huggingface/datasets: 1.15.1}, - month = nov, - year = 2021, - publisher = {Zenodo}, - version = {1.15.1}, - doi = {10.5281/zenodo.5639822}, - url = {https://doi.org/10.5281/zenodo.5639822} -} -``` + +If you need to cite a specific version of our 🤗 Datasets library for reproducibility, you can use the corresponding version Zenodo DOI from this [list](https://zenodo.org/search?q=conceptrecid:%224817768%22&sort=-version&all_versions=True). 
diff --git a/SECURITY.md b/SECURITY.md index a624cb57fdb..90c1b5edcfd 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -16,8 +16,9 @@ currently being supported with security updates. Each major version is currently being supported with security updates. | Version | Supported | -| ------- | ------------------ | +|---------|--------------------| | 1.x.x | :white_check_mark: | +| 2.x.x | :white_check_mark: | ## Reporting a Vulnerability @@ -29,4 +30,4 @@ reported vulnerability, what to expect if the vulnerability is accepted or declined, etc. --> -To report a security vulnerability, please contact: feedback@huggingface.co +To report a security vulnerability, please contact: security@huggingface.co diff --git a/additional-tests-requirements.txt b/additional-tests-requirements.txt index a827c308c9f..00b5b8d62a3 100644 --- a/additional-tests-requirements.txt +++ b/additional-tests-requirements.txt @@ -1,4 +1,4 @@ -unbabel-comet>=1.0.0;python_version>'3.6' +unbabel-comet>=1.0.0 git+https://github.com/google-research/bleurt.git git+https://github.com/ns-moosavi/coval.git git+https://github.com/hendrycks/math.git diff --git a/datasets/acronym_identification/README.md b/datasets/acronym_identification/README.md index a2e205679fd..732633f28ec 100644 --- a/datasets/acronym_identification/README.md +++ b/datasets/acronym_identification/README.md @@ -3,9 +3,9 @@ annotations_creators: - expert-generated language_creators: - found -languages: +language: - en -licenses: +license: - mit multilinguality: - monolingual @@ -14,11 +14,20 @@ size_categories: source_datasets: - original task_categories: -- structure-prediction +- token-classification task_ids: -- structure-prediction-other-acronym-identification +- token-classification-other-acronym-identification paperswithcode_id: acronym-identification pretty_name: Acronym Identification Dataset +train-eval-index: +- config: default + task: token-classification + task_id: entity_extraction + splits: + eval_split: test + col_mapping: + 
tokens: tokens + labels: tags --- # Dataset Card for Acronym Identification Dataset diff --git a/datasets/ade_corpus_v2/README.md b/datasets/ade_corpus_v2/README.md index 5d375ed9e2b..e90b4d70e64 100644 --- a/datasets/ade_corpus_v2/README.md +++ b/datasets/ade_corpus_v2/README.md @@ -3,37 +3,78 @@ annotations_creators: - expert-generated language_creators: - found -languages: +language: - en -licenses: +license: - unknown multilinguality: - monolingual size_categories: - Ade_corpus_v2_classification: - - 10K None: + """Check that the requested task name is a valid bigbench json task.""" + if task_name in bb_utils.get_all_json_task_names(): + return + elif task_name in bb_utils.get_all_programmatic_task_names(): + raise ValueError( + "BIG-Bench does not support programmatic tasks through HuggingFace datasets" + f"Please see {_HOMEPAGE} for more information for how to interact with the programmatic tasks." + ) + else: + raise ValueError( + f"Invalid task_name. Got task_name = {task_name}. Please choose one from:\n -- " + + "\n -- ".join(bb_utils.get_all_json_task_names()) + ) + + +def validate_subtask_name(task_name: str, subtask_name: str) -> None: + """Check that the requested subtask name is a valid bigbench subtask.""" + subtasks = [name.split(":")[-1] for name in bb_utils.get_subtask_names_from_task(task_name)] + if not subtasks: + raise ValueError(f"Task {task_name} has no subtasks. Got subtask_name = {subtask_name}.") + elif subtask_name not in subtasks: + raise ValueError( + f"Invalid subtask_name {subtask_name} for task {task_name}. 
Please choose one from:\n -- " + + "\n -- ".join(subtasks) + ) + + +class BigBenchConfig(datasets.BuilderConfig): + def __init__( + self, + name, + subtask_name: Optional[str] = None, + num_shots: int = 0, + max_examples: Optional[int] = None, + **kwargs, + ): + if subtask_name is not None: + name += f"_subtask={subtask_name}" + if num_shots != 0: + name += f"_num_shots={num_shots}" + if max_examples is not None: + name += f"_max_examples={max_examples}" + super().__init__( + name=name, + **kwargs, + ) + """BIG-bench configuration. + + Args: + name: BIG-bench task name. + subtask_name: BIG-bench subtask name. Accepts both "task_name:subtask_name" and "subtask_name" formats. + num_shots: Number of few-shot examples in input prompt. Default is zero. + max_examples: Limit number of examples for each task. Default is including all examples. + """ + self.task_name = name + self.subtask_name = subtask_name + self.num_shots = num_shots + self.max_examples = max_examples + + +class Bigbench(datasets.GeneratorBasedBuilder): + """The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark + intended to probe large language models, and extrapolate their future capabilities.""" + + VERSION = datasets.Version("1.0.0") + + BUILDER_CONFIG_CLASS = BigBenchConfig + + BUILDER_CONFIGS = [ + BigBenchConfig(name=name, version=datasets.Version("1.0.0")) for name in bb_utils.get_all_json_task_names() + ] + + def _info(self): + features = datasets.Features( + { + "idx": datasets.Value("int32"), + "inputs": datasets.Value("string"), + "targets": datasets.Sequence(datasets.Value("string")), + "multiple_choice_targets": datasets.Sequence(datasets.Value("string")), + "multiple_choice_scores": datasets.Sequence(datasets.Value("int32")), + } + ) + return datasets.DatasetInfo( + # This is the description that will appear on the datasets page. 
+ description=_DESCRIPTION, + # This defines the different columns of the dataset and their types + features=features, # Here we define them above because they are different between the two configurations + # If there's a common (input, target) tuple from the features, + # specify them here. They'll be used if as_supervised=True in + # builder.as_dataset. + supervised_keys=None, + # Homepage of the dataset for documentation + homepage=_HOMEPAGE, + # License for the dataset if available + license=_LICENSE, + # Citation for the dataset + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + return [ + datasets.SplitGenerator( + name=datasets.splits.NamedSplit("default"), # TODO(ajandreassen): Is there a way to call this 'all'? + # These kwargs will be passed to _generate_examples + gen_kwargs={ + "split": "all", + }, + ), + datasets.SplitGenerator( + name=datasets.Split.TRAIN, + # These kwargs will be passed to _generate_examples + gen_kwargs={ + "split": "train", + }, + ), + datasets.SplitGenerator( + name=datasets.Split.VALIDATION, + # These kwargs will be passed to _generate_examples + gen_kwargs={ + "split": "validation", + }, + ), + ] + + def _generate_examples( + self, + split, # method parameters are unpacked from `gen_kwargs` as given in `_split_generators` + ): + validate_task_name(self.config.task_name) + if self.config.subtask_name: + # Subtasks are sometimes in bigbench written as task_name:subtask_name. 
+ # We want to remove the task_name from the subtask names: + self.config.subtask_name = self.config.subtask_name.split(":")[-1] + validate_subtask_name(self.config.task_name, self.config.subtask_name) + + """Yields examples as (key, example) tuples.""" + if split == "all": + # not cutoff in number of examples for 'all' split + MIN_VALIDATION_EXAMPLES = 0 + else: + MIN_VALIDATION_EXAMPLES = 16 + + try: + task_path, json_util = bb_json_paths.get_task_path(self.config.task_name) + + has_subtasks = bb_json_paths.has_subtasks(self.config.task_name) + if has_subtasks: + subtask_names = bb_json_paths.get_subtask_names(self.config.task_name) + num_subtasks = len(subtask_names) + min_validation_examples_per_subtask = div_or_none(MIN_VALIDATION_EXAMPLES, num_subtasks) + + if not has_subtasks: + ds_fn = bbb.get_dataset_fn( + task_name=self.config.task_name, + task_path=task_path, + subtask_name=None, + num_shots=self.config.num_shots, + bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE, + max_examples=self.config.max_examples, + json_util=json_util, + min_validation_examples=MIN_VALIDATION_EXAMPLES, + format_fn=json_task.default_format_fn, + ) + ds_list = [ds_fn(split)] + elif self.config.subtask_name is not None: + ds_fn = bbb.get_dataset_fn( + task_name=self.config.task_name, + task_path=task_path, + subtask_name=self.config.subtask_name, + num_shots=self.config.num_shots, + bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE, + max_examples=self.config.max_examples, + json_util=json_util, + min_validation_examples=min_validation_examples_per_subtask, + format_fn=json_task.default_format_fn, + ) + ds_list = [ds_fn(split)] + else: + # Create mixture of all subtasks + ds_list = [] + for subtask_name in subtask_names: + subtask_name = subtask_name.split(":")[-1] + logger.info(f"Loading subtask {split} split", subtask_name) + ds_fn = bbb.get_dataset_fn( + task_name=self.config.task_name, + task_path=task_path, + subtask_name=subtask_name, + num_shots=self.config.num_shots, + 
bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE, + max_examples=div_or_none(self.config.max_examples, num_subtasks), + json_util=json_util, + min_validation_examples=min_validation_examples_per_subtask, + format_fn=json_task.default_format_fn, + ) + ds_list.append(ds_fn(split)) + except ValueError as value_error: + # BIG-Bench requires at least 16 examples to use the train & validation splits, + # while using 'all'/'default' does not have such a requirement. + if "has too few examples" in value_error.args[0] and split != "all": + logger.warning( + f"-- WARNING: skipping split {split} because it has too few examples. Please use 'default' split." + ) + logger.warning(value_error) + return + raise value_error + + unique_key_counter = 0 + for ds in ds_list: + for example in ds: + unique_key_counter += 1 + yield unique_key_counter, { + "idx": example["idx"], + "inputs": example["inputs"].numpy().decode().strip(), + "targets": [target.numpy().decode().strip() for target in example["targets"]], + "multiple_choice_targets": [ + targets.decode().strip() for targets in example["multiple_choice_targets"].numpy() + ], + "multiple_choice_scores": [scores for scores in example["multiple_choice_scores"].numpy()], + } diff --git a/datasets/bigbench/dataset_infos.json b/datasets/bigbench/dataset_infos.json new file mode 100644 index 00000000000..7f604a04db9 --- /dev/null +++ b/datasets/bigbench/dataset_infos.json @@ -0,0 +1 @@ +{"abstract_narrative_understanding": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, 
"_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "abstract_narrative_understanding", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 6574843, "num_examples": 3000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 5261643, "num_examples": 2400, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 1313224, "num_examples": 600, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 13149710, "size_in_bytes": 13149710}, "anachronisms": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": 
"Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "anachronisms", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 48937, "num_examples": 230, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 39209, "num_examples": 184, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 9752, "num_examples": 46, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 97898, "size_in_bytes": 97898}, "analogical_similarity": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", 
"config_name": "analogical_similarity", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1374163, "num_examples": 323, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1101796, "num_examples": 259, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 272391, "num_examples": 64, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2748350, "size_in_bytes": 2748350}, "analytic_entailment": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "analytic_entailment", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 17367, "num_examples": 70, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 13413, 
"num_examples": 54, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 3978, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 34758, "size_in_bytes": 34758}, "arithmetic": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "arithmetic", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 3848183, "num_examples": 15023, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 3078715, "num_examples": 12019, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 769493, "num_examples": 3004, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 7696391, "size_in_bytes": 7696391}, "ascii_word_recognition": 
{"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "ascii_word_recognition", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 4985315, "num_examples": 5000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 3997801, "num_examples": 4000, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 987542, "num_examples": 1000, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 9970658, "size_in_bytes": 9970658}, "authorship_verification": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and 
extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "authorship_verification", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 14118946, "num_examples": 880, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 11288769, "num_examples": 704, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 2830201, "num_examples": 176, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 28237916, "size_in_bytes": 28237916}, "auto_categorization": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": 
"string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "auto_categorization", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 40618, "num_examples": 328, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 33053, "num_examples": 263, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 7594, "num_examples": 65, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 81265, "size_in_bytes": 81265}, "auto_debugging": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, 
"multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "auto_debugging", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 5145, "num_examples": 34, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2682, "num_examples": 18, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 2491, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 10318, "size_in_bytes": 10318}, "bbq_lite_json": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "bbq_lite_json", "version": {"version_str": "0.0.0", 
"description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 6898580, "num_examples": 16076, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 5515066, "num_examples": 12866, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 1383539, "num_examples": 3210, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 13797185, "size_in_bytes": 13797185}, "bridging_anaphora_resolution_barqa": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "bridging_anaphora_resolution_barqa", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1971124, "num_examples": 648, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1537357, "num_examples": 519, 
"dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 433796, "num_examples": 129, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 3942277, "size_in_bytes": 3942277}, "causal_judgment": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "causal_judgment", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 204974, "num_examples": 190, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 165021, "num_examples": 152, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 39977, "num_examples": 38, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 409972, "size_in_bytes": 409972}, "cause_and_effect": {"description": "The 
Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "cause_and_effect", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 49397, "num_examples": 153, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 39691, "num_examples": 123, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 9730, "num_examples": 30, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 98818, "size_in_bytes": 98818}, "checkmate_in_one": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language 
models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "checkmate_in_one", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 3140634, "num_examples": 3498, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2516239, "num_examples": 2799, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 624419, "num_examples": 699, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 6281292, "size_in_bytes": 6281292}, "chess_state_tracking": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": 
{"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "chess_state_tracking", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 3270710, "num_examples": 6000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2616922, "num_examples": 4800, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 653816, "num_examples": 1200, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 6541448, "size_in_bytes": 6541448}, "chinese_remainder_theorem": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": 
{"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "chinese_remainder_theorem", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 153313, "num_examples": 500, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 122679, "num_examples": 400, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 30662, "num_examples": 100, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 306654, "size_in_bytes": 306654}, "cifar10_classification": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "cifar10_classification", "version": {"version_str": "0.0.0", 
"description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 111049748, "num_examples": 20000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 88804772, "num_examples": 16000, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 22245000, "num_examples": 4000, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 222099520, "size_in_bytes": 222099520}, "code_line_description": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "code_line_description", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 33733, "num_examples": 60, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 25583, "num_examples": 44, "dataset_name": "bigbench"}, 
"validation": {"name": "validation", "num_bytes": 8174, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 67490, "size_in_bytes": 67490}, "codenames": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "codenames", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 25234, "num_examples": 85, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 20001, "num_examples": 68, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5262, "num_examples": 17, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 50497, "size_in_bytes": 50497}, "color": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative 
benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "color", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1638787, "num_examples": 4000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1311087, "num_examples": 3200, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 327724, "num_examples": 800, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 3277598, "size_in_bytes": 3277598}, "common_morpheme": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", 
"homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "common_morpheme", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 12444, "num_examples": 50, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 8490, "num_examples": 34, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 3978, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 24912, "size_in_bytes": 24912}, "conceptual_combinations": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, 
"id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "conceptual_combinations", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 58948, "num_examples": 103, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 48087, "num_examples": 84, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 10886, "num_examples": 19, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 117921, "size_in_bytes": 117921}, "conlang_translation": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": 
"Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "conlang_translation", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 215239, "num_examples": 164, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 173069, "num_examples": 132, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 42198, "num_examples": 32, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 430506, "size_in_bytes": 430506}, "contextual_parametric_knowledge_conflicts": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "contextual_parametric_knowledge_conflicts", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, 
"splits": {"default": {"name": "default", "num_bytes": 14594175, "num_examples": 17528, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 11671543, "num_examples": 14023, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 2922658, "num_examples": 3505, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 29188376, "size_in_bytes": 29188376}, "crash_blossom": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "crash_blossom", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 12242, "num_examples": 38, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 7037, "num_examples": 22, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5229, "num_examples": 16, 
"dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 24508, "size_in_bytes": 24508}, "crass_ai": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "crass_ai", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 22922, "num_examples": 44, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 14172, "num_examples": 28, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 8774, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 45868, "size_in_bytes": 45868}, "cryobiology_spanish": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and 
extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "cryobiology_spanish", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 38754, "num_examples": 146, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 31198, "num_examples": 117, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 7581, "num_examples": 29, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 77533, "size_in_bytes": 77533}, "cryptonite": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache 
License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "cryptonite", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 2847756, "num_examples": 26157, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2278424, "num_examples": 20926, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 569360, "num_examples": 5231, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 5695540, "size_in_bytes": 5695540}, "cs_algorithms": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": 
{"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "cs_algorithms", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 273274, "num_examples": 1320, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 218868, "num_examples": 1056, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 54430, "num_examples": 264, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 546572, "size_in_bytes": 546572}, "dark_humor_detection": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, 
"task_templates": null, "builder_name": "bigbench", "config_name": "dark_humor_detection", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 26610, "num_examples": 80, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 21315, "num_examples": 64, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5319, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 53244, "size_in_bytes": 53244}, "date_understanding": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "date_understanding", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 95249, "num_examples": 369, "dataset_name": "bigbench"}, "train": 
{"name": "train", "num_bytes": 76443, "num_examples": 296, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 18831, "num_examples": 73, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 190523, "size_in_bytes": 190523}, "disambiguation_qa": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "disambiguation_qa", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 122626, "num_examples": 258, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 98815, "num_examples": 207, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 23835, "num_examples": 51, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 245276, 
"size_in_bytes": 245276}, "discourse_marker_prediction": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "discourse_marker_prediction", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 2091888, "num_examples": 857, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1667020, "num_examples": 686, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 424892, "num_examples": 171, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 4183800, "size_in_bytes": 4183800}, "disfl_qa": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = 
{Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "disfl_qa", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 7965803, "num_examples": 8000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 6377339, "num_examples": 6400, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 1588492, "num_examples": 1600, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 15931634, "size_in_bytes": 15931634}, "dyck_languages": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": 
"Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "dyck_languages", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1238565, "num_examples": 1000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 991204, "num_examples": 800, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 247385, "num_examples": 200, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2477154, "size_in_bytes": 2477154}, "elementary_math_qa": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, 
"id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "elementary_math_qa", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 13471291, "num_examples": 38160, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 10789985, "num_examples": 30531, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 2681331, "num_examples": 7629, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 26942607, "size_in_bytes": 26942607}, "emoji_movie": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": 
"emoji_movie", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 33767, "num_examples": 100, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 27071, "num_examples": 80, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 6720, "num_examples": 20, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 67558, "size_in_bytes": 67558}, "emojis_emotion_prediction": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "emojis_emotion_prediction", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 48155, "num_examples": 131, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 38601, "num_examples": 105, 
"dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 9579, "num_examples": 26, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 96335, "size_in_bytes": 96335}, "empirical_judgments": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "empirical_judgments", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 47574, "num_examples": 99, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 38410, "num_examples": 80, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 9188, "num_examples": 19, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 95172, "size_in_bytes": 95172}, "english_proverbs": {"description": "The Beyond 
the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "english_proverbs", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 22577, "num_examples": 34, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 12103, "num_examples": 18, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 10499, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 45179, "size_in_bytes": 45179}, "english_russian_proverbs": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language 
models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "english_russian_proverbs", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 59974, "num_examples": 80, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 48115, "num_examples": 64, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 11883, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 119972, "size_in_bytes": 119972}, "entailed_polarity": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": 
{"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "entailed_polarity", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 25501, "num_examples": 148, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 20419, "num_examples": 119, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5107, "num_examples": 29, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 51027, "size_in_bytes": 51027}, "entailed_polarity_hindi": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": 
"int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "entailed_polarity_hindi", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 57129, "num_examples": 138, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 45895, "num_examples": 111, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 11258, "num_examples": 27, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 114282, "size_in_bytes": 114282}, "epistemic_reasoning": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "epistemic_reasoning", "version": {"version_str": "0.0.0", "description": null, "major": 0, 
"minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 887932, "num_examples": 2000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 710731, "num_examples": 1600, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 177225, "num_examples": 400, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1775888, "size_in_bytes": 1775888}, "evaluating_information_essentiality": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "evaluating_information_essentiality", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 77564, "num_examples": 68, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 59660, "num_examples": 52, "dataset_name": "bigbench"}, "validation": {"name": 
"validation", "num_bytes": 17928, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 155152, "size_in_bytes": 155152}, "fact_checker": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "fact_checker", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1340092, "num_examples": 7154, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1072921, "num_examples": 5724, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 267195, "num_examples": 1430, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2680208, "size_in_bytes": 2680208}, "fantasy_reasoning": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a 
collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "fantasy_reasoning", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 75987, "num_examples": 201, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 61484, "num_examples": 161, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 14527, "num_examples": 40, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 151998, "size_in_bytes": 151998}, "few_shot_nlg": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench 
Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "few_shot_nlg", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 75985, "num_examples": 153, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 61906, "num_examples": 123, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 14107, "num_examples": 30, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 151998, "size_in_bytes": 151998}, "figure_of_speech_detection": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": 
null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "figure_of_speech_detection", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 21823, "num_examples": 59, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 16046, "num_examples": 43, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5801, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 43670, "size_in_bytes": 43670}, "formal_fallacies_syllogisms_negation": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, 
"_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "formal_fallacies_syllogisms_negation", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 8320026, "num_examples": 14200, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 6657263, "num_examples": 11360, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 1662787, "num_examples": 2840, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 16640076, "size_in_bytes": 16640076}, "gem": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "gem", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, 
"splits": {"default": {"name": "default", "num_bytes": 36067188, "num_examples": 14802, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 28821034, "num_examples": 11845, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 7246182, "num_examples": 2957, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 72134404, "size_in_bytes": 72134404}, "gender_inclusive_sentences_german": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "gender_inclusive_sentences_german", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 126934, "num_examples": 200, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 100676, "num_examples": 160, "dataset_name": "bigbench"}, "validation": {"name": "validation", 
"num_bytes": 26286, "num_examples": 40, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 253896, "size_in_bytes": 253896}, "general_knowledge": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "general_knowledge", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 21928, "num_examples": 70, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 16900, "num_examples": 54, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5052, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 43880, "size_in_bytes": 43880}, "geometric_shapes": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative 
benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "geometric_shapes", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 180621, "num_examples": 359, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 145030, "num_examples": 288, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 35616, "num_examples": 71, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 361267, "size_in_bytes": 361267}, "goal_step_wikihow": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", 
"homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "goal_step_wikihow", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 3571273, "num_examples": 7053, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2856803, "num_examples": 5643, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 714495, "num_examples": 1410, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 7142571, "size_in_bytes": 7142571}, "gre_reading_comprehension": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": 
"Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "gre_reading_comprehension", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 94319, "num_examples": 31, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 44493, "num_examples": 15, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 49850, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 188662, "size_in_bytes": 188662}, "hhh_alignment": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": 
null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "hhh_alignment", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 273006, "num_examples": 221, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 212580, "num_examples": 179, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 60451, "num_examples": 42, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 546037, "size_in_bytes": 546037}, "hindi_question_answering": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "hindi_question_answering", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": 
"default", "num_bytes": 15155809, "num_examples": 6610, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 11984526, "num_examples": 5288, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 3171311, "num_examples": 1322, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 30311646, "size_in_bytes": 30311646}, "hindu_knowledge": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "hindu_knowledge", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 44227, "num_examples": 175, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 35505, "num_examples": 140, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 8747, "num_examples": 35, "dataset_name": "bigbench"}}, 
"download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 88479, "size_in_bytes": 88479}, "hinglish_toxicity": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "hinglish_toxicity", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 60712, "num_examples": 200, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 50081, "num_examples": 160, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 10655, "num_examples": 40, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 121448, "size_in_bytes": 121448}, "human_organs_senses": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate 
their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "human_organs_senses", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 7995, "num_examples": 42, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 4914, "num_examples": 26, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 3105, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 16014, "size_in_bytes": 16014}, "hyperbaton": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", 
"features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "hyperbaton", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 9402856, "num_examples": 50000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 7524430, "num_examples": 40000, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 1878426, "num_examples": 10000, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 18805712, "size_in_bytes": 18805712}, "identify_math_theorems": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": 
{"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "identify_math_theorems", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 104899, "num_examples": 53, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 70343, "num_examples": 37, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 34581, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 209823, "size_in_bytes": 209823}, "identify_odd_metaphor": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, 
"task_templates": null, "builder_name": "bigbench", "config_name": "identify_odd_metaphor", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 27658, "num_examples": 47, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 18183, "num_examples": 31, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 9499, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 55340, "size_in_bytes": 55340}, "implicatures": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "implicatures", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 91892, "num_examples": 492, "dataset_name": "bigbench"}, "train": {"name": 
"train", "num_bytes": 73589, "num_examples": 394, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 18329, "num_examples": 98, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 183810, "size_in_bytes": 183810}, "implicit_relations": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "implicit_relations", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 80011, "num_examples": 85, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 64592, "num_examples": 68, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 15445, "num_examples": 17, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 160048, "size_in_bytes": 
160048}, "intent_recognition": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "intent_recognition", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 323089, "num_examples": 693, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 258444, "num_examples": 555, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 64670, "num_examples": 138, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 646203, "size_in_bytes": 646203}, "international_phonetic_alphabet_nli": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the 
Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "international_phonetic_alphabet_nli", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 79408, "num_examples": 126, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 63363, "num_examples": 101, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 16070, "num_examples": 25, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 158841, "size_in_bytes": 158841}, "international_phonetic_alphabet_transliterate": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": 
"int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "international_phonetic_alphabet_transliterate", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 276092, "num_examples": 1003, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 220913, "num_examples": 803, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 55207, "num_examples": 200, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 552212, "size_in_bytes": 552212}, "intersect_geometry": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": 
"string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "intersect_geometry", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 212987847, "num_examples": 249999, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 170383378, "num_examples": 200000, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 42604469, "num_examples": 49999, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 425975694, "size_in_bytes": 425975694}, "irony_identification": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, 
"task_templates": null, "builder_name": "bigbench", "config_name": "irony_identification", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 28240, "num_examples": 99, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 22972, "num_examples": 80, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5292, "num_examples": 19, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 56504, "size_in_bytes": 56504}, "kanji_ascii": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "kanji_ascii", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 367225, "num_examples": 1092, "dataset_name": "bigbench"}, "train": {"name": 
"train", "num_bytes": 294162, "num_examples": 875, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 73089, "num_examples": 217, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 734476, "size_in_bytes": 734476}, "kannada": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "kannada", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 140859, "num_examples": 316, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 112047, "num_examples": 253, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 28836, "num_examples": 63, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 281742, "size_in_bytes": 281742}, 
"key_value_maps": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "key_value_maps", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 105199, "num_examples": 101, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 84371, "num_examples": 80, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 20852, "num_examples": 21, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 210422, "size_in_bytes": 210422}, "known_unknowns": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating 
the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "known_unknowns", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 8002, "num_examples": 46, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 5166, "num_examples": 30, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 2860, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 16028, "size_in_bytes": 16028}, "language_games": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, 
"targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "language_games", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 979913, "num_examples": 2128, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 783352, "num_examples": 1704, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 196589, "num_examples": 424, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1959854, "size_in_bytes": 1959854}, "language_identification": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": 
{"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "language_identification", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 7391247, "num_examples": 10000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 5920832, "num_examples": 8000, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 1470439, "num_examples": 2000, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 14782518, "size_in_bytes": 14782518}, "linguistic_mappings": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "linguistic_mappings", "version": {"version_str": 
"0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1327183, "num_examples": 15527, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1061698, "num_examples": 12426, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 265514, "num_examples": 3101, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2654395, "size_in_bytes": 2654395}, "linguistics_puzzles": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "linguistics_puzzles", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1746302, "num_examples": 2000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1398341, "num_examples": 1600, "dataset_name": "bigbench"}, 
"validation": {"name": "validation", "num_bytes": 347989, "num_examples": 400, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 3492632, "size_in_bytes": 3492632}, "list_functions": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "list_functions", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 2679536, "num_examples": 10750, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2162181, "num_examples": 8700, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 517356, "num_examples": 2050, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 5359073, "size_in_bytes": 5359073}, "logic_grid_puzzle": {"description": "The Beyond the Imitation 
Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "logic_grid_puzzle", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1456816, "num_examples": 1000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1160620, "num_examples": 800, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 296220, "num_examples": 200, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2913656, "size_in_bytes": 2913656}, "logical_args": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language 
models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "logical_args", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 43630, "num_examples": 32, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 21108, "num_examples": 16, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 22546, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 87284, "size_in_bytes": 87284}, "logical_deduction": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": 
"string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "logical_deduction", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1057966, "num_examples": 1500, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 842792, "num_examples": 1200, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 215198, "num_examples": 300, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2115956, "size_in_bytes": 2115956}, "logical_fallacy_detection": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", 
"id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "logical_fallacy_detection", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 721360, "num_examples": 2800, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 577159, "num_examples": 2240, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 144225, "num_examples": 560, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1442744, "size_in_bytes": 1442744}, "logical_sequence": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "logical_sequence", "version": {"version_str": "0.0.0", "description": null, "major": 0, 
"minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 22771, "num_examples": 39, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 12687, "num_examples": 23, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 10108, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 45566, "size_in_bytes": 45566}, "mathematical_induction": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "mathematical_induction", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 19069, "num_examples": 69, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 15028, "num_examples": 53, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 4065, 
"num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 38162, "size_in_bytes": 38162}, "matrixshapes": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "matrixshapes", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1131160, "num_examples": 4462, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 906536, "num_examples": 3570, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 224653, "num_examples": 892, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2262349, "size_in_bytes": 2262349}, "metaphor_boolean": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended 
to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "metaphor_boolean", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 214127, "num_examples": 680, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 170993, "num_examples": 544, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 43158, "num_examples": 136, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 428278, "size_in_bytes": 428278}, "metaphor_understanding": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": 
"https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "metaphor_understanding", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 201033, "num_examples": 234, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 162243, "num_examples": 188, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 38814, "num_examples": 46, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 402090, "size_in_bytes": 402090}, "minute_mysteries_qa": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": 
null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "minute_mysteries_qa", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 3245380, "num_examples": 477, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2623861, "num_examples": 383, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 621544, "num_examples": 94, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 6490785, "size_in_bytes": 6490785}, "misconceptions": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": 
"Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "misconceptions", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 45923, "num_examples": 219, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 37336, "num_examples": 176, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 8611, "num_examples": 43, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 91870, "size_in_bytes": 91870}, "misconceptions_russian": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "misconceptions_russian", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 
17035, "num_examples": 49, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 11008, "num_examples": 33, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 6051, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 34094, "size_in_bytes": 34094}, "mnist_ascii": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "mnist_ascii", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 61836204, "num_examples": 69984, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 49497056, "num_examples": 55988, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 12339173, "num_examples": 13996, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, 
"post_processing_size": null, "dataset_size": 123672433, "size_in_bytes": 123672433}, "modified_arithmetic": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "modified_arithmetic", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1221771, "num_examples": 6000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 977487, "num_examples": 4800, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 244312, "num_examples": 1200, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2443570, "size_in_bytes": 2443570}, "moral_permissibility": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future 
capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "moral_permissibility", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 162221, "num_examples": 342, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 128918, "num_examples": 274, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 33328, "num_examples": 68, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 324467, "size_in_bytes": 324467}, "movie_dialog_same_or_different": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": 
"Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "movie_dialog_same_or_different", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 28664867, "num_examples": 50000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 22904157, "num_examples": 40000, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5760710, "num_examples": 10000, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 57329734, "size_in_bytes": 57329734}, "movie_recommendation": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": 
"Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "movie_recommendation", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 173894, "num_examples": 500, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 139210, "num_examples": 400, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 34708, "num_examples": 100, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 347812, "size_in_bytes": 347812}, "mult_data_wrangling": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, 
"post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "mult_data_wrangling", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 626432, "num_examples": 7854, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 508664, "num_examples": 6380, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 117797, "num_examples": 1474, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1252893, "size_in_bytes": 1252893}, "multiemo": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "multiemo", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 651075683, "num_examples": 
1437281, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 520893617, "num_examples": 1149873, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 130182066, "num_examples": 287408, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1302151366, "size_in_bytes": 1302151366}, "natural_instructions": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "natural_instructions", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 355963087, "num_examples": 193250, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 284939871, "num_examples": 154615, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 71023216, "num_examples": 38635, "dataset_name": "bigbench"}}, 
"download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 711926174, "size_in_bytes": 711926174}, "navigate": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "navigate", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 226212, "num_examples": 1000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 181282, "num_examples": 800, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 44954, "num_examples": 200, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 452448, "size_in_bytes": 452448}, "nonsense_words_grammar": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their 
future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "nonsense_words_grammar", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 11164, "num_examples": 50, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 7632, "num_examples": 34, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 3556, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 22352, "size_in_bytes": 22352}, "novel_concepts": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", 
"features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "novel_concepts", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 16115, "num_examples": 32, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 8165, "num_examples": 16, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 7974, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 32254, "size_in_bytes": 32254}, "object_counting": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", 
"id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "object_counting", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 149708, "num_examples": 1000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 119737, "num_examples": 800, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 29999, "num_examples": 200, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 299444, "size_in_bytes": 299444}, "odd_one_out": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": 
"bigbench", "config_name": "odd_one_out", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 13932, "num_examples": 86, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 11293, "num_examples": 69, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 2664, "num_examples": 17, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 27889, "size_in_bytes": 27889}, "operators": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "operators", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 32490, "num_examples": 210, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 25986, "num_examples": 168, 
"dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 6532, "num_examples": 42, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 65008, "size_in_bytes": 65008}, "paragraph_segmentation": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "paragraph_segmentation", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 56847660, "num_examples": 9000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 45675248, "num_examples": 7200, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 11172440, "num_examples": 1800, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 113695348, "size_in_bytes": 113695348}, "parsinlu_qa": 
{"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "parsinlu_qa", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 456870, "num_examples": 1050, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 367126, "num_examples": 840, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 89768, "num_examples": 210, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 913764, "size_in_bytes": 913764}, "parsinlu_reading_comprehension": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating 
the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "parsinlu_reading_comprehension", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 573891, "num_examples": 518, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 455908, "num_examples": 415, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 118011, "num_examples": 103, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1147810, "size_in_bytes": 1147810}, "penguins_in_a_table": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", 
"id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "penguins_in_a_table", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 76121, "num_examples": 149, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 61435, "num_examples": 120, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 14711, "num_examples": 29, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 152267, "size_in_bytes": 152267}, "periodic_elements": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, 
"multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "periodic_elements", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 287051, "num_examples": 654, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 230973, "num_examples": 524, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 56104, "num_examples": 130, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 574128, "size_in_bytes": 574128}, "persian_idioms": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "persian_idioms", "version": {"version_str": 
"0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 28658, "num_examples": 66, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 21740, "num_examples": 50, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 6942, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 57340, "size_in_bytes": 57340}, "phrase_relatedness": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "phrase_relatedness", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 30277, "num_examples": 100, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 23847, "num_examples": 80, "dataset_name": "bigbench"}, "validation": {"name": 
"validation", "num_bytes": 6454, "num_examples": 20, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 60578, "size_in_bytes": 60578}, "physical_intuition": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "physical_intuition", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 23810, "num_examples": 81, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 19373, "num_examples": 65, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 4461, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 47644, "size_in_bytes": 47644}, "physics": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative 
benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "physics", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 120407, "num_examples": 229, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 96261, "num_examples": 184, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 24170, "num_examples": 45, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 240838, "size_in_bytes": 240838}, "physics_questions": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": 
"https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "physics_questions", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 18407, "num_examples": 54, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 13435, "num_examples": 38, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5000, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 36842, "size_in_bytes": 36842}, "play_dialog_same_or_different": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": 
null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "play_dialog_same_or_different", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 3143716, "num_examples": 3264, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2517056, "num_examples": 2612, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 626685, "num_examples": 652, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 6287457, "size_in_bytes": 6287457}, "polish_sequence_labeling": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": 
null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "polish_sequence_labeling", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 18082770, "num_examples": 12812, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 14472058, "num_examples": 10250, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 3610741, "num_examples": 2562, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 36165569, "size_in_bytes": 36165569}, "presuppositions_as_nli": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "presuppositions_as_nli", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": 
{"default": {"name": "default", "num_bytes": 502914, "num_examples": 735, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 401080, "num_examples": 588, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 101860, "num_examples": 147, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1005854, "size_in_bytes": 1005854}, "qa_wikidata": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "qa_wikidata", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1421667, "num_examples": 20321, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1137007, "num_examples": 16257, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 284660, "num_examples": 4064, "dataset_name": 
"bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2843334, "size_in_bytes": 2843334}, "question_selection": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "question_selection", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 2487986, "num_examples": 1582, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1990739, "num_examples": 1266, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 497272, "num_examples": 316, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 4975997, "size_in_bytes": 4975997}, "real_or_fake_text": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large 
language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "real_or_fake_text", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 53684101, "num_examples": 15088, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 42896484, "num_examples": 12072, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 10787642, "num_examples": 3016, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 107368227, "size_in_bytes": 107368227}, "reasoning_about_colored_objects": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", 
"homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "reasoning_about_colored_objects", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 912440, "num_examples": 2000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 733608, "num_examples": 1600, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 178857, "num_examples": 400, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1824905, "size_in_bytes": 1824905}, "repeat_copy_logic": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": 
"Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "repeat_copy_logic", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 6710, "num_examples": 32, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 3357, "num_examples": 16, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 3381, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 13448, "size_in_bytes": 13448}, "rephrase": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": 
"Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "rephrase", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 34260, "num_examples": 78, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 27396, "num_examples": 62, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 6892, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 68548, "size_in_bytes": 68548}, "riddle_sense": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "riddle_sense", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 15569, "num_examples": 49, 
"dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 10791, "num_examples": 33, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 4802, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 31162, "size_in_bytes": 31162}, "ruin_names": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "ruin_names", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 144391, "num_examples": 448, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 115420, "num_examples": 359, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 28997, "num_examples": 89, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, 
"dataset_size": 288808, "size_in_bytes": 288808}, "salient_translation_error_detection": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "salient_translation_error_detection", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1142524, "num_examples": 998, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 913543, "num_examples": 799, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 229006, "num_examples": 199, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2285073, "size_in_bytes": 2285073}, "scientific_press_release": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future 
capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "scientific_press_release", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 13725, "num_examples": 50, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 9287, "num_examples": 34, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 4466, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 27478, "size_in_bytes": 27478}, "semantic_parsing_in_context_sparc": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache 
License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "semantic_parsing_in_context_sparc", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1525025, "num_examples": 1155, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 1248535, "num_examples": 924, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 276518, "num_examples": 231, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 3050078, "size_in_bytes": 3050078}, "semantic_parsing_spider": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, 
"multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "semantic_parsing_spider", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1265902, "num_examples": 1034, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 973996, "num_examples": 828, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 291934, "num_examples": 206, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2531832, "size_in_bytes": 2531832}, "sentence_ambiguity": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, 
"post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "sentence_ambiguity", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 8215, "num_examples": 60, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 6017, "num_examples": 44, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 2222, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 16454, "size_in_bytes": 16454}, "similarities_abstraction": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "similarities_abstraction", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 23490, 
"num_examples": 76, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 18609, "num_examples": 60, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 4906, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 47005, "size_in_bytes": 47005}, "simp_turing_concept": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "simp_turing_concept", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1018473, "num_examples": 6390, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 813887, "num_examples": 5112, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 204614, "num_examples": 1278, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 
0, "post_processing_size": null, "dataset_size": 2036974, "size_in_bytes": 2036974}, "simple_arithmetic_json": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "simple_arithmetic_json", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1177, "num_examples": 30, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 570, "num_examples": 14, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 635, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2382, "size_in_bytes": 2382}, "simple_arithmetic_json_multiple_choice": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future 
capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "simple_arithmetic_json_multiple_choice", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 953, "num_examples": 8, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 0, "num_examples": 0, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 0, "num_examples": 0, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 953, "size_in_bytes": 953}, "simple_arithmetic_json_subtasks": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache 
License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "simple_arithmetic_json_subtasks", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1177, "num_examples": 30, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 601, "num_examples": 15, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 604, "num_examples": 15, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2382, "size_in_bytes": 2382}, "simple_arithmetic_multiple_targets_json": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, 
"multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "simple_arithmetic_multiple_targets_json", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 444, "num_examples": 10, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 0, "num_examples": 0, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 0, "num_examples": 0, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 444, "size_in_bytes": 444}, "simple_ethical_questions": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": 
null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "simple_ethical_questions", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 76615, "num_examples": 115, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 60357, "num_examples": 92, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 16282, "num_examples": 23, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 153254, "size_in_bytes": 153254}, "simple_text_editing": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "simple_text_editing", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 27899, "num_examples": 47, 
"dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 18501, "num_examples": 31, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 9426, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 55826, "size_in_bytes": 55826}, "snarks": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "snarks", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 45810, "num_examples": 181, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 37069, "num_examples": 145, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 8766, "num_examples": 36, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 91645, 
"size_in_bytes": 91645}, "social_iqa": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "social_iqa", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 644154, "num_examples": 1935, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 516485, "num_examples": 1548, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 127694, "num_examples": 387, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1288333, "size_in_bytes": 1288333}, "social_support": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: 
Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "social_support", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 367179, "num_examples": 897, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 295177, "num_examples": 718, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 72027, "num_examples": 179, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 734383, "size_in_bytes": 734383}, "sports_understanding": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": 
"string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "sports_understanding", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 227049, "num_examples": 986, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 181649, "num_examples": 789, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 45425, "num_examples": 197, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 454123, "size_in_bytes": 454123}, "strange_stories": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, 
"multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "strange_stories", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 120620, "num_examples": 174, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 98157, "num_examples": 140, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 22489, "num_examples": 34, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 241266, "size_in_bytes": 241266}, "strategyqa": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "strategyqa", "version": {"version_str": "0.0.0", 
"description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 660851, "num_examples": 2289, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 528381, "num_examples": 1832, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 132494, "num_examples": 457, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1321726, "size_in_bytes": 1321726}, "sufficient_information": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "sufficient_information", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 9458, "num_examples": 39, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 5625, "num_examples": 23, "dataset_name": "bigbench"}, "validation": {"name": 
"validation", "num_bytes": 3861, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 18944, "size_in_bytes": 18944}, "suicide_risk": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "suicide_risk", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 38001, "num_examples": 40, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 23106, "num_examples": 24, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 14919, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 76026, "size_in_bytes": 76026}, "swahili_english_proverbs": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a 
collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "swahili_english_proverbs", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 90367, "num_examples": 153, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 72569, "num_examples": 123, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 17822, "num_examples": 30, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 180758, "size_in_bytes": 180758}, "swedish_to_german_proverbs": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench 
Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "swedish_to_german_proverbs", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 35273, "num_examples": 72, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 27325, "num_examples": 56, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 7972, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 70570, "size_in_bytes": 70570}, "symbol_interpretation": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", 
"id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "symbol_interpretation", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1149725, "num_examples": 990, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 927947, "num_examples": 795, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 221803, "num_examples": 195, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2299475, "size_in_bytes": 2299475}, "temporal_sequences": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, 
"_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "temporal_sequences", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 687735, "num_examples": 1000, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 550332, "num_examples": 800, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 137427, "num_examples": 200, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1375494, "size_in_bytes": 1375494}, "tense": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "tense", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": 
{"name": "default", "num_bytes": 43946, "num_examples": 286, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 35523, "num_examples": 229, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 8452, "num_examples": 57, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 87921, "size_in_bytes": 87921}, "timedial": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "timedial", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 2764478, "num_examples": 2550, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2218234, "num_examples": 2040, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 546268, "num_examples": 510, "dataset_name": "bigbench"}}, "download_checksums": 
{}, "download_size": 0, "post_processing_size": null, "dataset_size": 5528980, "size_in_bytes": 5528980}, "topical_chat": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "topical_chat", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 30930629, "num_examples": 22295, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 24829540, "num_examples": 17836, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 6101090, "num_examples": 4459, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 61861259, "size_in_bytes": 61861259}, "tracking_shuffled_objects": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their 
future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "tracking_shuffled_objects", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 2779059, "num_examples": 3750, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 2226511, "num_examples": 3000, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 552572, "num_examples": 750, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 5558142, "size_in_bytes": 5558142}, "understanding_fables": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", 
"license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "understanding_fables", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 227915, "num_examples": 189, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 181138, "num_examples": 152, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 46801, "num_examples": 37, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 455854, "size_in_bytes": 455854}, "undo_permutation": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, 
"multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "undo_permutation", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 196443, "num_examples": 300, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 158827, "num_examples": 240, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 37641, "num_examples": 60, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 392911, "size_in_bytes": 392911}, "unit_conversion": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, 
"supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "unit_conversion", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 4040317, "num_examples": 23936, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 3239699, "num_examples": 19151, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 800619, "num_examples": 4785, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 8080635, "size_in_bytes": 8080635}, "unit_interpretation": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "unit_interpretation", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 37463, "num_examples": 100, 
"dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 30023, "num_examples": 80, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 7464, "num_examples": 20, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 74950, "size_in_bytes": 74950}, "unnatural_in_context_learning": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "unnatural_in_context_learning", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 4609162, "num_examples": 73420, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 3687332, "num_examples": 58736, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 921830, "num_examples": 14684, "dataset_name": "bigbench"}}, "download_checksums": {}, 
"download_size": 0, "post_processing_size": null, "dataset_size": 9218324, "size_in_bytes": 9218324}, "vitaminc_fact_verification": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "vitaminc_fact_verification", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 32389297, "num_examples": 54668, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 25911838, "num_examples": 43735, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 6477483, "num_examples": 10933, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 64778618, "size_in_bytes": 64778618}, "what_is_the_tao": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and 
extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "what_is_the_tao", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 13306, "num_examples": 36, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 7467, "num_examples": 20, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 5863, "num_examples": 16, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 26636, "size_in_bytes": 26636}, "which_wiki_edit": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache 
License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "which_wiki_edit", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 6332065, "num_examples": 571, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 5234181, "num_examples": 457, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 1097909, "num_examples": 114, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 12664155, "size_in_bytes": 12664155}, "winowhy": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": 
{"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "winowhy", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 1003532, "num_examples": 2862, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 801404, "num_examples": 2290, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 202153, "num_examples": 572, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 2007089, "size_in_bytes": 2007089}, "word_sorting": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, 
"builder_name": "bigbench", "config_name": "word_sorting", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"default": {"name": "default", "num_bytes": 491320, "num_examples": 1900, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 392956, "num_examples": 1520, "dataset_name": "bigbench"}, "validation": {"name": "validation", "num_bytes": 98392, "num_examples": 380, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 982668, "size_in_bytes": 982668}, "word_unscrambling": {"description": "The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to\nprobe large language models, and extrapolate their future capabilities.\n", "citation": "@InProceedings{bigbench,\ntitle = {Beyond the Imitation Game: Quantifying and extrapolating the\ncapabilities of language models},\nauthor={BIG-Bench Collaboration\n},\nyear={2022}\n}\n", "homepage": "https://github.com/google/BIG-bench", "license": "Apache License 2.0", "features": {"idx": {"dtype": "int32", "id": null, "_type": "Value"}, "inputs": {"dtype": "string", "id": null, "_type": "Value"}, "targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_targets": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "multiple_choice_scores": {"feature": {"dtype": "int32", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "bigbench", "config_name": "word_unscrambling", "version": "0.0.0", "splits": {"default": {"name": "default", "num_bytes": 883507, "num_examples": 8917, "dataset_name": "bigbench"}, "train": {"name": "train", "num_bytes": 706675, "num_examples": 7134, "dataset_name": "bigbench"}, 
"validation": {"name": "validation", "num_bytes": 176860, "num_examples": 1783, "dataset_name": "bigbench"}}, "download_checksums": {}, "download_size": 0, "post_processing_size": null, "dataset_size": 1767042, "size_in_bytes": 1767042}} \ No newline at end of file diff --git a/datasets/billsum/README.md b/datasets/billsum/README.md index 694d095cac7..99f23568133 100644 --- a/datasets/billsum/README.md +++ b/datasets/billsum/README.md @@ -3,10 +3,10 @@ annotations_creators: - found language_creators: - found -languages: +language: - en -licenses: -- unknown +license: +- cc0-1.0 multilinguality: - monolingual size_categories: @@ -14,11 +14,24 @@ size_categories: source_datasets: - original task_categories: -- conditional-text-generation -task_ids: - summarization +task_ids: +- summarization-other-bills-summarization paperswithcode_id: billsum pretty_name: BillSum +train-eval-index: +- config: default + task: summarization + task_id: summarization + splits: + train_split: train + eval_split: test + col_mapping: + text: text + summary: target + metrics: + - type: rouge + name: Rouge --- # Dataset Card for "billsum" @@ -79,8 +92,6 @@ features for us bills. ca bills does not have. ## Dataset Structure -We show detailed information for up to 5 configurations of the dataset. - ### Data Instances #### default @@ -123,7 +134,7 @@ The data fields are the same among all splits. #### Initial Data Collection and Normalization -[More Information Needed](https://github.com/huggingface/datasets/blob/master/CONTRIBUTING.md#how-to-contribute-to-the-dataset-cards) +The data consists of three parts: US training bills, US test bills and California test bills. The US bills were collected from the [Govinfo](https://github.com/unitedstates/congress) service provided by the United States Government Publishing Office (GPO) under CC0-1.0 license. The California, bills from the 2015-2016 session are available from the legislature’s [website](https://leginfo.legislature.ca.gov/). 
#### Who are the source language producers? diff --git a/datasets/billsum/billsum.py b/datasets/billsum/billsum.py index 4e7ca20475d..4285236bb06 100644 --- a/datasets/billsum/billsum.py +++ b/datasets/billsum/billsum.py @@ -48,6 +48,8 @@ _URL = "https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx" +_LICENSE = "CC0" + _DOCUMENT = "text" _SUMMARY = "summary" @@ -63,6 +65,7 @@ class Billsum(datasets.GeneratorBasedBuilder): def _info(self): return datasets.DatasetInfo( description=_DESCRIPTION, + license=_LICENSE, features=datasets.Features( { _DOCUMENT: datasets.Value("string"), diff --git a/datasets/billsum/dataset_infos.json b/datasets/billsum/dataset_infos.json index 703aba30584..05919a0cfd6 100644 --- a/datasets/billsum/dataset_infos.json +++ b/datasets/billsum/dataset_infos.json @@ -1 +1 @@ -{"default": {"description": "\nBillSum, summarization of US Congressional and California state bills.\n\nThere are several features:\n - text: bill text.\n - summary: summary of the bills.\n - title: title of the bills.\nfeatures for us bills. 
ca bills does not have.\n - text_len: number of chars in text.\n - sum_len: number of chars in summary.\n", "citation": "\n@misc{kornilova2019billsum,\n title={BillSum: A Corpus for Automatic Summarization of US Legislation},\n author={Anastassia Kornilova and Vlad Eidelman},\n year={2019},\n eprint={1910.00523},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n", "homepage": "https://github.com/FiscalNote/BillSum", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "summary": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": {"input": "text", "output": "summary"}, "builder_name": "billsum", "config_name": "default", "version": {"version_str": "3.0.0", "description": null, "datasets_version_to_prepare": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"ca_test": {"name": "ca_test", "num_bytes": 14945923, "num_examples": 1237, "dataset_name": "billsum"}, "test": {"name": "test", "num_bytes": 37867905, "num_examples": 3269, "dataset_name": "billsum"}, "train": {"name": "train", "num_bytes": 219605578, "num_examples": 18949, "dataset_name": "billsum"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx": {"num_bytes": 67260676, "checksum": "5a55dfb231618d63b25cec4773280a2986d38f53d6d4d39b8256b278edf1110c"}}, "download_size": 67260676, "dataset_size": 272419406, "size_in_bytes": 339680082}} +{"default": {"description": "\nBillSum, summarization of US Congressional and California state bills.\n\nThere are several features:\n - text: bill text.\n - summary: summary of the bills.\n - title: title of the bills.\nfeatures for us bills. 
ca bills does not have.\n - text_len: number of chars in text.\n - sum_len: number of chars in summary.\n", "citation": "\n@misc{kornilova2019billsum,\n title={BillSum: A Corpus for Automatic Summarization of US Legislation},\n author={Anastassia Kornilova and Vlad Eidelman},\n year={2019},\n eprint={1910.00523},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n", "homepage": "https://github.com/FiscalNote/BillSum", "license": "CC0", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "summary": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "text", "output": "summary"}, "task_templates": null, "builder_name": "billsum", "config_name": "default", "version": {"version_str": "3.0.0", "description": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 219596090, "num_examples": 18949, "dataset_name": "billsum"}, "test": {"name": "test", "num_bytes": 37866257, "num_examples": 3269, "dataset_name": "billsum"}, "ca_test": {"name": "ca_test", "num_bytes": 14945291, "num_examples": 1237, "dataset_name": "billsum"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lx": {"num_bytes": 67260676, "checksum": "5a55dfb231618d63b25cec4773280a2986d38f53d6d4d39b8256b278edf1110c"}}, "download_size": 67260676, "post_processing_size": null, "dataset_size": 272407638, "size_in_bytes": 339668314}} \ No newline at end of file diff --git a/datasets/bing_coronavirus_query_set/README.md b/datasets/bing_coronavirus_query_set/README.md index f96ae1ae61d..65afeccbc95 100644 --- a/datasets/bing_coronavirus_query_set/README.md +++ b/datasets/bing_coronavirus_query_set/README.md @@ -3,10 +3,10 @@ annotations_creators: - found language_creators: - found -languages: +language: - en -licenses: -- o-uda-1.0 +license: +- other multilinguality: - monolingual 
size_categories: @@ -21,7 +21,7 @@ paperswithcode_id: null pretty_name: BingCoronavirusQuerySet --- -# Dataset Card Creation Guide +# Dataset Card for BingCoronavirusQuerySet ## Table of Contents - [Dataset Description](#dataset-description) diff --git a/datasets/biomrc/README.md b/datasets/biomrc/README.md index e825f2c402e..d67639ca352 100644 --- a/datasets/biomrc/README.md +++ b/datasets/biomrc/README.md @@ -1,5 +1,5 @@ --- -languages: +language: - en paperswithcode_id: biomrc pretty_name: BIOMRC @@ -55,8 +55,6 @@ We introduce BIOMRC, a large-scale cloze-style biomedical MRC dataset. Care was ## Dataset Structure -We show detailed information for up to 5 configurations of the dataset. - ### Data Instances #### biomrc_large_A diff --git a/datasets/biosses/README.md b/datasets/biosses/README.md index 5b66590225a..c6192ad9129 100644 --- a/datasets/biosses/README.md +++ b/datasets/biosses/README.md @@ -3,9 +3,9 @@ annotations_creators: - expert-generated language_creators: - found -languages: +language: - en -licenses: +license: - gpl-3.0 multilinguality: - monolingual @@ -14,8 +14,9 @@ size_categories: source_datasets: - original task_categories: -- text-scoring +- text-classification task_ids: +- text-scoring - semantic-similarity-scoring paperswithcode_id: biosses pretty_name: BIOSSES diff --git a/datasets/biwi_kinect_head_pose/README.md b/datasets/biwi_kinect_head_pose/README.md new file mode 100644 index 00000000000..c1610dda072 --- /dev/null +++ b/datasets/biwi_kinect_head_pose/README.md @@ -0,0 +1,308 @@ +--- +annotations_creators: +- expert-generated +language_creators: +- expert-generated +language: +- en +license: +- other +multilinguality: +- monolingual +pretty_name: Biwi Kinect Head Pose Database +size_categories: +- 10K + View C++ Code + +```cpp +/* + * Gabriele Fanelli + * + * fanelli@vision.ee.ethz.ch + * + * BIWI, ETHZ, 2011 + * + * Part of the Biwi Kinect Head Pose Database + * + * Example code for reading a compressed binary depth image file. 
+ * + * THE SOFTWARE IS PROVIDED “AS IS” AND THE PROVIDER GIVES NO EXPRESS OR IMPLIED WARRANTIES OF ANY KIND, + * INCLUDING WITHOUT LIMITATION THE WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE PROVIDER BE HELD RESPONSIBLE FOR LOSS OR DAMAGE CAUSED BY THE USE OF THE SOFTWARE. + * + * + */ + +#include +#include +#include + +int16_t* loadDepthImageCompressed( const char* fname ){ + + //now read the depth image + FILE* pFile = fopen(fname, "rb"); + if(!pFile){ + std::cerr << "could not open file " << fname << std::endl; + return NULL; + } + + int im_width = 0; + int im_height = 0; + bool success = true; + + success &= ( fread(&im_width,sizeof(int),1,pFile) == 1 ); // read width of depthmap + success &= ( fread(&im_height,sizeof(int),1,pFile) == 1 ); // read height of depthmap + + int16_t* depth_img = new int16_t[im_width*im_height]; + + int numempty; + int numfull; + int p = 0; + + while(p < im_width*im_height ){ + + success &= ( fread( &numempty,sizeof(int),1,pFile) == 1 ); + + for(int i = 0; i < numempty; i++) + depth_img[ p + i ] = 0; + + success &= ( fread( &numfull,sizeof(int), 1, pFile) == 1 ); + success &= ( fread( &depth_img[ p + numempty ], sizeof(int16_t), numfull, pFile) == (unsigned int) numfull ); + p += numempty+numfull; + + } + + fclose(pFile); + + if(success) + return depth_img; + else{ + delete [] depth_img; + return NULL; + } +} + +float* read_gt(const char* fname){ + + //try to read in the ground truth from a binary file + FILE* pFile = fopen(fname, "rb"); + if(!pFile){ + std::cerr << "could not open file " << fname << std::endl; + return NULL; + } + + float* data = new float[6]; + + bool success = true; + success &= ( fread( &data[0], sizeof(float), 6, pFile) == 6 ); + fclose(pFile); + + if(success) + return data; + else{ + delete [] data; + return NULL; + } + +} +``` + +
+ + +### Supported Tasks and Leaderboards + +Biwi Kinect Head Pose Database supports the following tasks : +- Head pose estimation +- Pose estimation +- Face verification + +### Languages + +[Needs More Information] + +## Dataset Structure + +### Data Instances + +A sample from the Biwi Kinect Head Pose dataset is provided below: + +``` +{ + 'sequence_number': '12', + 'subject_id': 'M06', + 'rgb': [,.....], + 'rgb_cal': + { + 'intrisic_mat': [[517.679, 0.0, 320.0], [0.0, 517.679, 240.5], [0.0, 0.0, 1.0]], + 'extrinsic_mat': + { + 'rotation': [[0.999947, 0.00432361, 0.00929419], [-0.00446314, 0.999877, 0.0150443], [-0.009228, -0.015085, 0.999844]], + 'translation': [-24.0198, 5.8896, -13.2308] + } + } + 'depth': ['../hpdb/12/frame_00003_depth.bin', .....], + 'depth_cal': + { + 'intrisic_mat': [[575.816, 0.0, 320.0], [0.0, 575.816, 240.0], [0.0, 0.0, 1.0]], + 'extrinsic_mat': + { + 'rotation': [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + 'translation': [0.0, 0.0, 0.0] + } + } + 'head_pose_gt': + { + 'center': [[43.4019, -30.7038, 906.864], [43.0202, -30.8683, 906.94], [43.0255, -30.5611, 906.659], .....], + 'rotation': [[[0.980639, 0.109899, 0.162077], [-0.11023, 0.993882, -0.00697376], [-0.161851, -0.011027, 0.986754]], ......] + } +} +``` + +### Data Fields + +- `sequence_number` : This refers to the sequence number in the dataset. There are a total of 24 sequences. +- `subject_id` : This refers to the subjects in the dataset. There are a total of 20 people with 6 females and 14 males where 4 people were recorded twice. +- `rgb` : List of png frames containing the poses. +- `rgb_cal`: Contains calibration information for the color camera which includes intrinsic matrix, +global rotation and translation. +- `depth` : List of depth frames for the poses. +- `depth_cal`: Contains calibration information for the depth camera which includes intrinsic matrix, global rotation and translation. 
+- `head_pose_gt` : Contains ground truth information, i.e., the location of the center of the head in 3D and the head rotation, encoded as a 3x3 rotation matrix. + + +### Data Splits + +All the data is contained in the training set. + +## Dataset Creation + +### Curation Rationale + +[More Information Needed] + +### Source Data + +#### Initial Data Collection and Normalization + +The Biwi Kinect Head Pose Database is acquired with the Microsoft Kinect sensor, a structured IR light device. +#### Who are the source language producers? + +[More Information Needed] + +### Annotations + +#### Annotation process + +From Dataset's README : +> The database contains 24 sequences acquired with a Kinect sensor. 20 people (some were recorded twice - 6 women and 14 men) were recorded while turning their heads, sitting in front of the sensor, at roughly one meter of distance. + +#### Who are the annotators? + +[More Information Needed] + +### Personal and Sensitive Information + +[More Information Needed] + +## Considerations for Using the Data + +### Social Impact of Dataset + +[More Information Needed] + +### Discussion of Biases + +[More Information Needed] + +### Other Known Limitations + +[More Information Needed] + +## Additional Information + +### Dataset Curators + +[Needs More Information] + +### Licensing Information + +From Dataset's README : +> This database is made available for non-commercial use such as university research and education. + +### Citation Information + +```bibtex +@article{fanelli_IJCV, + author = {Fanelli, Gabriele and Dantone, Matthias and Gall, Juergen and Fossati, Andrea and Van Gool, Luc}, + title = {Random Forests for Real Time 3D Face Analysis}, + journal = {Int. J. Comput. Vision}, + year = {2013}, + month = {February}, + volume = {101}, + number = {3}, + pages = {437--458} +} +``` + +### Contributions + +Thanks to [@dnaveenr](https://github.com/dnaveenr) for adding this dataset. 
\ No newline at end of file diff --git a/datasets/biwi_kinect_head_pose/biwi_kinect_head_pose.py b/datasets/biwi_kinect_head_pose/biwi_kinect_head_pose.py new file mode 100644 index 00000000000..798c9e1f89c --- /dev/null +++ b/datasets/biwi_kinect_head_pose/biwi_kinect_head_pose.py @@ -0,0 +1,215 @@ +# Copyright 2022 The HuggingFace Datasets Authors and the current dataset script contributor. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Biwi Kinect Head Pose Database.""" + + +import glob +import os + +import datasets + + +_CITATION = """\ +@article{fanelli_IJCV, + author = {Fanelli, Gabriele and Dantone, Matthias and Gall, Juergen and Fossati, Andrea and Van Gool, Luc}, + title = {Random Forests for Real Time 3D Face Analysis}, + journal = {Int. J. Comput. Vision}, + year = {2013}, + month = {February}, + volume = {101}, + number = {3}, + pages = {437--458} +} +""" + + +_DESCRIPTION = """\ +The Biwi Kinect Head Pose Database is acquired with the Microsoft Kinect sensor, a structured IR light device.It contains 15K images of 20 people with 6 females and 14 males where 4 people were recorded twice. +""" + + +_HOMEPAGE = "https://icu.ee.ethz.ch/research/datsets.html" + + +_LICENSE = "This database is made available for non-commercial use such as university research and education." 
+ + +_URLS = { + "kinect_head_pose_db": "https://data.vision.ee.ethz.ch/cvl/gfanelli/kinect_head_pose_db.tgz", +} + +_sequence_to_subject_map = { + "01": "F01", + "02": "F02", + "03": "F03", + "04": "F04", + "05": "F05", + "06": "F06", + "07": "M01", + "08": "M02", + "09": "M03", + "10": "M04", + "11": "M05", + "12": "M06", + "13": "M07", + "14": "M08", + "15": "F03", + "16": "M09", + "17": "M10", + "18": "F05", + "19": "M11", + "20": "M12", + "21": "F02", + "22": "M01", + "23": "M13", + "24": "M14", +} + + +class BiwiKinectHeadPose(datasets.GeneratorBasedBuilder): + + VERSION = datasets.Version("1.0.0") + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "sequence_number": datasets.Value("string"), + "subject_id": datasets.Value("string"), + "rgb": datasets.Sequence(datasets.Image()), + "rgb_cal": { + "intrisic_mat": datasets.Array2D(shape=(3, 3), dtype="float64"), + "extrinsic_mat": { + "rotation": datasets.Array2D(shape=(3, 3), dtype="float64"), + "translation": datasets.Sequence(datasets.Value("float64"), length=3), + }, + }, + "depth": datasets.Sequence(datasets.Value("string")), + "depth_cal": { + "intrisic_mat": datasets.Array2D(shape=(3, 3), dtype="float64"), + "extrinsic_mat": { + "rotation": datasets.Array2D(shape=(3, 3), dtype="float64"), + "translation": datasets.Sequence(datasets.Value("float64"), length=3), + }, + }, + "head_pose_gt": datasets.Sequence( + { + "center": datasets.Sequence(datasets.Value("float64"), length=3), + "rotation": datasets.Array2D(shape=(3, 3), dtype="float64"), + } + ), + "head_template": datasets.Value("string"), + } + ), + homepage=_HOMEPAGE, + license=_LICENSE, + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + + data_dir = dl_manager.download_and_extract(_URLS) + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, + gen_kwargs={ + "dataset_path": os.path.join(data_dir["kinect_head_pose_db"], "hpdb"), + }, + ), + ] + + 
@staticmethod + def _get_calibration_information(cal_file_path): + with open(cal_file_path, "r", encoding="utf-8") as f: + cal_info = f.read().splitlines() + + intrisic_mat = [] + extrinsic_mat = [] + + for data in cal_info[:3]: + row = list(map(float, data.strip().split(" "))) + intrisic_mat.append(row) + + for data in cal_info[6:9]: + row = list(map(float, data.strip().split(" "))) + extrinsic_mat.append(row) + + translation = list(map(float, cal_info[10].strip().split(" "))) + + return { + "intrisic_mat": intrisic_mat, + "extrinsic_mat": { + "rotation": extrinsic_mat, + "translation": translation, + }, + } + + @staticmethod + def _parse_head_pose_info(head_pose_file): + with open(head_pose_file, "r", encoding="utf-8") as f: + head_pose_info = f.read().splitlines() + + rotation = [] + for data in head_pose_info[:3]: + row = list(map(float, data.strip().split(" "))) + rotation.append(row) + + center = list(map(float, head_pose_info[4].strip().split(" "))) + + return { + "center": center, + "rotation": rotation, + } + + @staticmethod + def _get_head_pose_information(path): + head_pose_files = sorted(glob.glob(os.path.join(path, "*_pose.txt"))) + + head_poses_info = [] + + for head_pose_file in head_pose_files: + head_pose = BiwiKinectHeadPose._parse_head_pose_info(head_pose_file) + head_poses_info.append(head_pose) + + return head_poses_info + + def _generate_examples(self, dataset_path): + + idx = 0 + folders = os.listdir(dataset_path) + for item in folders: + sequence_number = item + sequence_base_path = os.path.join(dataset_path, sequence_number) + if os.path.isdir(sequence_base_path): + rgb_files = sorted(glob.glob(os.path.join(sequence_base_path, "*.png"))) + depth_files = sorted(glob.glob(os.path.join(sequence_base_path, "*.bin"))) + head_template_path = os.path.join(dataset_path, sequence_number + ".obj") + rgb_cal = self._get_calibration_information(os.path.join(sequence_base_path, "rgb.cal")) + depth_cal = 
self._get_calibration_information(os.path.join(sequence_base_path, "depth.cal")) + head_pose_gt = self._get_head_pose_information(sequence_base_path) + + yield idx, { + "sequence_number": sequence_number, + "subject_id": _sequence_to_subject_map[sequence_number], + "rgb": rgb_files, + "rgb_cal": rgb_cal, + "depth": depth_files, + "depth_cal": depth_cal, + "head_pose_gt": head_pose_gt, + "head_template": head_template_path, + } + + idx += 1 diff --git a/datasets/biwi_kinect_head_pose/dataset_infos.json b/datasets/biwi_kinect_head_pose/dataset_infos.json new file mode 100644 index 00000000000..22f03c8e05b --- /dev/null +++ b/datasets/biwi_kinect_head_pose/dataset_infos.json @@ -0,0 +1 @@ +{"default": {"description": "The Biwi Kinect Head Pose Database is acquired with the Microsoft Kinect sensor, a structured IR light device.It contains 15K images of 20 people with 6 females and 14 males where 4 people were recorded twice.\n", "citation": "@article{fanelli_IJCV,\n author = {Fanelli, Gabriele and Dantone, Matthias and Gall, Juergen and Fossati, Andrea and Van Gool, Luc},\n title = {Random Forests for Real Time 3D Face Analysis},\n journal = {Int. J. Comput. 
Vision},\n year = {2013},\n month = {February},\n volume = {101},\n number = {3},\n pages = {437--458}\n}\n", "homepage": "https://icu.ee.ethz.ch/research/datsets.html", "license": "This database is made available for non-commercial use such as university research and education.", "features": {"sequence_number": {"dtype": "string", "id": null, "_type": "Value"}, "subject_id": {"dtype": "string", "id": null, "_type": "Value"}, "rgb": {"feature": {"decode": true, "id": null, "_type": "Image"}, "length": -1, "id": null, "_type": "Sequence"}, "rgb_cal": {"intrisic_mat": {"shape": [3, 3], "dtype": "float64", "id": null, "_type": "Array2D"}, "extrinsic_mat": {"rotation": {"shape": [3, 3], "dtype": "float64", "id": null, "_type": "Array2D"}, "translation": {"feature": {"dtype": "float64", "id": null, "_type": "Value"}, "length": 3, "id": null, "_type": "Sequence"}}}, "depth": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "depth_cal": {"intrisic_mat": {"shape": [3, 3], "dtype": "float64", "id": null, "_type": "Array2D"}, "extrinsic_mat": {"rotation": {"shape": [3, 3], "dtype": "float64", "id": null, "_type": "Array2D"}, "translation": {"feature": {"dtype": "float64", "id": null, "_type": "Value"}, "length": 3, "id": null, "_type": "Sequence"}}}, "head_pose_gt": {"feature": {"center": {"feature": {"dtype": "float64", "id": null, "_type": "Value"}, "length": 3, "id": null, "_type": "Sequence"}, "rotation": {"shape": [3, 3], "dtype": "float64", "id": null, "_type": "Array2D"}}, "length": -1, "id": null, "_type": "Sequence"}, "head_template": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "biwi_kinect_head_pose", "config_name": "default", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 6914063, "num_examples": 24, 
"dataset_name": "biwi_kinect_head_pose"}}, "download_checksums": {"https://data.vision.ee.ethz.ch/cvl/gfanelli/kinect_head_pose_db.tgz": {"num_bytes": 6014398431, "checksum": "d8fc0fee11b6b865b18b292de7c21dd2181492bd770c4fe13821e8dc630f5549"}}, "download_size": 6014398431, "post_processing_size": null, "dataset_size": 6914063, "size_in_bytes": 6021312494}} \ No newline at end of file diff --git a/datasets/biwi_kinect_head_pose/dummy/1.0.0/dummy_data.zip b/datasets/biwi_kinect_head_pose/dummy/1.0.0/dummy_data.zip new file mode 100644 index 00000000000..3c57aa50670 Binary files /dev/null and b/datasets/biwi_kinect_head_pose/dummy/1.0.0/dummy_data.zip differ diff --git a/datasets/blbooks/README.md b/datasets/blbooks/README.md index 2542ce04f9a..835d672c8e8 100644 --- a/datasets/blbooks/README.md +++ b/datasets/blbooks/README.md @@ -3,14 +3,14 @@ annotations_creators: - no-annotation language_creators: - machine-generated -languages: -- en -- fr +language: - de +- en - es +- fr - it - nl -licenses: +license: - cc0-1.0 multilinguality: - multilingual @@ -20,10 +20,12 @@ size_categories: source_datasets: - original task_categories: -- sequence-modeling +- text-generation +- fill-mask - other task_ids: - language-modeling +- masked-language-modeling - other-other-digital-humanities-research --- diff --git a/datasets/blbooksgenre/README.md b/datasets/blbooksgenre/README.md index 6a3a6aee0e0..803e8d63b78 100644 --- a/datasets/blbooksgenre/README.md +++ b/datasets/blbooksgenre/README.md @@ -4,32 +4,34 @@ annotations_creators: language_creators: - crowdsourced - expert-generated -languages: -- en +language: - de +- en - fr - nl -licenses: +license: - cc0-1.0 multilinguality: - multilingual pretty_name: British Library Books Genre size_categories: - title_genre_classifiction: - - 1K The Asirra data set +> +> Web services are often protected with a challenge that's supposed to be easy for people to solve, but difficult for computers. 
Such a challenge is often called a [CAPTCHA](http://www.captcha.net/) (Completely Automated Public Turing test to tell Computers and Humans Apart) or HIP (Human Interactive Proof). HIPs are used for many purposes, such as to reduce email and blog spam and prevent brute-force attacks on web site passwords. +> +> Asirra (Animal Species Image Recognition for Restricting Access) is a HIP that works by asking users to identify photographs of cats and dogs. This task is difficult for computers, but studies have shown that people can accomplish it quickly and accurately. Many even think it's fun! Here is an example of the Asirra interface: +> +> Asirra is unique because of its partnership with [Petfinder.com](https://www.petfinder.com/), the world's largest site devoted to finding homes for homeless pets. They've provided Microsoft Research with over three million images of cats and dogs, manually classified by people at thousands of animal shelters across the United States. Kaggle is fortunate to offer a subset of this data for fun and research. ### Supported Tasks and Leaderboards @@ -65,7 +76,7 @@ A large set of images of cats and dogs. There are 1738 corrupted images that are ### Languages -English +English. ## Dataset Structure @@ -75,9 +86,8 @@ A sample from the training set is provided below: ``` { - 'image_file_path': '/root/.cache/huggingface/datasets/downloads/extracted/6e1e8c9052e9f3f7ecbcb4b90860668f81c1d36d86cc9606d49066f8da8bfb4f/PetImages/Cat/1.jpg', 'image': , - 'label': 0 + 'labels': 0 } ``` @@ -85,7 +95,6 @@ A sample from the training set is provided below: The data instances have the following fields: -- `image_file_path`: a `string` filepath to an image. - `image`: A `PIL.Image.Image` object containing the image. Note that when accessing the image column: `dataset[0]["image"]` the image file is automatically decoded. Decoding of a large number of image files might take a significant amount of time. 
Thus it is important to first query the sample index before the `"image"` column, *i.e.* `dataset[0]["image"]` should **always** be preferred over `dataset["image"][0]`. - `labels`: an `int` classification label. @@ -108,27 +117,37 @@ Class Label Mappings: ### Curation Rationale -[More Information Needed] +This subset was to built to test whether computer vision algorithms can beat the Asirra CAPTCHA: + +From the competition page: + +> Image recognition attacks +> +> While random guessing is the easiest form of attack, various forms of image recognition can allow an attacker to make guesses that are better than random. There is enormous diversity in the photo database (a wide variety of backgrounds, angles, poses, lighting, etc.), making accurate automatic classification difficult. In an informal poll conducted many years ago, computer vision experts posited that a classifier with better than 60% accuracy would be difficult without a major advance in the state of the art. For reference, a 60% classifier improves the guessing probability of a 12-image HIP from 1/4096 to 1/459. ### Source Data #### Initial Data Collection and Normalization -[More Information Needed] +This dataset is a subset of the Asirra dataset. + +From the competition page: + +> Asirra is unique because of its partnership with Petfinder.com, the world's largest site devoted to finding homes for homeless pets. They've provided Microsoft Research with over three million images of cats and dogs, manually classified by people at thousands of animal shelters across the United States. #### Who are the source language producers? -[More Information Needed] +The users of [Petfinder.com](https://www.petfinder.com/). ### Annotations #### Annotation process -[More Information Needed] +The images were annotated by selecting a pet category on [Petfinder.com](https://www.petfinder.com/). #### Who are the annotators? -[More Information Needed] +The users of [Petfinder.com](https://www.petfinder.com/). 
### Personal and Sensitive Information @@ -142,7 +161,11 @@ Class Label Mappings: ### Discussion of Biases -[More Information Needed] +From the paper: + +> Unlike many image-based CAPTCHAs which are abstract or subjective, Asirra’s challenges are concrete, inoffensive (cute, by some accounts), require no specialized or culturally biased knowledge, and have definite ground truth. This +makes Asirra less frustrating for humans. Some beta-testers found it fun. The four-year-old child of one asked several times to “play the cat and dog game again.” + ### Other Known Limitations @@ -160,7 +183,7 @@ Class Label Mappings: ### Citation Information -``` +```bibtex @Inproceedings (Conference){asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization, author = {Elson, Jeremy and Douceur, John (JD) and Howell, Jon and Saul, Jared}, title = {Asirra: A CAPTCHA that Exploits Interest-Aligned Manual Image Categorization}, diff --git a/datasets/cats_vs_dogs/cats_vs_dogs.py b/datasets/cats_vs_dogs/cats_vs_dogs.py index ec69e7fe2a9..ecfd3d0874c 100644 --- a/datasets/cats_vs_dogs/cats_vs_dogs.py +++ b/datasets/cats_vs_dogs/cats_vs_dogs.py @@ -23,7 +23,7 @@ logger = datasets.logging.get_logger(__name__) -_URL = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip" +_URL = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip" _HOMEPAGE = "https://www.microsoft.com/en-us/download/details.aspx?id=54765" @@ -44,14 +44,13 @@ class CatsVsDogs(datasets.GeneratorBasedBuilder): - VERSION = datasets.Version("0.0.1") + VERSION = datasets.Version("1.0.0") def _info(self): return datasets.DatasetInfo( description=_DESCRIPTION, features=datasets.Features( { - "image_file_path": datasets.Value("string"), "image": datasets.Image(), "labels": datasets.features.ClassLabel(names=["cat", "dog"]), } @@ -76,7 +75,6 @@ def _generate_examples(self, files): with open(file, 
"rb") as f: if b"JFIF" in f.peek(10): yield str(i), { - "image_file_path": file, "image": file, "labels": os.path.basename(os.path.dirname(file)).lower(), } diff --git a/datasets/cats_vs_dogs/dataset_infos.json b/datasets/cats_vs_dogs/dataset_infos.json index 868379941db..74b0c4ed688 100644 --- a/datasets/cats_vs_dogs/dataset_infos.json +++ b/datasets/cats_vs_dogs/dataset_infos.json @@ -1 +1 @@ -{"default": {"description": "A large set of images of cats and dogs. There are 1738 corrupted images that are dropped.", "citation": "@Inproceedings (Conference){asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization,\n author = {Elson, Jeremy and Douceur, John (JD) and Howell, Jon and Saul, Jared},\n title = {Asirra: A CAPTCHA that Exploits Interest-Aligned Manual Image Categorization},\n booktitle = {Proceedings of 14th ACM Conference on Computer and Communications Security (CCS)},\n year = {2007},\n month = {October},\n publisher = {Association for Computing Machinery, Inc.},\n url = {https://www.microsoft.com/en-us/research/publication/asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization/},\n edition = {Proceedings of 14th ACM Conference on Computer and Communications Security (CCS)},\n}\n", "homepage": "https://www.microsoft.com/en-us/download/details.aspx?id=54765", "license": "", "features": {"image_file_path": {"dtype": "string", "id": null, "_type": "Value"}, "image": {"decode": true, "id": null, "_type": "Image"}, "labels": {"num_classes": 2, "names": ["cat", "dog"], "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": {"input": "image", "output": "labels"}, "task_templates": [{"task": "image-classification", "image_column": "image", "label_column": "labels"}], "builder_name": "cats_vs_dogs", "config_name": "default", "version": {"version_str": "0.0.1", "description": null, "major": 0, "minor": 0, "patch": 1}, "splits": {"train": {"name": "train", "num_bytes": 7593077, "num_examples": 23410, 
"dataset_name": "cats_vs_dogs"}}, "download_checksums": {"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip": {"num_bytes": 824894548, "checksum": "f9553e426bd725354ed3a27e3c6920caadb55c835d1ebd880d2e56d3f1fbb22b"}}, "download_size": 824894548, "post_processing_size": null, "dataset_size": 7593077, "size_in_bytes": 832487625}} \ No newline at end of file +{"default": {"description": "A large set of images of cats and dogs. There are 1738 corrupted images that are dropped.", "citation": "@Inproceedings (Conference){asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization,\n author = {Elson, Jeremy and Douceur, John (JD) and Howell, Jon and Saul, Jared},\n title = {Asirra: A CAPTCHA that Exploits Interest-Aligned Manual Image Categorization},\n booktitle = {Proceedings of 14th ACM Conference on Computer and Communications Security (CCS)},\n year = {2007},\n month = {October},\n publisher = {Association for Computing Machinery, Inc.},\n url = {https://www.microsoft.com/en-us/research/publication/asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization/},\n edition = {Proceedings of 14th ACM Conference on Computer and Communications Security (CCS)},\n}\n", "homepage": "https://www.microsoft.com/en-us/download/details.aspx?id=54765", "license": "", "features": {"image": {"decode": true, "id": null, "_type": "Image"}, "labels": {"num_classes": 2, "names": ["cat", "dog"], "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": {"input": "image", "output": "labels"}, "task_templates": [{"task": "image-classification", "image_column": "image", "label_column": "labels"}], "builder_name": "cats_vs_dogs", "config_name": "default", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3893603, "num_examples": 23422, "dataset_name": "cats_vs_dogs"}}, 
"download_checksums": {"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip": {"num_bytes": 824887076, "checksum": "b7974bd00a84a99921f36ee4403f089853777b5ae8d151c76a86e64900334af9"}}, "download_size": 824887076, "post_processing_size": null, "dataset_size": 3893603, "size_in_bytes": 828780679}} \ No newline at end of file diff --git a/datasets/cats_vs_dogs/dummy/0.0.1/dummy_data.zip b/datasets/cats_vs_dogs/dummy/1.0.0/dummy_data.zip similarity index 100% rename from datasets/cats_vs_dogs/dummy/0.0.1/dummy_data.zip rename to datasets/cats_vs_dogs/dummy/1.0.0/dummy_data.zip diff --git a/datasets/cawac/README.md b/datasets/cawac/README.md index 0db683b591c..29e6e42cb59 100644 --- a/datasets/cawac/README.md +++ b/datasets/cawac/README.md @@ -3,9 +3,9 @@ annotations_creators: - no-annotation language_creators: - found -languages: +language: - ca -licenses: +license: - cc-by-sa-3.0 multilinguality: - monolingual @@ -14,9 +14,11 @@ size_categories: source_datasets: - original task_categories: -- sequence-modeling +- text-generation +- fill-mask task_ids: - language-modeling +- masked-language-modeling paperswithcode_id: cawac pretty_name: caWaC --- diff --git a/datasets/cbt/README.md b/datasets/cbt/README.md index 25e60cb0e02..40da2e47e6b 100644 --- a/datasets/cbt/README.md +++ b/datasets/cbt/README.md @@ -4,48 +4,30 @@ annotations_creators: - machine-generated language_creators: - found -languages: +language: - en -licenses: -- gfdl-1.3 +license: +- gfdl multilinguality: - monolingual size_categories: - CN: - - 100K and around each\n highlight, which is the target summary\n", "citation": "@article{DBLP:journals/corr/SeeLM17,\n author = {Abigail See and\n Peter J. Liu and\n Christopher D. 
Manning},\n title = {Get To The Point: Summarization with Pointer-Generator Networks},\n journal = {CoRR},\n volume = {abs/1704.04368},\n year = {2017},\n url = {http://arxiv.org/abs/1704.04368},\n archivePrefix = {arXiv},\n eprint = {1704.04368},\n timestamp = {Mon, 13 Aug 2018 16:46:08 +0200},\n biburl = {https://dblp.org/rec/bib/journals/corr/SeeLM17},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n@inproceedings{hermann2015teaching,\n title={Teaching machines to read and comprehend},\n author={Hermann, Karl Moritz and Kocisky, Tomas and Grefenstette, Edward and Espeholt, Lasse and Kay, Will and Suleyman, Mustafa and Blunsom, Phil},\n booktitle={Advances in neural information processing systems},\n pages={1693--1701},\n year={2015}\n}\n", "homepage": "https://github.com/abisee/cnn-dailymail", "license": "", "features": {"article": {"dtype": "string", "id": null, "_type": "Value"}, "highlights": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cnn_dailymail", "config_name": "3.0.0", "version": {"version_str": "3.0.0", "description": "Using cased version.", "datasets_version_to_prepare": null, "major": 3, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1261704307, "num_examples": 287113, "dataset_name": "cnn_dailymail"}, "validation": {"name": "validation", "num_bytes": 57732436, "num_examples": 13368, "dataset_name": "cnn_dailymail"}, "test": {"name": "test", "num_bytes": 49925756, "num_examples": 11490, "dataset_name": "cnn_dailymail"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfTHk4NFg2SndKcjQ": {"num_bytes": 158577824, "checksum": "e8fbc0027e54e0a916abd9c969eb35f708ed1467d7ef4e3b17a56739d65cb200"}, "https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfM1BxdkxVaTY2bWs": {"num_bytes": 375893739, "checksum": 
"ad69010002210b7c406718248ee66e65868b9f6820f163aa966369878d14147e"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt": {"num_bytes": 2109547, "checksum": "c4f5efb5ec2126430a5c156efbd13d0e9c4cb490169e552c38b4a51981a009bd"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt": {"num_bytes": 46424688, "checksum": "a5cee49f3a6c862c26ce29308236d2a99625ab6c86a43be22d5206b2790d8029"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt": {"num_bytes": 2433674, "checksum": "81887e982b045083409c6ee838aede8ff4b97291605bcfb21bffc456a16991db"}}, "download_size": 585439472, "dataset_size": 1369362499, "size_in_bytes": 1954801971}, "1.0.0": {"description": "CNN/DailyMail non-anonymized summarization dataset.\n\nThere are two features:\n - article: text of news article, used as the document to be summarized\n - highlights: joined text of highlights with and around each\n highlight, which is the target summary\n", "citation": "@article{DBLP:journals/corr/SeeLM17,\n author = {Abigail See and\n Peter J. Liu and\n Christopher D. 
Manning},\n title = {Get To The Point: Summarization with Pointer-Generator Networks},\n journal = {CoRR},\n volume = {abs/1704.04368},\n year = {2017},\n url = {http://arxiv.org/abs/1704.04368},\n archivePrefix = {arXiv},\n eprint = {1704.04368},\n timestamp = {Mon, 13 Aug 2018 16:46:08 +0200},\n biburl = {https://dblp.org/rec/bib/journals/corr/SeeLM17},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n@inproceedings{hermann2015teaching,\n title={Teaching machines to read and comprehend},\n author={Hermann, Karl Moritz and Kocisky, Tomas and Grefenstette, Edward and Espeholt, Lasse and Kay, Will and Suleyman, Mustafa and Blunsom, Phil},\n booktitle={Advances in neural information processing systems},\n pages={1693--1701},\n year={2015}\n}\n", "homepage": "https://github.com/abisee/cnn-dailymail", "license": "", "features": {"article": {"dtype": "string", "id": null, "_type": "Value"}, "highlights": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cnn_dailymail", "config_name": "1.0.0", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1261704307, "num_examples": 287113, "dataset_name": "cnn_dailymail"}, "validation": {"name": "validation", "num_bytes": 57732436, "num_examples": 13368, "dataset_name": "cnn_dailymail"}, "test": {"name": "test", "num_bytes": 49925756, "num_examples": 11490, "dataset_name": "cnn_dailymail"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfTHk4NFg2SndKcjQ": {"num_bytes": 158577824, "checksum": "e8fbc0027e54e0a916abd9c969eb35f708ed1467d7ef4e3b17a56739d65cb200"}, "https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfM1BxdkxVaTY2bWs": {"num_bytes": 375893739, "checksum": "ad69010002210b7c406718248ee66e65868b9f6820f163aa966369878d14147e"}, 
"https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt": {"num_bytes": 2109547, "checksum": "c4f5efb5ec2126430a5c156efbd13d0e9c4cb490169e552c38b4a51981a009bd"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt": {"num_bytes": 46424688, "checksum": "a5cee49f3a6c862c26ce29308236d2a99625ab6c86a43be22d5206b2790d8029"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt": {"num_bytes": 2433674, "checksum": "81887e982b045083409c6ee838aede8ff4b97291605bcfb21bffc456a16991db"}}, "download_size": 585439472, "dataset_size": 1369362499, "size_in_bytes": 1954801971}, "2.0.0": {"description": "CNN/DailyMail non-anonymized summarization dataset.\n\nThere are two features:\n - article: text of news article, used as the document to be summarized\n - highlights: joined text of highlights with and around each\n highlight, which is the target summary\n", "citation": "@article{DBLP:journals/corr/SeeLM17,\n author = {Abigail See and\n Peter J. Liu and\n Christopher D. 
Manning},\n title = {Get To The Point: Summarization with Pointer-Generator Networks},\n journal = {CoRR},\n volume = {abs/1704.04368},\n year = {2017},\n url = {http://arxiv.org/abs/1704.04368},\n archivePrefix = {arXiv},\n eprint = {1704.04368},\n timestamp = {Mon, 13 Aug 2018 16:46:08 +0200},\n biburl = {https://dblp.org/rec/bib/journals/corr/SeeLM17},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n@inproceedings{hermann2015teaching,\n title={Teaching machines to read and comprehend},\n author={Hermann, Karl Moritz and Kocisky, Tomas and Grefenstette, Edward and Espeholt, Lasse and Kay, Will and Suleyman, Mustafa and Blunsom, Phil},\n booktitle={Advances in neural information processing systems},\n pages={1693--1701},\n year={2015}\n}\n", "homepage": "https://github.com/abisee/cnn-dailymail", "license": "", "features": {"article": {"dtype": "string", "id": null, "_type": "Value"}, "highlights": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cnn_dailymail", "config_name": "2.0.0", "version": {"version_str": "2.0.0", "description": "Separate target sentences with newline.", "datasets_version_to_prepare": null, "major": 2, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1261704307, "num_examples": 287113, "dataset_name": "cnn_dailymail"}, "validation": {"name": "validation", "num_bytes": 57732436, "num_examples": 13368, "dataset_name": "cnn_dailymail"}, "test": {"name": "test", "num_bytes": 49925756, "num_examples": 11490, "dataset_name": "cnn_dailymail"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfTHk4NFg2SndKcjQ": {"num_bytes": 158577824, "checksum": "e8fbc0027e54e0a916abd9c969eb35f708ed1467d7ef4e3b17a56739d65cb200"}, "https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfM1BxdkxVaTY2bWs": {"num_bytes": 375893739, "checksum": 
"ad69010002210b7c406718248ee66e65868b9f6820f163aa966369878d14147e"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt": {"num_bytes": 2109547, "checksum": "c4f5efb5ec2126430a5c156efbd13d0e9c4cb490169e552c38b4a51981a009bd"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt": {"num_bytes": 46424688, "checksum": "a5cee49f3a6c862c26ce29308236d2a99625ab6c86a43be22d5206b2790d8029"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt": {"num_bytes": 2433674, "checksum": "81887e982b045083409c6ee838aede8ff4b97291605bcfb21bffc456a16991db"}}, "download_size": 585439472, "dataset_size": 1369362499, "size_in_bytes": 1954801971}} \ No newline at end of file +{"3.0.0": {"description": "CNN/DailyMail non-anonymized summarization dataset.\n\nThere are two features:\n - article: text of news article, used as the document to be summarized\n - highlights: joined text of highlights with and around each\n highlight, which is the target summary\n", "citation": "@article{DBLP:journals/corr/SeeLM17,\n author = {Abigail See and\n Peter J. Liu and\n Christopher D. 
Manning},\n title = {Get To The Point: Summarization with Pointer-Generator Networks},\n journal = {CoRR},\n volume = {abs/1704.04368},\n year = {2017},\n url = {http://arxiv.org/abs/1704.04368},\n archivePrefix = {arXiv},\n eprint = {1704.04368},\n timestamp = {Mon, 13 Aug 2018 16:46:08 +0200},\n biburl = {https://dblp.org/rec/bib/journals/corr/SeeLM17},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n@inproceedings{hermann2015teaching,\n title={Teaching machines to read and comprehend},\n author={Hermann, Karl Moritz and Kocisky, Tomas and Grefenstette, Edward and Espeholt, Lasse and Kay, Will and Suleyman, Mustafa and Blunsom, Phil},\n booktitle={Advances in neural information processing systems},\n pages={1693--1701},\n year={2015}\n}\n", "homepage": "https://github.com/abisee/cnn-dailymail", "license": "", "features": {"article": {"dtype": "string", "id": null, "_type": "Value"}, "highlights": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "cnn_dailymail", "config_name": "3.0.0", "version": {"version_str": "3.0.0", "description": "Using cased version.", "major": 3, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1261704133, "num_examples": 287113, "dataset_name": "cnn_dailymail"}, "validation": {"name": "validation", "num_bytes": 57732436, "num_examples": 13368, "dataset_name": "cnn_dailymail"}, "test": {"name": "test", "num_bytes": 49925756, "num_examples": 11490, "dataset_name": "cnn_dailymail"}}, "download_checksums": {"https://huggingface.co/datasets/cnn_dailymail/resolve/11343c3752184397d56efc19a8a7cceb68089318/data/cnn_stories.tgz": {"num_bytes": 158577824, "checksum": "e8fbc0027e54e0a916abd9c969eb35f708ed1467d7ef4e3b17a56739d65cb200"}, 
"https://huggingface.co/datasets/cnn_dailymail/resolve/11343c3752184397d56efc19a8a7cceb68089318/data/dailymail_stories.tgz": {"num_bytes": 375893739, "checksum": "ad69010002210b7c406718248ee66e65868b9f6820f163aa966369878d14147e"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt": {"num_bytes": 46424688, "checksum": "a5cee49f3a6c862c26ce29308236d2a99625ab6c86a43be22d5206b2790d8029"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt": {"num_bytes": 2433674, "checksum": "81887e982b045083409c6ee838aede8ff4b97291605bcfb21bffc456a16991db"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt": {"num_bytes": 2109547, "checksum": "c4f5efb5ec2126430a5c156efbd13d0e9c4cb490169e552c38b4a51981a009bd"}}, "download_size": 585439472, "post_processing_size": null, "dataset_size": 1369362325, "size_in_bytes": 1954801797}, "1.0.0": {"description": "CNN/DailyMail non-anonymized summarization dataset.\n\nThere are two features:\n - article: text of news article, used as the document to be summarized\n - highlights: joined text of highlights with and around each\n highlight, which is the target summary\n", "citation": "@article{DBLP:journals/corr/SeeLM17,\n author = {Abigail See and\n Peter J. Liu and\n Christopher D. 
Manning},\n title = {Get To The Point: Summarization with Pointer-Generator Networks},\n journal = {CoRR},\n volume = {abs/1704.04368},\n year = {2017},\n url = {http://arxiv.org/abs/1704.04368},\n archivePrefix = {arXiv},\n eprint = {1704.04368},\n timestamp = {Mon, 13 Aug 2018 16:46:08 +0200},\n biburl = {https://dblp.org/rec/bib/journals/corr/SeeLM17},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n@inproceedings{hermann2015teaching,\n title={Teaching machines to read and comprehend},\n author={Hermann, Karl Moritz and Kocisky, Tomas and Grefenstette, Edward and Espeholt, Lasse and Kay, Will and Suleyman, Mustafa and Blunsom, Phil},\n booktitle={Advances in neural information processing systems},\n pages={1693--1701},\n year={2015}\n}\n", "homepage": "https://github.com/abisee/cnn-dailymail", "license": "", "features": {"article": {"dtype": "string", "id": null, "_type": "Value"}, "highlights": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "cnn_dailymail", "config_name": "1.0.0", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1261704133, "num_examples": 287113, "dataset_name": "cnn_dailymail"}, "validation": {"name": "validation", "num_bytes": 57732436, "num_examples": 13368, "dataset_name": "cnn_dailymail"}, "test": {"name": "test", "num_bytes": 49925756, "num_examples": 11490, "dataset_name": "cnn_dailymail"}}, "download_checksums": {"https://huggingface.co/datasets/cnn_dailymail/resolve/11343c3752184397d56efc19a8a7cceb68089318/data/cnn_stories.tgz": {"num_bytes": 158577824, "checksum": "e8fbc0027e54e0a916abd9c969eb35f708ed1467d7ef4e3b17a56739d65cb200"}, "https://huggingface.co/datasets/cnn_dailymail/resolve/11343c3752184397d56efc19a8a7cceb68089318/data/dailymail_stories.tgz": 
{"num_bytes": 375893739, "checksum": "ad69010002210b7c406718248ee66e65868b9f6820f163aa966369878d14147e"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt": {"num_bytes": 46424688, "checksum": "a5cee49f3a6c862c26ce29308236d2a99625ab6c86a43be22d5206b2790d8029"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt": {"num_bytes": 2433674, "checksum": "81887e982b045083409c6ee838aede8ff4b97291605bcfb21bffc456a16991db"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt": {"num_bytes": 2109547, "checksum": "c4f5efb5ec2126430a5c156efbd13d0e9c4cb490169e552c38b4a51981a009bd"}}, "download_size": 585439472, "post_processing_size": null, "dataset_size": 1369362325, "size_in_bytes": 1954801797}, "2.0.0": {"description": "CNN/DailyMail non-anonymized summarization dataset.\n\nThere are two features:\n - article: text of news article, used as the document to be summarized\n - highlights: joined text of highlights with and around each\n highlight, which is the target summary\n", "citation": "@article{DBLP:journals/corr/SeeLM17,\n author = {Abigail See and\n Peter J. Liu and\n Christopher D. 
Manning},\n title = {Get To The Point: Summarization with Pointer-Generator Networks},\n journal = {CoRR},\n volume = {abs/1704.04368},\n year = {2017},\n url = {http://arxiv.org/abs/1704.04368},\n archivePrefix = {arXiv},\n eprint = {1704.04368},\n timestamp = {Mon, 13 Aug 2018 16:46:08 +0200},\n biburl = {https://dblp.org/rec/bib/journals/corr/SeeLM17},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n\n@inproceedings{hermann2015teaching,\n title={Teaching machines to read and comprehend},\n author={Hermann, Karl Moritz and Kocisky, Tomas and Grefenstette, Edward and Espeholt, Lasse and Kay, Will and Suleyman, Mustafa and Blunsom, Phil},\n booktitle={Advances in neural information processing systems},\n pages={1693--1701},\n year={2015}\n}\n", "homepage": "https://github.com/abisee/cnn-dailymail", "license": "", "features": {"article": {"dtype": "string", "id": null, "_type": "Value"}, "highlights": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "cnn_dailymail", "config_name": "2.0.0", "version": {"version_str": "2.0.0", "description": "Separate target sentences with newline.", "major": 2, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1261704133, "num_examples": 287113, "dataset_name": "cnn_dailymail"}, "validation": {"name": "validation", "num_bytes": 57732436, "num_examples": 13368, "dataset_name": "cnn_dailymail"}, "test": {"name": "test", "num_bytes": 49925756, "num_examples": 11490, "dataset_name": "cnn_dailymail"}}, "download_checksums": {"https://huggingface.co/datasets/cnn_dailymail/resolve/11343c3752184397d56efc19a8a7cceb68089318/data/cnn_stories.tgz": {"num_bytes": 158577824, "checksum": "e8fbc0027e54e0a916abd9c969eb35f708ed1467d7ef4e3b17a56739d65cb200"}, 
"https://huggingface.co/datasets/cnn_dailymail/resolve/11343c3752184397d56efc19a8a7cceb68089318/data/dailymail_stories.tgz": {"num_bytes": 375893739, "checksum": "ad69010002210b7c406718248ee66e65868b9f6820f163aa966369878d14147e"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt": {"num_bytes": 46424688, "checksum": "a5cee49f3a6c862c26ce29308236d2a99625ab6c86a43be22d5206b2790d8029"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt": {"num_bytes": 2433674, "checksum": "81887e982b045083409c6ee838aede8ff4b97291605bcfb21bffc456a16991db"}, "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt": {"num_bytes": 2109547, "checksum": "c4f5efb5ec2126430a5c156efbd13d0e9c4cb490169e552c38b4a51981a009bd"}}, "download_size": 585439472, "post_processing_size": null, "dataset_size": 1369362325, "size_in_bytes": 1954801797}} \ No newline at end of file diff --git a/datasets/cnn_dailymail/dummy/1.0.0/1.0.0/dummy_data.zip b/datasets/cnn_dailymail/dummy/1.0.0/1.0.0/dummy_data.zip index a6efad7ad3e..4b4cb90442a 100644 Binary files a/datasets/cnn_dailymail/dummy/1.0.0/1.0.0/dummy_data.zip and b/datasets/cnn_dailymail/dummy/1.0.0/1.0.0/dummy_data.zip differ diff --git a/datasets/cnn_dailymail/dummy/2.0.0/2.0.0/dummy_data.zip b/datasets/cnn_dailymail/dummy/2.0.0/2.0.0/dummy_data.zip index d668e1e424b..4b4cb90442a 100644 Binary files a/datasets/cnn_dailymail/dummy/2.0.0/2.0.0/dummy_data.zip and b/datasets/cnn_dailymail/dummy/2.0.0/2.0.0/dummy_data.zip differ diff --git a/datasets/cnn_dailymail/dummy/3.0.0/3.0.0/dummy_data.zip b/datasets/cnn_dailymail/dummy/3.0.0/3.0.0/dummy_data.zip index 817382a9a0e..4b4cb90442a 100644 Binary files a/datasets/cnn_dailymail/dummy/3.0.0/3.0.0/dummy_data.zip and b/datasets/cnn_dailymail/dummy/3.0.0/3.0.0/dummy_data.zip differ diff --git a/datasets/coached_conv_pref/README.md b/datasets/coached_conv_pref/README.md index b7d7ac2904a..26d54166f89 100644 --- 
a/datasets/coached_conv_pref/README.md +++ b/datasets/coached_conv_pref/README.md @@ -3,9 +3,9 @@ annotations_creators: - expert-generated language_creators: - found -languages: +language: - en -licenses: +license: - cc-by-sa-4.0 multilinguality: - monolingual @@ -15,8 +15,9 @@ source_datasets: - original task_categories: - other -- sequence-modeling -- structure-prediction +- text-generation +- fill-mask +- token-classification task_ids: - other-other-Conversational Recommendation - dialogue-modeling diff --git a/datasets/coarse_discourse/README.md b/datasets/coarse_discourse/README.md index c0a4bee4b91..6697a2d7cd1 100644 --- a/datasets/coarse_discourse/README.md +++ b/datasets/coarse_discourse/README.md @@ -1,6 +1,24 @@ --- -paperswithcode_id: coarse-discourse +annotations_creators: +- crowdsourced +language: +- en +language_creators: +- found +license: +- cc-by-4.0 +multilinguality: +- monolingual pretty_name: Coarse Discourse +size_categories: +- 100K To arrive at CC12M, we keep +the image-text filtering intact, and relax the unimodal filters only. First, for image-based filtering, we set the maximum ratio of larger to smaller dimension to 2.5 instead of 2. +We still keep only JPEG images with size greater than +400 pixels, and still exclude images that trigger pornography detectors. Second, in text-based filtering, we allow text +between 3 and 256 words in the alt-text. We still discard +candidates with no noun or no determiner, but permit ones +without prepositions. We discard the heuristics regarding +high unique-word ratio covering various POS tags and word +capitalization. We set the maximum fraction of word repetition allowed to 0.2. 
Given a larger pool of text due to the +above relaxations, the threshold for counting a word type as +rare is increased from 5 to 20. + +> The main motivation for CC3M to +perform text transformation is that a majority of candidate +captions contain ultrafine-grained entities such as proper +names (people, venues, locations, etc.), making it extremely +difficult to learn as part of the image captioning task. In +contrast, we are not restricted by the end task of image caption generation. Our intuition is that relatively more difficult pre-training data would lead to better transferability. +We thus do not perform hypernimization or digit substitution. [...] The only exception to the “keep alt-texts as +raw as possible” rule is performing person-name substitutions, which we identify as necessary to protect the privacy +of the individuals in these images. For this step, we use the +Google Cloud Natural Language APIs to detect all named +entities of type Person, and substitute them by a special token `<PERSON>`. Around 25% of all the alt-texts in CC12M +are transformed in this fashion. + +#### Who are the source language producers? + +Not specified. + +### Annotations + +#### Annotation process + +Annotations are extracted jointly with the images using the automatic pipeline. + +#### Who are the annotators? + +Not specified. + +### Personal and Sensitive Information + +From the paper: + +> The only exception to the “keep alt-texts as +raw as possible” rule is performing person-name substitutions, which we identify as necessary to protect the privacy +of the individuals in these images. For this step, we use the +Google Cloud Natural Language APIs to detect all named +entities of type Person, and substitute them by a special token `<PERSON>`. Around 25% of all the alt-texts in CC12M +are transformed in this fashion.
+ +## Considerations for Using the Data + +### Social Impact of Dataset + +[More Information Needed] + +### Discussion of Biases + +[More Information Needed] + +### Other Known Limitations + +[More Information Needed] + +## Additional Information + +### Dataset Curators + +Soravit Changpinyo, Piyush Sharma, Nan Ding and Radu Soricut. + +### Licensing Information + +The dataset may be freely used for any purpose, although acknowledgement of +Google LLC ("Google") as the data source would be appreciated. The dataset is +provided "AS IS" without any warranty, express or implied. Google disclaims all +liability for any damages, direct or indirect, resulting from the use of the +dataset. + +### Citation Information + +```bibtex +@inproceedings{changpinyo2021cc12m, + title = {{Conceptual 12M}: Pushing Web-Scale Image-Text Pre-Training To Recognize Long-Tail Visual Concepts}, + author = {Changpinyo, Soravit and Sharma, Piyush and Ding, Nan and Soricut, Radu}, + booktitle = {CVPR}, + year = {2021}, +} +``` + +### Contributions + +Thanks to [@thomasw21](https://github.com/thomasw21) for adding this dataset. \ No newline at end of file diff --git a/datasets/conceptual_12m/conceptual_12m.py b/datasets/conceptual_12m/conceptual_12m.py new file mode 100644 index 00000000000..401b41fd6d7 --- /dev/null +++ b/datasets/conceptual_12m/conceptual_12m.py @@ -0,0 +1,77 @@ +# coding=utf-8 +# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Conceptual 12M dataset.""" + +import datasets + + +_CITATION = """\ +@inproceedings{changpinyo2021cc12m, + title = {{Conceptual 12M}: Pushing Web-Scale Image-Text Pre-Training To Recognize Long-Tail Visual Concepts}, + author = {Changpinyo, Soravit and Sharma, Piyush and Ding, Nan and Soricut, Radu}, + booktitle = {CVPR}, + year = {2021}, +} +""" + +_DESCRIPTION = """\ +Conceptual 12M is a large-scale dataset of 12 million +image-text pairs specifically meant to be used for visionand-language pre-training. +Its data collection pipeline is a relaxed version of the one used in Conceptual Captions 3M. +""" + +_HOMEPAGE = "https://github.com/google-research-datasets/conceptual-12m" + +_LICENSE = """\ +The dataset may be freely used for any purpose, although acknowledgement of +Google LLC ("Google") as the data source would be appreciated. The dataset is +provided "AS IS" without any warranty, express or implied. Google disclaims all +liability for any damages, direct or indirect, resulting from the use of the +dataset. 
+""" + +_URL = "https://storage.googleapis.com/conceptual_12m/cc12m.tsv" + + +class Conceptual12M(datasets.GeneratorBasedBuilder): + """Conceptual 12M dataset.""" + + def _info(self): + features = datasets.Features({"image_url": datasets.Value("string"), "caption": datasets.Value("string")}) + + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=features, + homepage=_HOMEPAGE, + license=_LICENSE, + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + file = dl_manager.download(_URL) + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, + gen_kwargs={ + "file": file, + }, + ), + ] + + def _generate_examples(self, file): + with open(file, "r", encoding="utf-8") as fi: + for idx, line in enumerate(fi): + image_url, caption = line.split("\t", maxsplit=1) + yield idx, {"image_url": image_url, "caption": caption} diff --git a/datasets/conceptual_12m/dataset_infos.json b/datasets/conceptual_12m/dataset_infos.json new file mode 100644 index 00000000000..1b3fd95d1b1 --- /dev/null +++ b/datasets/conceptual_12m/dataset_infos.json @@ -0,0 +1 @@ +{"default": {"description": "Conceptual 12M is a large-scale dataset of 12 million\nimage-text pairs specifically meant to be used for visionand-language pre-training.\nIts data collection pipeline is a relaxed version of the one used in Conceptual Captions 3M.\n", "citation": "@inproceedings{changpinyo2021cc12m,\n title = {{Conceptual 12M}: Pushing Web-Scale Image-Text Pre-Training To Recognize Long-Tail Visual Concepts},\n author = {Changpinyo, Soravit and Sharma, Piyush and Ding, Nan and Soricut, Radu},\n booktitle = {CVPR},\n year = {2021},\n}\n", "homepage": "https://github.com/google-research-datasets/conceptual-12m", "license": "The dataset may be freely used for any purpose, although acknowledgement of\nGoogle LLC (\"Google\") as the data source would be appreciated. The dataset is\nprovided \"AS IS\" without any warranty, express or implied. 
Google disclaims all\nliability for any damages, direct or indirect, resulting from the use of the\ndataset.\n", "features": {"image_url": {"dtype": "string", "id": null, "_type": "Value"}, "caption": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "conceptual12_m", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2794168030, "num_examples": 12423374, "dataset_name": "conceptual12_m"}}, "download_checksums": {"https://storage.googleapis.com/conceptual_12m/cc12m.tsv": {"num_bytes": 2707204412, "checksum": "892b549d493c7e75ade10d46c88c9ddabb097790d912b74cfc0ea4ff035ec2c3"}}, "download_size": 2707204412, "post_processing_size": null, "dataset_size": 2794168030, "size_in_bytes": 5501372442}} \ No newline at end of file diff --git a/datasets/conceptual_12m/dummy/0.0.0/dummy_data.zip b/datasets/conceptual_12m/dummy/0.0.0/dummy_data.zip new file mode 100644 index 00000000000..a4da4fad959 Binary files /dev/null and b/datasets/conceptual_12m/dummy/0.0.0/dummy_data.zip differ diff --git a/datasets/conceptual_captions/README.md b/datasets/conceptual_captions/README.md new file mode 100644 index 00000000000..4609f9bcf65 --- /dev/null +++ b/datasets/conceptual_captions/README.md @@ -0,0 +1,256 @@ +--- +annotations_creators: +- found +language_creators: +- found +language: +- en +license: +- other +multilinguality: +- monolingual +size_categories: +- 1M