Skip to content

fix for word_tokenize() Failing to Split English Contractions When Followed by [\t\n\f\r] #814

fix for word_tokenize() Failing to Split English Contractions When Followed by [\t\n\f\r]

fix for word_tokenize() Failing to Split English Contractions When Followed by [\t\n\f\r] #814

Workflow file for this run

name: ci-workflow

# Events that trigger this workflow.
on:
  - push
  - pull_request
  - workflow_dispatch

# Workflow-level environment: filesystem locations of the third-party tools,
# all rooted under /home/runner/third.
env:
  CORENLP: /home/runner/third/stanford-corenlp
  CORENLP_MODELS: /home/runner/third/stanford-corenlp
  STANFORD_PARSER: /home/runner/third/stanford-parser
  STANFORD_MODELS: /home/runner/third/stanford-postagger
  STANFORD_POSTAGGER: /home/runner/third/stanford-postagger
  SENNA: /home/runner/third/senna
  PROVER9: /home/runner/third/prover9/bin
  MEGAM: /home/runner/third/megam
  # TADM is intentionally left unset: it needs `libtaopetsc.so` from
  # PETSc v2.3.3 and probably has further hard-to-install requirements,
  # so its tests are not run.
  # TADM: /home/runner/third/tadm/bin
  MALT_PARSER: /home/runner/third/maltparser
jobs:
  # Lint gate. Both cache jobs declare `needs: pre-commit`, and the test job
  # needs both cache jobs, so nothing else runs until this job succeeds.
  pre-commit:
    name: Run pre-commit
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # run with latest python version
          python-version: '3.x'
      - run: |
          pip install pre-commit
          pre-commit run --all-files

  # Populates the ~/nltk_data cache so the test job can restore it.
  cache_nltk_data:
    name: Cache nltk_data
    needs: pre-commit
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Cache nltk data
        # actions/cache@v4 replaces @v3, which runs on the deprecated Node 16
        # runtime; the `path`/`key` inputs are unchanged.
        uses: actions/cache@v4
        id: restore-cache
        with:
          path: ~/nltk_data
          # Changing the CACHE_VERSION secret changes the key and therefore
          # forces a fresh download.
          key: nltk_data_${{ secrets.CACHE_VERSION }}
      - name: Download nltk data packages on cache miss
        if: steps.restore-cache.outputs.cache-hit != 'true'
        shell: bash
        # No `pip install nltk` here: presumably `import nltk` resolves to the
        # checked-out source tree in the working directory -- TODO confirm.
        run: |
          pip install regex # dependencies needed to download nltk data
          python -c "import nltk; from pathlib import Path; path = Path('~/nltk_data').expanduser(); path.mkdir(exist_ok=True); nltk.download('all', download_dir=path)"

  # Populates the ~/third cache with the third-party tools. Runs on Ubuntu
  # only; the test job likewise restores this cache only on Linux runners.
  cache_third_party:
    name: Cache third party tools
    needs: pre-commit
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Cache third party tools
        uses: actions/cache@v4
        id: restore-cache
        with:
          path: ~/third
          # The key includes a hash of the download script, so editing the
          # script invalidates the cache.
          key: third_${{ hashFiles('tools/github_actions/third-party.sh') }}_${{ secrets.CACHE_VERSION }}
      - name: Download third party data
        if: steps.restore-cache.outputs.cache-hit != 'true'
        run: |
          chmod +x ./tools/github_actions/third-party.sh
          ./tools/github_actions/third-party.sh

  # Runs the test suite (including doctests) for every Python/OS combination.
  test:
    name: Python ${{ matrix.python-version }} on ${{ matrix.os }}
    needs: [cache_nltk_data, cache_third_party]
    strategy:
      matrix:
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
        os: [ubuntu-latest, macos-latest, windows-latest]
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Setup python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Restore cached dependencies
        uses: actions/cache@v4
        id: restore-cache
        with:
          path: ${{ env.pythonLocation }}
          key: python-dependencies-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('requirements-ci.txt') }}-${{ env.pythonLocation }}
      - name: Install dependencies
        # Runs unconditionally. The cache-hit guard below is disabled due to a
        # persistent issue with restoring cache on macos runner:
        # if: steps.restore-cache.outputs.cache-hit != 'true'
        run: |
          pip install --upgrade pip
          pip install --upgrade --requirement requirements-ci.txt
      - name: Use cached nltk data
        uses: actions/cache@v4
        with:
          path: ~/nltk_data
          key: nltk_data_${{ secrets.CACHE_VERSION }}
      - name: Use cached third party tools
        if: runner.os == 'Linux'
        uses: actions/cache@v4
        with:
          path: ~/third
          key: third_${{ hashFiles('tools/github_actions/third-party.sh') }}_${{ secrets.CACHE_VERSION }}
      - name: Set up JDK 16
        # Linux only, like the third-party cache above; presumably needed by
        # the Java-based third-party tools -- TODO confirm.
        if: runner.os == 'Linux'
        uses: actions/setup-java@v4
        with:
          distribution: 'zulu'
          java-version: '16'
      - name: Run pytest
        shell: bash
        run: |
          pytest --numprocesses auto -rsx --doctest-modules nltk