From e9b6d17df41c8648cd9abd3d926fb77a90c6f4b6 Mon Sep 17 00:00:00 2001 From: amyeroberts <22614925+amyeroberts@users.noreply.github.com> Date: Wed, 12 Oct 2022 18:32:02 +0100 Subject: [PATCH] Image transforms library (#18520) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adapt FE methods to transforms library * Mixin for saving the image processor * Base processor skeleton * BatchFeature for packaging image processor outputs * Initial image processor for GLPN * REmove accidental import * Fixup and docs * Mixin for saving the image processor * Fixup and docs * Import BatchFeature from feature_extraction_utils * Fixup and docs * Fixup and docs * Fixup and docs * Fixup and docs * BatchFeature for packaging image processor outputs * Import BatchFeature from feature_extraction_utils * Import BatchFeature from feature_extraction_utils * Fixup and docs * Fixup and docs * BatchFeature for packaging image processor outputs * Import BatchFeature from feature_extraction_utils * Fixup and docs * Mixin for saving the image processor * Fixup and docs * Add rescale back and remove ImageType * fix import mistake * Fix enum var reference * Can transform and specify image data format * Remove redundant function * Update reference * Data format flag for rescale * Fix typo * Fix dimension check * Fixes to make IP and FE outputs match * Add tests for transforms * Add test for utils * Update some docstrings * Make sure in channels last before converting to PIL * Remove default to numpy batching * Fix up * Add docstring and model_input_types * Use feature processor config from hub * Alias GLPN feature extractor to image processor * Alias feature extractor mixin * Add return_numpy=False flag for resize * Fix up * Fix up * Use different frameworks safely * Safely import PIL * Call function checking if PIL available * Only import if vision available * Address Sylvain PR comments Co-authored-by: Sylvain.gugger@gmail.com * Apply suggestions from code review Co-authored-by: Sylvain Gugger Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/image_transforms.py Co-authored-by: Alara Dirik <8944735+alaradirik@users.noreply.github.com> * Update src/transformers/models/glpn/feature_extraction_glpn.py Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> * Add in docstrings * Fix TFSwinSelfAttention to have relative position index as non-trainable weight (#18226) Signed-off-by: Seunghwan Hong * Refactor `TFSwinLayer` to increase serving compatibility (#18352) * Refactor `TFSwinLayer` to increase serving compatibility Signed-off-by: Seunghwan Hong * Fix missed parameters while refactoring Signed-off-by: Seunghwan Hong * Fix window_reverse to calculate batch size Signed-off-by: Seunghwan Hong Co-Authored-By: amyeroberts <22614925+amyeroberts@users.noreply.github.com> Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Add TF prefix to TF-Res test class (#18481) Co-authored-by: ydshieh * Remove py.typed (#18485) * Fix pipeline tests (#18487) * Fix pipeline tests * Make sure all pipelines tests run with init changes * Use new huggingface_hub tools for download models (#18438) * Draft new cached_file * Initial draft for config and model * Small fixes * Fix first batch of tests * Look in cache when internet is down * Fix last tests * Bad black, not fixing all quality errors * Make diff less * Implement change for TF and Flax models * Add tokenizer and feature extractor * For compatibility 
with main * Add utils to move the cache and auto-do it at first use. * Quality * Deal with empty commit shas * Deal with empty etag * Address review comments * Fix `test_dbmdz_english` by updating expected values (#18482) Co-authored-by: ydshieh * Move cache folder to huggingface/hub for consistency with hf_hub (#18492) * Move cache folder to just huggingface * Thank you VsCode for this needless import * Move to hub * Forgot one * Update some expected values in `quicktour.mdx` for `resampy 0.3.0` (#18484) Co-authored-by: ydshieh * Forgot one new_ for cache migration * disable Onnx test for google/long-t5-tglobal-base (#18454) Co-authored-by: ydshieh * Typo reported by Joel Grus on TWTR (#18493) * Just re-reading the whole doc every couple of months 😬 (#18489) * Delete valohai.yaml * NLP => ML * typo * website supports https * datasets * 60k + modalities * unrelated link fixing for accelerate * Ok those links were actually broken * Fix link * Make `AutoTokenizer` auto-link * wording tweak * add at least one non-nlp task * `transformers-cli login` => `huggingface-cli login` (#18490) * zero chance anyone's using that constant no? * `transformers-cli login` => `huggingface-cli login` * `transformers-cli repo create` => `huggingface-cli repo create` * `make style` * Add seed setting to image classification example (#18519) * [DX fix] Fixing QA pipeline streaming a dataset. (#18516) * [DX fix] Fixing QA pipeline streaming a dataset. QuestionAnsweringArgumentHandler would iterate over the whole dataset effectively killing all properties of the pipeline. This restores nice properties when using `Dataset` or `Generator` since those are meant to be consumed lazily. * Handling TF better. * Clean up hub (#18497) * Clean up utils.hub * Remove imports * More fixes * Last fix * update fsdp docs (#18521) * updating fsdp documentation * typo fix * Fix compatibility with 1.12 (#17925) * Fix compatibility with 1.12 * Remove pin from examples requirements * Update torch scatter version * Fix compatibility with 1.12 * Remove pin from examples requirements * Update torch scatter version * fix torch.onnx.symbolic_opset12 import * Reject bad version Co-authored-by: ydshieh * Remove debug statement * Specify en in doc-builder README example (#18526) Co-authored-by: Ankur Goyal * New cache fixes: add safeguard before looking in folders (#18522) * unpin resampy (#18527) Co-authored-by: ydshieh * ✨ update to use interlibrary links instead of Markdown (#18500) * Add example of multimodal usage to pipeline tutorial (#18498) * 📝 add example of multimodal usage to pipeline tutorial * 🖍 apply feedbacks * 🖍 apply niels feedback * [VideoMAE] Add model to doc tests (#18523) * Add videomae to doc tests * Add pip install decord Co-authored-by: Niels Rogge * Update perf_train_gpu_one.mdx (#18532) * Update no_trainer.py scripts to include accelerate gradient accumulation wrapper (#18473) * Added accelerate gradient accumulation wrapper to run_image_classification_no_trainer.py example script * make fixup changes * PR comments * changed input to Acceletor based on PR comment, ran make fixup * Added comment explaining the sync_gradients statement * Fixed lr scheduler max steps * Changed run_clm_no_trainer.py script to use accelerate gradient accum wrapper * Fixed all scripts except wav2vec2 pretraining to use accelerate gradient accum wrapper * Added accelerate gradient accum wrapper for wav2vec2_pretraining_no_trainer.py script * make fixup and lr_scheduler step inserted back into run_qa_beam_search_no_trainer.py * removed 
changes to run_wav2vec2_pretraining_no_trainer.py script and fixed using wrong constant in qa_beam_search_no_trainer.py script * Add Spanish translation of converting_tensorflow_models.mdx (#18512) * Add file in spanish docs to be translated * Finish translation to Spanish * Improve Spanish wording * Add suggested changes from review * Spanish translation of summarization.mdx (#15947) (#18477) * Add Spanish translation of summarization.mdx * Apply suggestions from code review Co-authored-by: Omar U. Espejel Co-authored-by: Omar U. Espejel * Let's not cast them all (#18471) * add correct dtypes when checking for params dtype * forward contrib credits * Update src/transformers/modeling_utils.py Co-authored-by: Thomas Wang <24695242+thomasw21@users.noreply.github.com> * more comments - added more comments on why we cast only floating point parameters * Update src/transformers/modeling_utils.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: sgugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Thomas Wang <24695242+thomasw21@users.noreply.github.com> * fix: data2vec-vision Onnx ready-made configuration. (#18427) * feat: add the data2vec conf that are missing https://huggingface.co/docs/transformers/serialization * fix: wrong config * Add mt5 onnx config (#18394) * update features * MT5OnnxConfig added with updated with tests and docs * fix imports * fix onnc_config_cls for mt5 Co-authored-by: Thomas Chaigneau * Minor update of `run_call_with_unpacked_inputs` (#18541) Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: ydshieh * BART - Fix attention mask device issue on copied models (#18540) * attempt to fix attn mask device * fix bart `_prepare_decoder_attention_mask` - add correct device - run `make fix-copies` to propagate the fix * Adding a new `align_to_words` param to qa pipeline. (#18010) * Adding a new `align_to_words` param to qa pipeline. * Update src/transformers/pipelines/question_answering.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Import protection. Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * 📝 update metric with evaluate (#18535) * Restore _init_weights value in no_init_weights (#18504) * Recover _init_weights value in no_init_weights For potential nested use. In addition, users might modify private no_init_weights as well. * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Remove private variable change check Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Clean up comment * 📝 update documentation build section (#18548) * `bitsandbytes` - `Linear8bitLt` integration into `transformers` models (#17901) * first commit * correct replace function * add final changes - works like charm! 
- cannot implement tests yet - tested * clean up a bit * add bitsandbytes dependencies * working version - added import function - added bitsandbytes utils file * small fix * small fix - fix import issue * fix import issues * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * refactor a bit - move bitsandbytes utils to utils - change comments on functions * reformat docstring - reformat docstring on init_empty_weights_8bit * Update src/transformers/__init__.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * revert bad formatting * change to bitsandbytes * refactor a bit - remove init8bit since it is useless * more refactoring - fixed init empty weights issue - added threshold param * small hack to make it work * Update src/transformers/modeling_utils.py * Update src/transformers/modeling_utils.py * revmoe the small hack * modify utils file * make style + refactor a bit * create correctly device map * add correct dtype for device map creation * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * apply suggestions - remove with torch.grad - do not rely on Python bool magic! * add docstring - add docstring for new kwargs * add docstring - comment `replace_8bit_linear` function - fix weird formatting * - added more documentation - added new utility function for memory footprint tracking - colab demo to add * few modifs - typo doc - force cast into float16 when load_in_8bit is enabled * added colab link * add test architecture + docstring a bit * refactor a bit testing class * make style + refactor a bit * enhance checks - add more checks - start writing saving test * clean up a bit * male style * add more details on doc * add more tests - still needs to fix 2 tests * replace by "or" - could not fix it from GitHub GUI Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * refactor a bit testing code + add readme * make style * fix import issue * Update src/transformers/modeling_utils.py Co-authored-by: Michael Benayoun * add few comments * add more doctring + make style * more docstring * raise error when loaded in 8bit * make style * add warning if loaded on CPU * add small sanity check * fix small comment * add bitsandbytes on dockerfile * Improve documentation - improve documentation from comments * add few comments * slow tests pass on the VM but not on the CI VM * Fix merge conflict * make style * another test should pass on a multi gpu setup * fix bad import in testing file * Fix slow tests - remove dummy batches - no more CUDA illegal memory errors * odify dockerfile * Update docs/source/en/main_classes/model.mdx * Update Dockerfile * Update model.mdx * Update Dockerfile * Apply suggestions from code review * few modifications - lm head can stay on disk/cpu - change model name so that test pass * change test value - change test value to the correct output - torch bmm changed to baddmm in bloom modeling when merging * modify installation guidelines * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * replace `n`by `name` * merge `load_in_8bit` and `low_cpu_mem_usage` * first try - keep the lm head in full precision * better check - check 
the attribute `base_model_prefix` instead of computing the number of parameters * added more tests * Update src/transformers/utils/bitsandbytes.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Merge branch 'integration-8bit' of https://github.com/younesbelkada/transformers into integration-8bit * improve documentation - fix typos for installation - change title in the documentation Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Michael Benayoun * TF: XLA-trainable DeBERTa v2 (#18546) * fix deberta issues * add different code paths for gpu and tpu * shorter gpu take along axis * Stable Dropout without tf cond * variable must be float * Preserve hub-related kwargs in AutoModel.from_pretrained (#18545) * Preserve hub-related kwargs in AutoModel.from_pretrained * Fix tests * Remove debug statement * TF Examples Rewrite (#18451) * Finished QA example * Dodge a merge conflict * Update text classification and LM examples * Update NER example * New Keras metrics WIP, fix NER example * Update NER example * Update MC, summarization and translation examples * Add XLA warnings when shapes are variable * Make sure batch_size is consistently scaled by num_replicas * Add PushToHubCallback to all models * Add docs links for KerasMetricCallback * Add docs links for prepare_tf_dataset and jit_compile * Correct inferred model names * Don't assume the dataset has 'lang' * Don't assume the dataset has 'lang' * Write metrics in text classification * Add 'framework' to TrainingArguments and TFTrainingArguments * Export metrics in all examples and add tests * Fix training args for Flax * Update command line args for translation test * make fixup * Fix accidentally running other tests in fp16 * Remove do_train/do_eval from run_clm.py * Remove do_train/do_eval from run_mlm.py * Add tensorflow tests to circleci * Fix circleci * Update examples/tensorflow/language-modeling/run_mlm.py Co-authored-by: Joao Gante * Update examples/tensorflow/test_tensorflow_examples.py Co-authored-by: Joao Gante * Update examples/tensorflow/translation/run_translation.py Co-authored-by: Joao Gante * Update examples/tensorflow/token-classification/run_ner.py Co-authored-by: Joao Gante * Fix save path for tests * Fix some model card kwargs * Explain the magical -1000 * Actually enable tests this time * Skip text classification PR until we fix shape inference * make fixup Co-authored-by: Joao Gante * Use commit hash to look in cache instead of calling head (#18534) * Use commit hash to look in cache instead of calling head * Add tests * Add attr for local configs too * Stupid typos * Fix tests * Update src/transformers/utils/hub.py Co-authored-by: Julien Chaumond * Address Julien's comments Co-authored-by: Julien Chaumond * `pipeline` support for `device="mps"` (or any other string) (#18494) * `pipeline` support for `device="mps"` (or any other string) * Simplify `if` nesting * Update src/transformers/pipelines/base.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Fix? 
@sgugger * passing `attr=None` is not the same as not passing `attr` 🤯 Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update philosophy to include other preprocessing classes (#18550) * 📝 update philosophy to include other preprocessing classes * 🖍 apply feedbacks * Properly move cache when it is not in default path (#18563) * Adds CLIP to models exportable with ONNX (#18515) * onnx config for clip * default opset as 14 * changes from the original repo * input values order fix * outputs fix * remove unused import * ran make fix-copies * black format * review comments: forward ref, import fix, model change revert, .to cleanup * make style * formatting fixes * revert groupvit * comment for cast to int32 * comment fix * make .T as .t() for onnx conversion * ran make fix-copies * remove unneeded comment Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * fix copies * remove comment Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * raise atol for MT5OnnxConfig (#18560) Co-authored-by: ydshieh * fix string (#18568) * Segformer TF: fix output size in documentation (#18572) * Segformer TF: fix output size in doc * Segformer pytorch: fix output size in doc Co-authored-by: Maxime Gardoni * Fix resizing bug in OWL-ViT (#18573) * Fixes resizing bug in OWL-ViT * Defaults to square resize if size is set to an int * Sets do_center_crop default value to False * Fix LayoutLMv3 documentation (#17932) * fix typos * fix sequence_length docs of LayoutLMv3Model * delete trailing white spaces * fix layoutlmv3 docs more * apply make fixup & quality * change to two versions of input docstring * apply make fixup & quality * Skip broken tests * Change BartLearnedPositionalEmbedding's forward method signature to support Opacus training (#18486) * changing BartLearnedPositionalEmbedding forward signature and references to it * removing debugging dead code (thanks style checker) * blackened modeling_bart file * removing copy inconsistencies via make fix-copies * changing references to copied signatures in Bart variants * make fix-copies once more * using expand over repeat (thanks @michaelbenayoun) * expand instead of repeat for all model copies Co-authored-by: Daniel Jones * german docs translation (#18544) * Create _config.py * Create _toctree.yml * Create index.mdx not sure about "du / ihr" oder "sie" * Create quicktour.mdx * Update _toctree.yml * Update build_documentation.yml * Update build_pr_documentation.yml * fix build * Update index.mdx * Update quicktour.mdx * Create installation.mdx * Update _toctree.yml * Deberta V2: Fix critical trace warnings to allow ONNX export (#18272) * Fix critical trace warnings to allow ONNX export * Force input to `sqrt` to be float type * Cleanup code * Remove unused import statement * Update model sew * Small refactor Co-authored-by: Michael Benayoun * Use broadcasting instead of repeat * Implement suggestion Co-authored-by: Michael Benayoun * Match deberta v2 changes in sew_d * Improve code quality * Update code quality * Consistency of small refactor * Match changes in sew_d Co-authored-by: Michael Benayoun * [FX] _generate_dummy_input supports audio-classification models for labels (#18580) * Support audio classification architectures for labels generation, as well as provides a flag to print warnings or not * Use ENV_VARS_TRUE_VALUES * Fix docstrings with last version of hf-doc-builder styler (#18581) * Fix docstrings with last version of hf-doc-builder styler * Remove empty Parameter block * 
Bump nbconvert from 6.0.1 to 6.3.0 in /examples/research_projects/lxmert (#18565) Bumps [nbconvert](https://github.com/jupyter/nbconvert) from 6.0.1 to 6.3.0. - [Release notes](https://github.com/jupyter/nbconvert/releases) - [Commits](https://github.com/jupyter/nbconvert/compare/6.0.1...6.3.0) --- updated-dependencies: - dependency-name: nbconvert dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump nbconvert in /examples/research_projects/visual_bert (#18566) Bumps [nbconvert](https://github.com/jupyter/nbconvert) from 6.0.1 to 6.3.0. - [Release notes](https://github.com/jupyter/nbconvert/releases) - [Commits](https://github.com/jupyter/nbconvert/compare/6.0.1...6.3.0) --- updated-dependencies: - dependency-name: nbconvert dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * fix owlvit tests, update docstring examples (#18586) * Return the permuted hidden states if return_dict=True (#18578) * Load sharded pt to flax (#18419) * initial commit * add small test * add cross pt tf flag to test * fix quality * style * update test with new repo * fix failing test * update * fix wrong param ordering * style * update based on review * update related to recent new caching mechanism * quality * Update based on review Co-authored-by: sgugger * quality and style * Update src/transformers/modeling_flax_utils.py Co-authored-by: sgugger Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Add type hints for ViLT models (#18577) * Add type hints for Vilt models * Add missing return type for TokenClassification class * update doc for perf_train_cpu_many, add intel mpi introduction (#18576) * update doc for perf_train_cpu_many, add mpi introduction Signed-off-by: Wang, Yi A * Update docs/source/en/perf_train_cpu_many.mdx Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update docs/source/en/perf_train_cpu_many.mdx Signed-off-by: Wang, Yi A Signed-off-by: Wang, Yi A Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * typos (#18594) * FSDP bug fix for `load_state_dict` (#18596) * Add `TFAutoModelForSemanticSegmentation` to the main `__init__.py` (#18600) Co-authored-by: ydshieh * Generate: validate `model_kwargs` (and catch typos in generate arguments) (#18261) * validate generate model_kwargs * generate tests -- not all models have an attn mask * Supporting seq2seq models for `bitsandbytes` integration (#18579) * Supporting seq2seq models for `bitsandbytes` integration - `bitsandbytes` integration supports now seq2seq models - check if a model has tied weights as an additional check * small modification - tie the weights before looking at tied weights! 
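As an illustration of the seq2seq `bitsandbytes` support described just above: it extends the existing `load_in_8bit` path rather than adding a new API. A minimal usage sketch, assuming `bitsandbytes` and a CUDA device are available; the `t5-small` checkpoint and the prompt are illustrative, not prescribed by this PR:

```python
# Hedged sketch: load a seq2seq model through the int8 integration.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained(
    "t5-small",
    device_map="auto",  # dispatch weights across available devices
    load_in_8bit=True,  # swaps nn.Linear modules for bitsandbytes Linear8bitLt
)

inputs = tokenizer("translate English to German: Hello", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs)[0], skip_special_tokens=True))
```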
* Add Donut (#18488) * First draft * Improve script * Update script * Make conversion work * Add final_layer_norm attribute to Swin's config * Add DonutProcessor * Convert more models * Improve feature extractor and convert base models * Fix bug * Improve integration tests * Improve integration tests and add model to README * Add doc test * Add feature extractor to docs * Fix integration tests * Remove register_buffer * Fix toctree and add missing attribute * Add DonutSwin * Make conversion script work * Improve conversion script * Address comment * Fix bug * Fix another bug * Remove deprecated method from docs * Make Swin and Swinv2 untouched * Fix code examples * Fix processor * Update model_type to donut-swin * Add feature extractor tests, add token2json method, improve feature extractor * Fix failing tests, remove integration test * Add do_thumbnail for consistency * Improve code examples * Add code example for document parsing * Add DonutSwin to MODEL_NAMES_MAPPING * Add model to appropriate place in toctree * Update namespace to appropriate organization Co-authored-by: Niels Rogge * Fix URLs (#18604) Co-authored-by: Niels Rogge * Update BLOOM parameter counts (#18531) * Update BLOOM parameter counts * Update BLOOM parameter counts * [doc] fix anchors (#18591) the manual anchors end up being duplicated with automatically added anchors and no longer work. * [fsmt] deal with -100 indices in decoder ids (#18592) * [fsmt] deal with -100 indices in decoder ids Fixes: https://github.com/huggingface/transformers/issues/17945 decoder ids get the default index -100, which breaks the model - like t5 and many other models add a fix to replace -100 with the correct pad index. For some reason this use case hasn't been used with this model until recently - so this issue was there since the beginning it seems. Any suggestions to how to add a simple test here? or perhaps we have something similar already? user's script is quite massive. 
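The `-100` handling described above follows the same pattern other seq2seq models use when labels are reused as decoder inputs: the loss ignore-index is not a valid token id and must be mapped back to padding. A minimal, self-contained sketch; the helper name is illustrative, not the exact function added by the fix:

```python
import torch

def replace_ignore_index(decoder_input_ids: torch.Tensor, pad_token_id: int) -> torch.Tensor:
    # -100 is the loss ignore-index, not a valid token id: map it back to padding.
    return decoder_input_ids.masked_fill(decoder_input_ids == -100, pad_token_id)

labels = torch.tensor([[42, 17, -100, -100]])
print(replace_ignore_index(labels, pad_token_id=1))  # tensor([[42, 17,  1,  1]])
```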
* style * small change (#18584) * Flax Remat for LongT5 (#17994) * [Flax] Add remat (gradient checkpointing) * fix variable naming in test * flip: checkpoint using a method * fix naming * fix class naming * apply PVP's suggestions from code review * add gradient_checkpointing to examples * Add gradient_checkpointing to run_mlm_flax * Add remat to longt5 * Add gradient checkpointing test longt5 * Fix args errors * Fix remaining tests * Make fixup & quality fixes * replace kwargs * remove unecessary kwargs * Make fixup changes * revert long_t5_flax changes * Remove return_dict and copy to LongT5 * Remove test_gradient_checkpointing Co-authored-by: sanchit-gandhi * mac m1 `mps` integration (#18598) * mac m1 `mps` integration * Update docs/source/en/main_classes/trainer.mdx Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * addressing comments * Apply suggestions from code review Co-authored-by: Dan Saattrup Nielsen <47701536+saattrupdan@users.noreply.github.com> * resolve comment Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Dan Saattrup Nielsen <47701536+saattrupdan@users.noreply.github.com> * Change scheduled CIs to use torch 1.12.1 (#18644) Co-authored-by: ydshieh * Add checks for some workflow jobs (#18583) Co-authored-by: ydshieh * TF: Fix generation repetition penalty with XLA (#18648) * Update longt5.mdx (#18634) * Update run_translation_no_trainer.py (#18637) * Update run_translation_no_trainer.py found an error in selecting `no_decay` parameters and some small modifications when the user continues to train from a checkpoint * fixs `no_decay` and `resume_step` issue 1. change `no_decay` list 2. if use continue to train their model from provided checkpoint, the `resume_step` will not be initialized properly if `args.gradient_accumulation_steps != 1` * [bnb] Minor modifications (#18631) * bnb minor modifications - refactor documentation - add troubleshooting README - add PyPi library on DockerFile * Apply suggestions from code review Co-authored-by: Stas Bekman * Apply suggestions from code review * Apply suggestions from code review * Apply suggestions from code review * put in one block - put bash instructions in one block * update readme - refactor a bit hardware requirements * change text a bit * Apply suggestions from code review Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * apply suggestions Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * add link to paper * Apply suggestions from code review Co-authored-by: Stas Bekman * Update tests/mixed_int8/README.md * Apply suggestions from code review * refactor a bit * add instructions Turing & Amperer Co-authored-by: Stas Bekman * add A6000 * clarify a bit * remove small part * Update tests/mixed_int8/README.md Co-authored-by: Stas Bekman Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * Examples: add Bloom support for token classification (#18632) * examples: add Bloom support for token classification (FLAX, PyTorch and TensorFlow) * examples: remove support for Bloom in token classication (FLAX and TensorFlow currently have no support for it) * Fix Yolos ONNX export test (#18606) Co-authored-by: lewtun Co-authored-by: ydshieh * Fixup * Fix up * Move PIL default arguments inside function for safe imports * Add image utils to toctree * Update `rescale` method to reflect changes in #18677 * Update docs/source/en/internal/image_processing_utils.mdx Co-authored-by: NielsRogge 
<48327001+NielsRogge@users.noreply.github.com> * Address Niels PR comments * Apply suggestions from code review - remove defaults to None Co-authored-by: Sylvain Gugger Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Fix docstrings and revert to PIL.Image.XXX resampling Use PIL.Image.XXX resampling values instead of PIL.Image.Resampling.XXX enum as it's only in the recent version >= 9.10 and version is not yet pinned and older version support deprecated * Some more docstrings and PIL.Image tidy up * Reorganise arguments so flags by modifiers * Few last docstring fixes Signed-off-by: Seunghwan Hong Signed-off-by: dependabot[bot] Signed-off-by: Wang, Yi A Co-authored-by: Amy Roberts Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Alara Dirik <8944735+alaradirik@users.noreply.github.com> Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Co-authored-by: Seunghwan Hong Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Co-authored-by: ydshieh Co-authored-by: Sylvain Gugger Co-authored-by: Julien Chaumond Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> Co-authored-by: Nicolas Patry Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Co-authored-by: Ankur Goyal Co-authored-by: Ankur Goyal Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> Co-authored-by: Niels Rogge Co-authored-by: Mishig Davaadorj Co-authored-by: Rasmus Arpe Fogh Jensen Co-authored-by: Ian Castillo <7807897+donelianc@users.noreply.github.com> Co-authored-by: AguilaCudicio Co-authored-by: Omar U. Espejel Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Co-authored-by: Thomas Wang <24695242+thomasw21@users.noreply.github.com> Co-authored-by: Niklas Hansson Co-authored-by: Thomas Chaigneau Co-authored-by: YouJiacheng <1503679330@qq.com> Co-authored-by: Michael Benayoun Co-authored-by: Joao Gante Co-authored-by: Matt Co-authored-by: Dhruv Karan Co-authored-by: Michael Wyatt Co-authored-by: Maxime G Co-authored-by: Maxime Gardoni Co-authored-by: Wonseok Lee (Jack) Co-authored-by: Dan Jones Co-authored-by: Daniel Jones Co-authored-by: flozi00 Co-authored-by: iiLaurens Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> Co-authored-by: Wang, Yi Co-authored-by: Stas Bekman Co-authored-by: Niklas Muennighoff Co-authored-by: Karim Foda <35491698+KMFODA@users.noreply.github.com> Co-authored-by: sanchit-gandhi Co-authored-by: Dan Saattrup Nielsen <47701536+saattrupdan@users.noreply.github.com> Co-authored-by: zhoutang776 <47708118+zhoutang776@users.noreply.github.com> Co-authored-by: Stefan Schweter Co-authored-by: lewtun --- src/transformers/image_processing_utils.py | 384 +----------------- src/transformers/image_transforms.py | 125 +----- src/transformers/image_utils.py | 35 +- .../models/glpn/image_processing_glpn.py | 2 +- tests/test_image_transforms.py | 52 --- tests/utils/test_image_utils.py | 25 +- 6 files changed, 18 insertions(+), 605 deletions(-) diff --git a/src/transformers/image_processing_utils.py b/src/transformers/image_processing_utils.py index 1c099cf14e2d44..ba9d3c0962e3f6 100644 --- a/src/transformers/image_processing_utils.py +++ b/src/transformers/image_processing_utils.py @@ -38,387 +38,9 @@ class BatchFeature(BaseBatchFeature): """ -class ImageProcessorMixin(PushToHubMixin): - """ - Image processor mixin 
used to provide saving/loading functionality - """ - - _auto_class = None - - def __init__(self, **kwargs): - """Set elements of `kwargs` as attributes.""" - # Pop "processor_class" as it should be saved as private attribute - self._processor_class = kwargs.pop("processor_class", None) - # Additional attributes without default values - for key, value in kwargs.items(): - try: - setattr(self, key, value) - except AttributeError as err: - logger.error(f"Can't set {key} with value {value} for {self}") - raise err - - def _set_processor_class(self, processor_class: str): - """Sets processor class as an attribute.""" - self._processor_class = processor_class - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs): - r""" - Instantiate a type of [`~image_processing_utils.ImageProcessorMixin`] from a image processor, *e.g.* a derived - class of [`BaseImageProcessor`]. - - Args: - pretrained_model_name_or_path (`str` or `os.PathLike`): - This can be either: - - - a string, the *model id* of a pretrained image_processor hosted inside a model repo on - huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or - namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`. - - a path to a *directory* containing a image processor file saved using the - [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] method, e.g., - `./my_model_directory/`. - - a path or url to a saved image processor JSON *file*, e.g., - `./my_model_directory/preprocessor_config.json`. - cache_dir (`str` or `os.PathLike`, *optional*): - Path to a directory in which a downloaded pretrained model image processor should be cached if the - standard cache should not be used. - force_download (`bool`, *optional*, defaults to `False`): - Whether or not to force to (re-)download the image processor files and override the cached versions if - they exist. - resume_download (`bool`, *optional*, defaults to `False`): - Whether or not to delete incompletely received file. Attempts to resume the download if such a file - exists. - proxies (`Dict[str, str]`, *optional*): - A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', - 'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request. - use_auth_token (`str` or *bool*, *optional*): - The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated - when running `transformers-cli login` (stored in `~/.huggingface`). - revision (`str`, *optional*, defaults to `"main"`): - The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a - git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any - identifier allowed by git. - return_unused_kwargs (`bool`, *optional*, defaults to `False`): - If `False`, then this function returns just the final image processor object. If `True`, then this - functions returns a `Tuple(image_processor, unused_kwargs)` where *unused_kwargs* is a dictionary - consisting of the key/value pairs whose keys are not image processor attributes: i.e., the part of - `kwargs` which has not been used to update `image_processor` and is otherwise ignored. - kwargs (`Dict[str, Any]`, *optional*): - The values in kwargs of any keys which are image processor attributes will be used to override the - loaded values. 
Behavior concerning key/value pairs whose keys are *not* image processor attributes is - controlled by the `return_unused_kwargs` keyword parameter. - - - - Passing `use_auth_token=True` is required when you want to use a private model. - - - - Returns: - An image processor of type [`~image_processing_utils.ImageProcessorMixin`]. - - Examples: FIXME - - """ - image_processor_dict, kwargs = cls.get_image_processor_dict(pretrained_model_name_or_path, **kwargs) - - return cls.from_dict(image_processor_dict, **kwargs) - - def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): - """ - Save an image_processor object to the directory `save_directory`, so that it can be re-loaded using the - [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] class method. - - Args: - save_directory (`str` or `os.PathLike`): - Directory where the image processor JSON file will be saved (will be created if it does not exist). - push_to_hub (`bool`, *optional*, defaults to `False`): - Whether or not to push your image processor to the Hugging Face model hub after saving it. - - - - Using `push_to_hub=True` will synchronize the repository you are pushing to with `save_directory`, - which requires `save_directory` to be a local clone of the repo you are pushing to if it's an existing - folder. Pass along `temp_dir=True` to use a temporary directory instead. - - - - kwargs: - Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. - """ - if os.path.isfile(save_directory): - raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") - - if push_to_hub: - commit_message = kwargs.pop("commit_message", None) - repo = self._create_or_get_repo(save_directory, **kwargs) - - # If we have a custom config, we copy the file defining it in the folder and set the attributes so it can be - # loaded from the Hub. - if self._auto_class is not None: - custom_object_save(self, save_directory, config=self) - - os.makedirs(save_directory, exist_ok=True) - # If we save using the predefined names, we can load using `from_pretrained` - output_image_processor_file = os.path.join(save_directory, IMAGE_PROCESSOR_NAME) - - self.to_json_file(output_image_processor_file) - logger.info(f"Image processor saved in {output_image_processor_file}") - - if push_to_hub: - url = self._push_to_hub(repo, commit_message=commit_message) - logger.info(f"Image processor pushed to the hub in this commit: {url}") - - return [output_image_processor_file] - - @classmethod - def get_image_processor_dict( - cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs - ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - """ - From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a - image processor of type [`~feature_extraction_utils.FeatureExtractionMixin`] using `from_dict`. - - Parameters: - pretrained_model_name_or_path (`str` or `os.PathLike`): - The identifier of the pre-trained checkpoint from which we want the dictionary of parameters. - - Returns: - `Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the image processor object. 
- """ - cache_dir = kwargs.pop("cache_dir", None) - force_download = kwargs.pop("force_download", False) - resume_download = kwargs.pop("resume_download", False) - proxies = kwargs.pop("proxies", None) - use_auth_token = kwargs.pop("use_auth_token", None) - local_files_only = kwargs.pop("local_files_only", False) - revision = kwargs.pop("revision", None) - - from_pipeline = kwargs.pop("_from_pipeline", None) - from_auto_class = kwargs.pop("_from_auto", False) - - user_agent = {"file_type": "image processor", "from_auto_class": from_auto_class} - if from_pipeline is not None: - user_agent["using_pipeline"] = from_pipeline - - if is_offline_mode() and not local_files_only: - logger.info("Offline mode: forcing local_files_only=True") - local_files_only = True - - pretrained_model_name_or_path = str(pretrained_model_name_or_path) - if os.path.isdir(pretrained_model_name_or_path): - image_processor_file = os.path.join(pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME) - elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): - image_processor_file = pretrained_model_name_or_path - else: - image_processor_file = hf_bucket_url( - pretrained_model_name_or_path, filename=IMAGE_PROCESSOR_NAME, revision=revision, mirror=None - ) - - try: - # Load from URL or cache if already cached - resolved_image_processor_file = cached_path( - image_processor_file, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - resume_download=resume_download, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - user_agent=user_agent, - ) - - except RepositoryNotFoundError: - raise EnvironmentError( - f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier listed on " - "'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having " - "permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass " - "`use_auth_token=True`." - ) - except RevisionNotFoundError: - raise EnvironmentError( - f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this " - f"model name. Check the model page at 'https://huggingface.co/{pretrained_model_name_or_path}' for " - "available revisions." - ) - except EntryNotFoundError: - raise EnvironmentError( - f"{pretrained_model_name_or_path} does not appear to have a file named {IMAGE_PROCESSOR_NAME}." - ) - except HTTPError as err: - raise EnvironmentError( - f"There was a specific connection error when trying to load {pretrained_model_name_or_path}:\n{err}" - ) - except ValueError: - raise EnvironmentError( - f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it in" - f" the cached files and it looks like {pretrained_model_name_or_path} is not the path to a directory" - f" containing a {IMAGE_PROCESSOR_NAME} file.\nCheckout your internet connection or see how to run" - " the library in offline mode at" - " 'https://huggingface.co/docs/transformers/installation#offline-mode'." - ) - except EnvironmentError: - raise EnvironmentError( - f"Can't load image processor for '{pretrained_model_name_or_path}'. If you were trying to load it " - "from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. 
" - f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory " - f"containing a {IMAGE_PROCESSOR_NAME} file" - ) - - try: - # Load image_processor dict - with open(resolved_image_processor_file, "r", encoding="utf-8") as reader: - text = reader.read() - image_processor_dict = json.loads(text) - - except json.JSONDecodeError: - raise EnvironmentError( - f"It looks like the config file at '{resolved_image_processor_file}' is not a valid JSON file." - ) - - if resolved_image_processor_file == image_processor_file: - logger.info(f"loading image processor configuration file {image_processor_file}") - else: - logger.info( - f"loading image processor configuration file {image_processor_file} from cache at" - f" {resolved_image_processor_file}" - ) - - return image_processor_dict, kwargs - - @classmethod - def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): - """ - Instantiates a type of [`~image_processing_utils.ImageProcessorMixin`] from a Python dictionary of parameters. - - Args: - image_processor_dict (`Dict[str, Any]`): - Dictionary that will be used to instantiate the image processor object. Such a dictionary can be - retrieved from a pretrained checkpoint by leveraging the - [`~feature_extraction_utils.FeatureExtractionMixin.to_dict`] method. - kwargs (`Dict[str, Any]`): - Additional parameters from which to initialize the image processor object. - - Returns: - [`~feature_extraction_utils.FeatureExtractionMixin`]: The image processor object instantiated from those - parameters. - """ - return_unused_kwargs = kwargs.pop("return_unused_kwargs", False) - - image_processor = cls(**image_processor_dict) - - # Update image_processor with kwargs if needed - to_remove = [] - for key, value in kwargs.items(): - if hasattr(image_processor, key): - setattr(image_processor, key, value) - to_remove.append(key) - for key in to_remove: - kwargs.pop(key, None) - - logger.info(f"image processor {image_processor}") - if return_unused_kwargs: - return image_processor, kwargs - else: - return image_processor - - def to_dict(self) -> Dict[str, Any]: - """ - Serializes this instance to a Python dictionary. - - Returns: - `Dict[str, Any]`: Dictionary of all the attributes that make up this image processor instance. - """ - output = copy.deepcopy(self.__dict__) - output["image_processor_type"] = self.__class__.__name__ - - return output - - @classmethod - def from_json_file(cls, json_file: Union[str, os.PathLike]): - """ - Instantiates an image processor of type [`~image_processing_utils.ImageProcessorMixin`] from the path to a JSON - file of parameters. - - Args: - json_file (`str` or `os.PathLike`): - Path to the JSON file containing the parameters. - - Returns: - A image processor of type [`~feature_extraction_utils.FeatureExtractionMixin`]: The image_processor object - instantiated from that JSON file. - """ - with open(json_file, "r", encoding="utf-8") as reader: - text = reader.read() - image_processor_dict = json.loads(text) - return cls(**image_processor_dict) - - def to_json_string(self) -> str: - """ - Serializes this instance to a JSON string. - - Returns: - `str`: String containing all the attributes that make up this image_processor instance in JSON format. 
- """ - dictionary = self.to_dict() - - for key, value in dictionary.items(): - if isinstance(value, np.ndarray): - dictionary[key] = value.tolist() - - # make sure private name "_processor_class" is correctly - # saved as "processor_class" - _processor_class = dictionary.pop("_processor_class", None) - if _processor_class is not None: - dictionary["processor_class"] = _processor_class - - return json.dumps(dictionary, indent=2, sort_keys=True) + "\n" - - def to_json_file(self, json_file_path: Union[str, os.PathLike]): - """ - Save this instance to a JSON file. - - Args: - json_file_path (`str` or `os.PathLike`): - Path to the JSON file in which this image_processor instance's parameters will be saved. - """ - with open(json_file_path, "w", encoding="utf-8") as writer: - writer.write(self.to_json_string()) - - def __repr__(self): - return f"{self.__class__.__name__} {self.to_json_string()}" - - @classmethod - def register_for_auto_class(cls, auto_class="AutoImageProcessor"): - """ - Register this class with a given auto class. This should only be used for custom image processors as the ones - in the library are already mapped with `AutoImageProcessor`. - - - - This API is experimental and may have some slight breaking changes in the next releases. - - - - Args: - auto_class (`str` or `type`, *optional*, defaults to `"AutoFeatureExtractor"`): - The auto class to register this new image processor with. - """ - if not isinstance(auto_class, str): - auto_class = auto_class.__name__ - - import transformers.models.auto as auto_module - - if not hasattr(auto_module, auto_class): - raise ValueError(f"{auto_class} is not a valid auto class.") - - cls._auto_class = auto_class - - -ImageProcessorMixin.push_to_hub = copy_func(ImageProcessorMixin.push_to_hub) -ImageProcessorMixin.push_to_hub.__doc__ = ImageProcessorMixin.push_to_hub.__doc__.format( - object="image processor", object_class="AutoImageProcessor", object_files="image processor file" -) +# We use aliasing whilst we phase out the old API. Once feature extractors for vision models +# are deprecated, ImageProcessor mixin will be implemented. Any shared logic will be abstracted out. +ImageProcessorMixin = FeatureExtractionMixin class BaseImageProcessor(ImageProcessorMixin): diff --git a/src/transformers/image_transforms.py b/src/transformers/image_transforms.py index adbbbd7f14e555..024b46911a750a 100644 --- a/src/transformers/image_transforms.py +++ b/src/transformers/image_transforms.py @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union import numpy as np -import PIL from transformers.utils.import_utils import is_flax_available, is_tf_available, is_torch_available, is_vision_available @@ -26,7 +25,6 @@ from .image_utils import ( ChannelDimension, - get_channel_dimension_axis, get_image_size, infer_channel_dimension_format, is_jax_tensor, @@ -217,6 +215,7 @@ def resize( size: Tuple[int, int], resample=PIL.Image.BILINEAR, data_format: Optional[ChannelDimension] = None, + return_numpy: bool = True, ) -> np.ndarray: """ Resizes `image` to (h, w) specified by `size` using the PIL library. 
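For orientation, the `return_numpy` flag added in the hunk above keeps the default `np.ndarray` output while letting callers opt into a `PIL.Image.Image`. A sketch of the intended call, assuming the vision extras (Pillow) are installed; the input array is illustrative:

```python
import numpy as np
from transformers.image_transforms import resize

image = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

as_array = resize(image, size=(224, 224))                      # np.ndarray (default)
as_pil = resize(image, size=(224, 224), return_numpy=False)    # PIL.Image.Image
```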
@@ -258,123 +257,3 @@ def resize( resized_image = np.array(resized_image) resized_image = to_channel_dimension_format(resized_image, data_format) return resized_image - - -def normalize( - image, - mean: Union[float, Iterable[float]], - std: Union[float, Iterable[float]], - data_format: Optional[ChannelDimension] = None, -) -> np.ndarray: - """ - Normalizes `image` using the mean and standard deviation specified by `mean` and `std`. - - image = (image - mean) / std - - Args: - image (`np.ndarray`): - The image to normalize. - mean (`float` or `Iterable[float]`): - The mean to use for normalization. - std (`float` or `Iterable[float]`): - The standard deviation to use for normalization. - data_format (`ChannelDimension`, *optional*, defaults to `None`): - The channel dimension format of the output image. If `None`, will use the inferred format from the input. - """ - if not isinstance(image, np.ndarray): - raise ValueError(f"Input image must be of type np.ndarray, got {type(image)}") - - input_data_format = infer_channel_dimension_format(image) - channel_axis = get_channel_dimension_axis(image) - num_channels = image.shape[channel_axis] - - if isinstance(mean, Iterable): - if len(mean) != num_channels: - raise ValueError(f"mean must have {num_channels} elements if it is an iterable, got {len(mean)}") - else: - mean = [mean] * num_channels - - if isinstance(std, Iterable): - if len(std) != num_channels: - raise ValueError(f"std must have {num_channels} elements if it is an iterable, got {len(std)}") - else: - std = [std] * num_channels - - if input_data_format == ChannelDimension.LAST: - image = (image - mean) / std - else: - image = ((image.T - mean) / std).T - - image = to_channel_dimension_format(image, data_format) if data_format is not None else image - return image - - -def center_crop( - image: np.ndarray, - size: Tuple[int, int], - data_format: Optional[Union[str, ChannelDimension]] = None, -) -> np.ndarray: - """ - Crops the `image` to the specified `size` using a center crop. Note that if the image is too small to be cropped - to the size given, it will be padded (so the returned result will always be of size `size`). - - Args: - image (`np.ndarray`): - The image to crop. - size (`Tuple[int, int]`): - The target size for the cropped image. - data_format (`str` or `ChannelDimension`, *optional*, defaults to `None`): - The channel dimension format for the output image. Can be one of: - - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. - - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. - - Returns: - `np.ndarray`: The cropped image. - """ - if not isinstance(image, np.ndarray): - raise ValueError(f"Input image must be of type np.ndarray, got {type(image)}") - - if not isinstance(size, Iterable) or len(size) != 2: - raise ValueError("size must have 2 elements representing the height and width of the output image") - - input_data_format = infer_channel_dimension_format(image) - output_data_format = data_format if data_format is not None else input_data_format - - # We perform the crop in (C, H, W) format and then convert to the output format - image = to_channel_dimension_format(image, ChannelDimension.FIRST) - - orig_height, orig_width = get_image_size(image) - crop_height, crop_width = size - - top = (orig_height - crop_height) // 2 - bottom = top + crop_height # In case size is odd, (image_shape[0] + size[0]) // 2 won't give the proper result. 
- left = (orig_width - crop_width) // 2 - right = left + crop_width # In case size is odd, (image_shape[1] + size[1]) // 2 won't give the proper result. - - # Check if cropped area is within image boundaries - if top >= 0 and bottom <= orig_height and left >= 0 and right <= orig_width: - image = image[..., top:bottom, left:right] - image = to_channel_dimension_format(image, output_data_format) - return image - - # Otherwise, we may need to pad if the image is too small. Oh joy... - new_height = max(crop_height, orig_height) - new_width = max(crop_width, orig_width) - new_shape = image.shape[:-2] + (new_height, new_width) - new_image = np.zeros_like(image, shape=new_shape) - - # If the image is too small, pad it with zeros - top_pad = (new_height - orig_height) // 2 - bottom_pad = top_pad + orig_height - left_pad = (new_width - orig_width) // 2 - right_pad = left_pad + orig_width - new_image[..., top_pad:bottom_pad, left_pad:right_pad] = image - - top += top_pad - bottom += top_pad - left += left_pad - right += left_pad - - new_image = new_image[..., max(0, top) : min(new_height, bottom), max(0, left) : min(new_width, right)] - new_image = to_channel_dimension_format(new_image, output_data_format) - return new_image diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py index b0c128d86aeaae..0ba86d14b7975d 100644 --- a/src/transformers/image_utils.py +++ b/src/transformers/image_utils.py @@ -20,8 +20,14 @@ import requests -from .utils import is_flax_available, is_tf_available, is_torch_available -from .utils.generic import _is_jax, _is_tensorflow, _is_torch, to_numpy +from .utils import is_flax_available, is_tf_available, is_torch_available, is_vision_available +from .utils.constants import ( # noqa: F401 + IMAGENET_DEFAULT_MEAN, + IMAGENET_DEFAULT_STD, + IMAGENET_STANDARD_MEAN, + IMAGENET_STANDARD_STD, +) +from .utils.generic import ExplicitEnum, _is_jax, _is_tensorflow, _is_torch, to_numpy if is_vision_available(): @@ -39,9 +45,9 @@ ] # noqa -class ChannelDimension(enum.Enum): - FIRST = 1 - LAST = 3 +class ChannelDimension(ExplicitEnum): + FIRST = "channels_first" + LAST = "channels_last" def is_torch_tensor(obj): @@ -106,25 +112,6 @@ def infer_channel_dimension_format(image: np.ndarray) -> ChannelDimension: raise ValueError("Unable to infer channel dimension format") -def get_channel_dimension_axis(image: np.ndarray) -> int: - """ - Returns the channel dimension axis of the image. - - Args: - image (`np.ndarray`): - The image to get the channel dimension axis of. - - Returns: - The channel dimension axis of the image. - """ - channel_dim = infer_channel_dimension_format(image) - if channel_dim == ChannelDimension.FIRST: - return image.ndim - 3 - elif channel_dim == ChannelDimension.LAST: - return image.ndim - 1 - raise ValueError(f"Unsupported data format: {channel_dim}") - - def get_image_size(image: np.ndarray, channel_dim: ChannelDimension = None) -> Tuple[int, int]: """ Returns the (height, width) dimensions of the image. 
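The `ChannelDimension` change above (int-valued `enum.Enum` to string-valued `ExplicitEnum`) makes the enum members and their literal strings interchangeable. A small sketch, assuming `ExplicitEnum` keeps its usual transformers behavior of being constructible from the member value:

```python
from transformers.image_utils import ChannelDimension

assert ChannelDimension.FIRST.value == "channels_first"
assert ChannelDimension("channels_last") is ChannelDimension.LAST  # lookup by value
```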
diff --git a/src/transformers/models/glpn/image_processing_glpn.py b/src/transformers/models/glpn/image_processing_glpn.py index ffc7efe16ea978..98ae1d53f73d60 100644 --- a/src/transformers/models/glpn/image_processing_glpn.py +++ b/src/transformers/models/glpn/image_processing_glpn.py @@ -16,8 +16,8 @@ from typing import List, Optional, Union -import PIL.Image import numpy as np +import PIL.Image from transformers.utils.generic import TensorType diff --git a/tests/test_image_transforms.py b/tests/test_image_transforms.py index f63555bacb485f..69e6de1587b8d6 100644 --- a/tests/test_image_transforms.py +++ b/tests/test_image_transforms.py @@ -35,9 +35,7 @@ import PIL.Image from transformers.image_transforms import ( - center_crop, get_resize_output_image_size, - normalize, resize, to_channel_dimension_format, to_pil_image, @@ -174,53 +172,3 @@ def test_resize(self): self.assertIsInstance(resized_image, PIL.Image.Image) # PIL size is in (width, height) order self.assertEqual(resized_image.size, (40, 30)) - - def test_normalize(self): - image = np.random.randint(0, 256, (224, 224, 3)) / 255 - - # Test that exception is raised if inputs are incorrect - # Not a numpy array image - with self.assertRaises(ValueError): - normalize(5, 5, 5) - - # Number of mean values != number of channels - with self.assertRaises(ValueError): - normalize(image, mean=(0.5, 0.6), std=1) - - # Number of std values != number of channels - with self.assertRaises(ValueError): - normalize(image, mean=1, std=(0.5, 0.6)) - - # Test result is correct - output data format is channels_first and normalization - # correctly computed - mean = (0.5, 0.6, 0.7) - std = (0.1, 0.2, 0.3) - expected_image = ((image - mean) / std).transpose((2, 0, 1)) - - normalized_image = normalize(image, mean=mean, std=std, data_format="channels_first") - self.assertIsInstance(normalized_image, np.ndarray) - self.assertEqual(normalized_image.shape, (3, 224, 224)) - self.assertTrue(np.allclose(normalized_image, expected_image)) - - def test_center_crop(self): - image = np.random.randint(0, 256, (3, 224, 224)) - - # Test that exception is raised if inputs are incorrect - with self.assertRaises(ValueError): - center_crop(image, 10) - - # Test result is correct - output data format is channels_first and center crop - # correctly computed - expected_image = image[:, 52:172, 82:142].transpose(1, 2, 0) - cropped_image = center_crop(image, (120, 60), data_format="channels_last") - self.assertIsInstance(cropped_image, np.ndarray) - self.assertEqual(cropped_image.shape, (120, 60, 3)) - self.assertTrue(np.allclose(cropped_image, expected_image)) - - # Test that image is padded with zeros if crop size is larger than image size - expected_image = np.zeros((300, 260, 3)) - expected_image[38:262, 18:242, :] = image.transpose((1, 2, 0)) - cropped_image = center_crop(image, (300, 260), data_format="channels_last") - self.assertIsInstance(cropped_image, np.ndarray) - self.assertEqual(cropped_image.shape, (300, 260, 3)) - self.assertTrue(np.allclose(cropped_image, expected_image)) diff --git a/tests/utils/test_image_utils.py b/tests/utils/test_image_utils.py index 6868e117c4c386..0ae5d78fb2dc0a 100644 --- a/tests/utils/test_image_utils.py +++ b/tests/utils/test_image_utils.py @@ -20,7 +20,7 @@ import pytest from transformers import is_torch_available, is_vision_available -from transformers.image_utils import ChannelDimension, get_channel_dimension_axis +from transformers.image_utils import ChannelDimension from transformers.testing_utils import require_torch, 
require_vision
@@ -535,26 +535,3 @@ def test_infer_channel_dimension(self):
         image = np.random.randint(0, 256, (1, 3, 4, 5))
         inferred_dim = infer_channel_dimension_format(image)
         self.assertEqual(inferred_dim, ChannelDimension.FIRST)
-
-    def test_get_channel_dimension_axis(self):
-        # Test we correctly identify the channel dimension
-        image = np.random.randint(0, 256, (3, 4, 5))
-        inferred_axis = get_channel_dimension_axis(image)
-        self.assertEqual(inferred_axis, 0)
-
-        image = np.random.randint(0, 256, (1, 4, 5))
-        inferred_axis = get_channel_dimension_axis(image)
-        self.assertEqual(inferred_axis, 0)
-
-        image = np.random.randint(0, 256, (4, 5, 3))
-        inferred_axis = get_channel_dimension_axis(image)
-        self.assertEqual(inferred_axis, 2)
-
-        image = np.random.randint(0, 256, (4, 5, 1))
-        inferred_axis = get_channel_dimension_axis(image)
-        self.assertEqual(inferred_axis, 2)
-
-        # We can take a batched array of images and find the dimension
-        image = np.random.randint(0, 256, (1, 3, 4, 5))
-        inferred_axis = get_channel_dimension_axis(image)
-        self.assertEqual(inferred_axis, 1)
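For reference, the save/load round trip that the `ImageProcessorMixin = FeatureExtractionMixin` alias gives the new GLPN processor, sketched under the assumption that `GLPNImageProcessor` (the class defined in `image_processing_glpn.py` above) is default-constructible; the local path is a placeholder, not a real repo id:

```python
from transformers.models.glpn.image_processing_glpn import GLPNImageProcessor

processor = GLPNImageProcessor()          # default configuration
processor.save_pretrained("./glpn-ip")    # writes preprocessor_config.json
reloaded = GLPNImageProcessor.from_pretrained("./glpn-ip")
```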