From 38f2344f26eda4ab6f354b4a0f1b6e8ae2a4ab3c Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 22 Jul 2022 16:49:22 +0200 Subject: [PATCH 1/4] Improve doc test --- src/transformers/models/detr/modeling_detr.py | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index e5d2a04c1f467..06fa1704e6004 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -1237,8 +1237,15 @@ def forward( >>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50") >>> model = DetrModel.from_pretrained("facebook/detr-resnet-50") + + >>> # prepare image for the model >>> inputs = feature_extractor(images=image, return_tensors="pt") + + >>> # forward pass >>> outputs = model(**inputs) + + >>> # the last hidden states are the final query embeddings of the Transformer decoder + >>> # these are of shape (batch_size, num_queries, hidden_size) >>> last_hidden_states = outputs.last_hidden_state >>> list(last_hidden_states.shape) [1, 100, 256] @@ -1389,6 +1396,7 @@ def forward( ```python >>> from transformers import DetrFeatureExtractor, DetrForObjectDetection + >>> import torch >>> from PIL import Image >>> import requests @@ -1400,17 +1408,23 @@ def forward( >>> inputs = feature_extractor(images=image, return_tensors="pt") >>> outputs = model(**inputs) - >>> # model predicts bounding boxes and corresponding COCO classes - >>> bboxes, logits = outputs.pred_boxes, outputs.logits - - >>> # get probability per object class and remove the no-object class - >>> probas_per_class = outputs.logits.softmax(-1)[:, :, :-1] - >>> objects_to_keep = probas_per_class.max(-1).values > 0.9 - >>> ids, _ = probas_per_class.max(-1).indices[objects_to_keep].sort() - >>> labels = [model.config.id2label[id.item()] for id in ids] - >>> labels - ['cat', 'cat', 'couch', 'remote', 'remote'] + >>> # convert 
outputs (bounding boxes and class logits) to COCO API + >>> target_sizes = torch.tensor([image.size[::-1]]) + >>> results = feature_extractor.post_process(outputs, target_sizes=target_sizes)[0] + + >>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]): + ... box = [round(i, 2) for i in box.tolist()] + ... # let's only keep detections with score > 0.9 + ... if score > 0.9: + ... print( + ... f"Detected {model.config.id2label[label.item()]} with confidence {round(score.item(), 3)} at location {box}" + ... ) + Detected remote with confidence 0.998 at location [40.16, 70.81, 175.55, 117.98] + Detected remote with confidence 0.996 at location [333.24, 72.55, 368.33, 187.66] + Detected couch with confidence 0.995 at location [-0.02, 1.15, 639.73, 473.76] + Detected cat with confidence 0.999 at location [13.24, 52.05, 314.02, 470.93] + Detected cat with confidence 0.999 at location [345.4, 23.85, 640.37, 368.72] ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -1562,9 +1576,12 @@ def forward( >>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50-panoptic") >>> model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic") + >>> # prepare image for the model >>> inputs = feature_extractor(images=image, return_tensors="pt") + + >>> # forward pass >>> outputs = model(**inputs) - >>> # model predicts COCO classes, bounding boxes, and masks + >>> logits = outputs.logits >>> list(logits.shape) [1, 100, 251] @@ -1576,6 +1593,12 @@ def forward( >>> masks = outputs.pred_masks >>> list(masks.shape) [1, 100, 200, 267] + + >>> # compute the scores, excluding the "no-object" class (the last one) + >>> scores = outputs.logits.softmax(-1)[..., :-1].max(-1)[0] + >>> # threshold the confidence + >>> keep = scores > 0.85 + >>> final_masks = outputs.pred_masks[keep].detach().numpy() ```""" return_dict = return_dict if return_dict is not None else 
self.config.use_return_dict From c6efd4c1efccffd5ccc166444018c42356843167 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 22 Jul 2022 17:28:10 +0200 Subject: [PATCH 2/4] Improve code example of segmentation model --- src/transformers/models/detr/modeling_detr.py | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 06fa1704e6004..312d31024915f 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -1567,8 +1567,12 @@ def forward( ```python >>> from transformers import DetrFeatureExtractor, DetrForSegmentation + >>> from transformers.models.detr.feature_extraction_detr import rgb_to_id + >>> import torch >>> from PIL import Image + >>> import numpy >>> import requests + >>> import io >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) @@ -1582,23 +1586,17 @@ def forward( >>> # forward pass >>> outputs = model(**inputs) - >>> logits = outputs.logits - >>> list(logits.shape) - [1, 100, 251] - - >>> bboxes = outputs.pred_boxes - >>> list(bboxes.shape) - [1, 100, 4] - - >>> masks = outputs.pred_masks - >>> list(masks.shape) - [1, 100, 200, 267] - - >>> # compute the scores, excluding the "no-object" class (the last one) - >>> scores = outputs.logits.softmax(-1)[..., :-1].max(-1)[0] - >>> # threshold the confidence - >>> keep = scores > 0.85 - >>> final_masks = outputs.pred_masks[keep].detach().numpy() + >>> # use the `post_process_panoptic` method of `DetrFeatureExtractor` to convert to COCO format + >>> processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0) + >>> result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0] + + >>> # the segmentation is stored in a special-format png + >>> panoptic_seg = Image.open(io.BytesIO(result["png_string"])) + >>> 
panoptic_seg = numpy.array(panoptic_seg, dtype=numpy.uint8) + >>> # retrieve the ids corresponding to each mask + >>> panoptic_seg_id = rgb_to_id(panoptic_seg) + >>> panoptic_seg_id.shape + (800, 1066) ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict From cfe9a7940ac3da255f661425489fd2b7392a6671 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Sat, 23 Jul 2022 10:26:32 +0200 Subject: [PATCH 3/4] Apply suggestion --- src/transformers/models/detr/modeling_detr.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 312d31024915f..9e0e7922e27ba 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -1566,13 +1566,14 @@ def forward( Examples: ```python - >>> from transformers import DetrFeatureExtractor, DetrForSegmentation - >>> from transformers.models.detr.feature_extraction_detr import rgb_to_id - >>> import torch + >>> import io + >>> import requests >>> from PIL import Image + >>> import torch >>> import numpy - >>> import requests - >>> import io + + >>> from transformers import DetrFeatureExtractor, DetrForSegmentation + >>> from transformers.models.detr.feature_extraction_detr import rgb_to_id >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) From 7572708937ede890807c52d93f031a7b1473cf95 Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Wed, 27 Jul 2022 09:43:18 +0200 Subject: [PATCH 4/4] Update src/transformers/models/detr/modeling_detr.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/models/detr/modeling_detr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/detr/modeling_detr.py 
b/src/transformers/models/detr/modeling_detr.py index 9e0e7922e27ba..c1a164b486ffd 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -1418,7 +1418,8 @@ def forward( ... # let's only keep detections with score > 0.9 ... if score > 0.9: ... print( - ... f"Detected {model.config.id2label[label.item()]} with confidence {round(score.item(), 3)} at location {box}" + ... f"Detected {model.config.id2label[label.item()]} with confidence " + ... f"{round(score.item(), 3)} at location {box}" ... ) Detected remote with confidence 0.998 at location [40.16, 70.81, 175.55, 117.98] Detected remote with confidence 0.996 at location [333.24, 72.55, 368.33, 187.66]