Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DETR] Improve code examples #18262

Merged
Merged 4 commits on Jul 27, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
71 changes: 47 additions & 24 deletions src/transformers/models/detr/modeling_detr.py
Expand Up @@ -1237,8 +1237,15 @@ def forward(

>>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
>>> model = DetrModel.from_pretrained("facebook/detr-resnet-50")

>>> # prepare image for the model
>>> inputs = feature_extractor(images=image, return_tensors="pt")

>>> # forward pass
>>> outputs = model(**inputs)

>>> # the last hidden states are the final query embeddings of the Transformer decoder
NielsRogge marked this conversation as resolved.
Show resolved Hide resolved
>>> # these are of shape (batch_size, num_queries, hidden_size)
>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 100, 256]
Expand Down Expand Up @@ -1389,6 +1396,7 @@ def forward(

```python
>>> from transformers import DetrFeatureExtractor, DetrForObjectDetection
>>> import torch
>>> from PIL import Image
>>> import requests

Expand All @@ -1400,17 +1408,24 @@ def forward(

>>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # model predicts bounding boxes and corresponding COCO classes
>>> bboxes, logits = outputs.pred_boxes, outputs.logits

>>> # get probability per object class and remove the no-object class
>>> probas_per_class = outputs.logits.softmax(-1)[:, :, :-1]
>>> objects_to_keep = probas_per_class.max(-1).values > 0.9

>>> ids, _ = probas_per_class.max(-1).indices[objects_to_keep].sort()
>>> labels = [model.config.id2label[id.item()] for id in ids]
>>> labels
['cat', 'cat', 'couch', 'remote', 'remote']
>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
>>> results = feature_extractor.post_process(outputs, target_sizes=target_sizes)[0]

>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
...     box = [round(i, 2) for i in box.tolist()]
...     # let's only keep detections with score > 0.9
...     if score > 0.9:
...         print(
...             f"Detected {model.config.id2label[label.item()]} with confidence "
...             f"{round(score.item(), 3)} at location {box}."
...         )
Detected remote with confidence 0.998 at location [40.16, 70.81, 175.55, 117.98].
Detected remote with confidence 0.996 at location [333.24, 72.55, 368.33, 187.66].
Detected couch with confidence 0.995 at location [-0.02, 1.15, 639.73, 473.76].
Detected cat with confidence 0.999 at location [13.24, 52.05, 314.02, 470.93].
Detected cat with confidence 0.999 at location [345.4, 23.85, 640.37, 368.72].
```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

Expand Down Expand Up @@ -1552,30 +1567,38 @@ def forward(
Examples:

```python
>>> from transformers import DetrFeatureExtractor, DetrForSegmentation
>>> from PIL import Image
>>> import io
>>> import requests
>>> from PIL import Image
>>> import torch
>>> import numpy

>>> from transformers import DetrFeatureExtractor, DetrForSegmentation
>>> from transformers.models.detr.feature_extraction_detr import rgb_to_id

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50-panoptic")
>>> model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic")

>>> # prepare image for the model
>>> inputs = feature_extractor(images=image, return_tensors="pt")

>>> # forward pass
>>> outputs = model(**inputs)
>>> # model predicts COCO classes, bounding boxes, and masks
>>> logits = outputs.logits
>>> list(logits.shape)
[1, 100, 251]

>>> bboxes = outputs.pred_boxes
>>> list(bboxes.shape)
[1, 100, 4]

>>> masks = outputs.pred_masks
>>> list(masks.shape)
[1, 100, 200, 267]

>>> # use the `post_process_panoptic` method of `DetrFeatureExtractor` to convert to COCO format
>>> processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0)
>>> result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0]

>>> # the segmentation is stored in a special-format png
NielsRogge marked this conversation as resolved.
Show resolved Hide resolved
>>> panoptic_seg = Image.open(io.BytesIO(result["png_string"]))
>>> panoptic_seg = numpy.array(panoptic_seg, dtype=numpy.uint8)
>>> # retrieve the ids corresponding to each mask
>>> panoptic_seg_id = rgb_to_id(panoptic_seg)
>>> panoptic_seg_id.shape
(800, 1066)
```"""

return_dict = return_dict if return_dict is not None else self.config.use_return_dict
Expand Down