Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds to raise exception to main caller thread from prefetch generator #412

Merged
merged 10 commits into from
Feb 7, 2022
52 changes: 32 additions & 20 deletions labelbox/data/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,30 @@ class PrefetchGenerator:
Useful for modifying the generator results based on data from a network
"""

def __init__(self, data: Iterable[Any], prefetch_limit=20, num_executors=4):
def __init__(self, data: Iterable[Any], prefetch_limit=20, num_executors=1):
if isinstance(data, (list, tuple)):
self._data = (r for r in data)
else:
self._data = data

self.queue = Queue(prefetch_limit)
self._data = ThreadSafeGen(self._data)
self.completed_threads = 0
# Can only be iterated over once; otherwise queue.get hangs forever.
self.multithread = num_executors > 1
self.done = False
self.num_executors = num_executors
self.threads = [
threading.Thread(target=self.fill_queue)
for _ in range(num_executors)
]
for thread in self.threads:
thread.daemon = True
thread.start()

if self.multithread:
jtsodapop marked this conversation as resolved.
Show resolved Hide resolved
self._data = ThreadSafeGen(self._data)
self.num_executors = num_executors
self.threads = [
threading.Thread(target=self.fill_queue)
for _ in range(num_executors)
]
for thread in self.threads:
thread.daemon = True
thread.start()
else:
self._data = iter(self._data)

def _process(self, value) -> Any:
raise NotImplementedError("Abstract method needs to be implemented")
Expand All @@ -64,8 +69,8 @@ def fill_queue(self):
raise ValueError("Unexpected None")
self.queue.put(value)
except Exception as e:
logger.warning("Unexpected exception while filling the queue. %r",
e)
self.queue.put(
ValueError(f"Unexpected exception while filling queue: {e}"))
finally:
self.queue.put(None)

Expand All @@ -75,13 +80,20 @@ def __iter__(self):
def __next__(self) -> Any:
if self.done:
raise StopIteration
value = self.queue.get()
while value is None:
self.completed_threads += 1
if self.completed_threads == self.num_executors:
self.done = True
for thread in self.threads:
thread.join()
raise StopIteration

if self.multithread:
value = self.queue.get()

while value is None:
self.completed_threads += 1
if self.completed_threads == self.num_executors:
self.done = True
for thread in self.threads:
thread.join()
raise StopIteration
value = self.queue.get()
if isinstance(value, Exception):
raise value
else:
value = self._process(next(self._data))
return value
10 changes: 5 additions & 5 deletions labelbox/data/serialization/coco/panoptic_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def from_common(cls,

for result in results:
images.append(result[0])
all_coco_annotations.extend(result[1])
all_coco_annotations.append(result[1])
coco_categories.update(result[2])
coco_things.update(result[3])

Expand Down Expand Up @@ -175,12 +175,12 @@ def to_common(self, image_root: Path, mask_root: Path):
raise ValueError(
f"Cannot find file {im_path}. Make sure `image_root` is set properly"
)

if not annotation.file_name.endswith('.png'):
if not str(annotation.file_name).endswith('.png'):
raise ValueError(
f"COCO masks must be stored as png files and their extension must be `.png`. Found {annotation.file_name}"
)
mask = MaskData(file_path=Path(mask_root, annotation.file_name))
mask = MaskData(
file_path=str(Path(mask_root, annotation.file_name)))

for segmentation in annotation.segments_info:
category = category_lookup[segmentation.category_id]
Expand All @@ -189,6 +189,6 @@ def to_common(self, image_root: Path, mask_root: Path):
value=Mask(mask=mask,
color=id_to_rgb(
segmentation.id))))
data = ImageData(file_path=im_path)
data = ImageData(file_path=str(im_path))
yield Label(data=data, annotations=annotations)
del annotation_lookup[image.id]
2 changes: 1 addition & 1 deletion labelbox/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,4 @@ class MALValidationError(LabelboxError):

class OperationNotAllowedException(Exception):
"""Raised when user does not have permissions to a resource or has exceeded usage limit"""
pass
pass
154 changes: 153 additions & 1 deletion tests/data/assets/labelbox_v1/unkown_media_type_export.json
Original file line number Diff line number Diff line change
@@ -1 +1,153 @@
[{"ID": "ckw3ce1mc78b50zc30dqf0qhj", "DataRow ID": "ckw3cctc41uqg0zo5023e59hn", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0", "Label": {"objects": [{"featureId": "ckw3ce58u00003e66w9rh0onm", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 99.405, "y": 56.15}, {"x": 111.421, "y": 99.129}, {"x": 146.082, "y": 80.413}, {"x": 118.815, "y": 47.369}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:56.000Z", "Updated At": "2021-11-17T09:48:56.305Z", "Seconds to Label": 2.239, "External ID": "im1", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cea3f7b9t0zbn2tgp2y83", "DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc", "Label": {"objects": [], "classifications": [{"featureId": "ckw3ced5e00023e66236meh70", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ced5e00013e6652355ejd", "schemaId": "ckw3cdy207b6w0zbn2hgp3321", "title": "op1", "value": 
"op_1"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:02.000Z", "Updated At": "2021-11-17T09:49:02.220Z", "Seconds to Label": 5.373, "External ID": "im3", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cec4v78ex0zc3aodwdekw", "DataRow ID": "ckw3cctc41uqs0zo52cy6eus1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4", "Label": {"objects": [{"featureId": "ckw3cefl900033e66k41q6zpc", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 69.58, "y": 42.292}, {"x": 64.932, "y": 74.128}, {"x": 91.888, "y": 64.601}, {"x": 86.775, "y": 41.828}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [{"featureId": "ckw3ceijf00053e669zaplftd", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ceijf00043e665ex22xkp", "schemaId": "ckw3cdy207b6y0zbn77201rux", "title": "op2", "value": "op_2"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:15.000Z", "Updated At": "2021-11-17T09:49:15.785Z", "Seconds to Label": 5, "External ID": "im4", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], 
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3ce1s34c1i0zbp32067q4v", "DataRow ID": "ckw3cctc41uqk0zo52n31egs1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE", "Label": {}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:59.000Z", "Updated At": "2021-11-17T09:49:02.000Z", "Seconds to Label": 3.524, "External ID": "im2", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v", "Has Open Issues": 0, "Skipped": true}]
[{
"ID": "ckw3ce1mc78b50zc30dqf0qhj",
"DataRow ID": "ckw3cctc41uqg0zo5023e59hn",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0",
"Label": {
"objects": [{
"featureId": "ckw3ce58u00003e66w9rh0onm",
"schemaId": "ckw3cdy207b6t0zbn3sh52xoh",
"color": "#1CE6FF",
"title": "obj",
"value": "obj",
"polygon": [{
"x": 99.405,
"y": 56.15
}, {
"x": 111.421,
"y": 99.129
}, {
"x": 146.082,
"y": 80.413
}, {
"x": 118.815,
"y": 47.369
}],
"instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"
}],
"classifications": [],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:48:56.000Z",
"Updated At": "2021-11-17T09:48:56.305Z",
"Seconds to Label": 2.239,
"External ID": "im1",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3cea3f7b9t0zbn2tgp2y83",
"DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc",
"media_type": "image",
"Label": {
"objects": [],
"classifications": [{
"featureId": "ckw3ced5e00023e66236meh70",
"schemaId": "ckw3cdy207b6v0zbn11gp0zz4",
"title": "classification",
"value": "classification",
"answer": {
"featureId": "ckw3ced5e00013e6652355ejd",
"schemaId": "ckw3cdy207b6w0zbn2hgp3321",
"title": "op1",
"value": "op_1"
}
}],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:49:02.000Z",
"Updated At": "2021-11-17T09:49:02.220Z",
"Seconds to Label": 5.373,
"External ID": "im3",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3cec4v78ex0zc3aodwdekw",
"DataRow ID": "ckw3cctc41uqs0zo52cy6eus1",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4",
"Label": {
"objects": [{
"featureId": "ckw3cefl900033e66k41q6zpc",
"schemaId": "ckw3cdy207b6t0zbn3sh52xoh",
"color": "#1CE6FF",
"title": "obj",
"value": "obj",
"polygon": [{
"x": 69.58,
"y": 42.292
}, {
"x": 64.932,
"y": 74.128
}, {
"x": 91.888,
"y": 64.601
}, {
"x": 86.775,
"y": 41.828
}],
"instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"
}],
"classifications": [{
"featureId": "ckw3ceijf00053e669zaplftd",
"schemaId": "ckw3cdy207b6v0zbn11gp0zz4",
"title": "classification",
"value": "classification",
"answer": {
"featureId": "ckw3ceijf00043e665ex22xkp",
"schemaId": "ckw3cdy207b6y0zbn77201rux",
"title": "op2",
"value": "op_2"
}
}],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:49:15.000Z",
"Updated At": "2021-11-17T09:49:15.785Z",
"Seconds to Label": 5,
"External ID": "im4",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3ce1s34c1i0zbp32067q4v",
"DataRow ID": "ckw3cctc41uqk0zo52n31egs1",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE",
"media_type": "image",
"Label": {},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:48:59.000Z",
"Updated At": "2021-11-17T09:49:02.000Z",
"Seconds to Label": 3.524,
"External ID": "im2",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v",
"Has Open Issues": 0,
"Skipped": true
}]
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from pytest_cases import fixture_ref
from pytest_cases import pytest_parametrize_plus, fixture_ref
from pytest_cases import parametrize, fixture_ref

from labelbox.data.metrics.confusion_matrix.confusion_matrix import confusion_matrix_metric


@pytest_parametrize_plus("tool_examples", [
@parametrize("tool_examples", [
fixture_ref('polygon_pairs'),
fixture_ref('rectangle_pairs'),
fixture_ref('mask_pairs'),
Expand All @@ -27,10 +27,9 @@ def test_overlapping_objects(tool_examples):
expected), f"{example.predictions},{example.ground_truths}"


@pytest_parametrize_plus(
"tool_examples",
[fixture_ref('checklist_pairs'),
fixture_ref('radio_pairs')])
@parametrize("tool_examples",
[fixture_ref('checklist_pairs'),
fixture_ref('radio_pairs')])
def test_overlapping_classifications(tool_examples):
for example in tool_examples:
score = confusion_matrix_metric(example.ground_truths,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from pytest_cases import fixture_ref
from pytest_cases import pytest_parametrize_plus, fixture_ref
from pytest_cases import parametrize, fixture_ref

from labelbox.data.metrics.confusion_matrix.confusion_matrix import feature_confusion_matrix_metric


@pytest_parametrize_plus("tool_examples", [
@parametrize("tool_examples", [
fixture_ref('polygon_pairs'),
fixture_ref('rectangle_pairs'),
fixture_ref('mask_pairs'),
Expand All @@ -23,10 +23,9 @@ def test_overlapping_objects(tool_examples):
assert metrics == example.expected, f"{example.predictions},{example.ground_truths}"


@pytest_parametrize_plus(
"tool_examples",
[fixture_ref('checklist_pairs'),
fixture_ref('radio_pairs')])
@parametrize("tool_examples",
[fixture_ref('checklist_pairs'),
fixture_ref('radio_pairs')])
def test_overlapping_classifications(tool_examples):
for example in tool_examples:

Expand Down
34 changes: 34 additions & 0 deletions tests/data/test_prefetch_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pytest
from labelbox.data.generator import PrefetchGenerator
from random import random


class ChildClassGenerator(PrefetchGenerator):
    """PrefetchGenerator subclass whose processing step fails at random.

    ``_process`` hands its input back unchanged unless a fresh ``random()``
    draw falls below 0.2, in which case it raises ``ValueError``.
    """

    def __init__(self, examples, num_executors=1):
        """Pass ``examples`` (as ``data``) and ``num_executors`` to the base class."""
        super().__init__(num_executors=num_executors, data=examples)

    def _process(self, value):
        """Return ``value`` untouched, or raise ``ValueError`` on a low draw."""
        if random() >= .2:
            return value
        raise ValueError("Randomized value error")


# Module-level test input: a generator expression over range(50).
# NOTE(review): a generator can be consumed only once, so every consumer of
# this object sees only the items that earlier consumers left behind.
amount = (i for i in range(50))


def test_single_thread_generator():
    """A ``ValueError`` raised by ``_process`` reaches the caller with one executor."""
    # Build the input here rather than sharing a module-level generator:
    # generators are single-use, so a shared one makes the outcome depend on
    # how many items other tests have already pulled from it.
    examples = (i for i in range(50))
    generator = ChildClassGenerator(examples, num_executors=1)

    with pytest.raises(ValueError):
        # One more call than there are examples.
        for _ in range(51):
            next(generator)


def test_multi_thread_generator():
    """A ``ValueError`` raised while filling the queue surfaces from ``next()`` with four executors."""
    # Build the input here rather than sharing a module-level generator:
    # generators are single-use, so a shared one would hand this test only the
    # items left over by whichever test ran first, possibly too few to ever
    # trigger the randomized failure.
    examples = (i for i in range(50))
    generator = ChildClassGenerator(examples, num_executors=4)

    with pytest.raises(ValueError):
        # One more call than there are examples.
        for _ in range(51):
            next(generator)