Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise exceptions from the prefetch generator in the main caller thread #412

Merged
merged 10 commits into from
Feb 7, 2022
50 changes: 31 additions & 19 deletions labelbox/data/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class PrefetchGenerator:
Useful for modifying the generator results based on data from a network
"""

def __init__(self, data: Iterable[Any], prefetch_limit=20, num_executors=4):
def __init__(self, data: Iterable[Any], prefetch_limit=20, num_executors=1):
if isinstance(data, (list, tuple)):
self._data = (r for r in data)
else:
Expand All @@ -43,15 +43,20 @@ def __init__(self, data: Iterable[Any], prefetch_limit=20, num_executors=4):
self._data = ThreadSafeGen(self._data)
self.completed_threads = 0
# Can only be iterated over once; otherwise queue.get hangs forever.
self.multithread = False if num_executors == 1 else True
jtsodapop marked this conversation as resolved.
Show resolved Hide resolved
self.done = False
self.num_executors = num_executors
self.threads = [
threading.Thread(target=self.fill_queue)
for _ in range(num_executors)
]
for thread in self.threads:
thread.daemon = True
thread.start()

if self.multithread:
jtsodapop marked this conversation as resolved.
Show resolved Hide resolved
self.num_executors = num_executors
self.threads = [
threading.Thread(target=self.fill_queue)
for _ in range(num_executors)
]
for thread in self.threads:
thread.daemon = True
thread.start()
else:
self._data = iter(self._data)

def _process(self, value) -> Any:
raise NotImplementedError("Abstract method needs to be implemented")
Expand All @@ -64,8 +69,8 @@ def fill_queue(self):
raise ValueError("Unexpected None")
self.queue.put(value)
except Exception as e:
logger.warning("Unexpected exception while filling the queue. %r",
e)
self.queue.put(
ValueError("Unexpected exception while filling queue. %r", e))
finally:
self.queue.put(None)

Expand All @@ -75,13 +80,20 @@ def __iter__(self):
def __next__(self) -> Any:
if self.done:
raise StopIteration
value = self.queue.get()
while value is None:
self.completed_threads += 1
if self.completed_threads == self.num_executors:
self.done = True
for thread in self.threads:
thread.join()
raise StopIteration

if self.multithread:
value = self.queue.get()
if isinstance(value, Exception):
raise value

while value is None:
self.completed_threads += 1
if self.completed_threads == self.num_executors:
self.done = True
for thread in self.threads:
thread.join()
raise StopIteration
value = self.queue.get()
else:
value = self._process(next(self._data))
return value
10 changes: 5 additions & 5 deletions labelbox/data/serialization/coco/panoptic_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def from_common(cls,

for result in results:
images.append(result[0])
all_coco_annotations.extend(result[1])
all_coco_annotations.append(result[1])
coco_categories.update(result[2])
coco_things.update(result[3])

Expand Down Expand Up @@ -175,12 +175,12 @@ def to_common(self, image_root: Path, mask_root: Path):
raise ValueError(
f"Cannot find file {im_path}. Make sure `image_root` is set properly"
)

if not annotation.file_name.endswith('.png'):
if not str(annotation.file_name).endswith('.png'):
raise ValueError(
f"COCO masks must be stored as png files and their extension must be `.png`. Found {annotation.file_name}"
)
mask = MaskData(file_path=Path(mask_root, annotation.file_name))
mask = MaskData(
file_path=str(Path(mask_root, annotation.file_name)))

for segmentation in annotation.segments_info:
category = category_lookup[segmentation.category_id]
Expand All @@ -189,6 +189,6 @@ def to_common(self, image_root: Path, mask_root: Path):
value=Mask(mask=mask,
color=id_to_rgb(
segmentation.id))))
data = ImageData(file_path=im_path)
data = ImageData(file_path=str(im_path))
yield Label(data=data, annotations=annotations)
del annotation_lookup[image.id]
2 changes: 1 addition & 1 deletion labelbox/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,4 @@ class MALValidationError(LabelboxError):

class OperationNotAllowedException(Exception):
"""Raised when user does not have permissions to a resource or has exceeded usage limit"""
pass
pass
154 changes: 153 additions & 1 deletion tests/data/assets/labelbox_v1/unkown_media_type_export.json
Original file line number Diff line number Diff line change
@@ -1 +1,153 @@
[{"ID": "ckw3ce1mc78b50zc30dqf0qhj", "DataRow ID": "ckw3cctc41uqg0zo5023e59hn", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0", "Label": {"objects": [{"featureId": "ckw3ce58u00003e66w9rh0onm", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 99.405, "y": 56.15}, {"x": 111.421, "y": 99.129}, {"x": 146.082, "y": 80.413}, {"x": 118.815, "y": 47.369}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:56.000Z", "Updated At": "2021-11-17T09:48:56.305Z", "Seconds to Label": 2.239, "External ID": "im1", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cea3f7b9t0zbn2tgp2y83", "DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc", "Label": {"objects": [], "classifications": [{"featureId": "ckw3ced5e00023e66236meh70", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ced5e00013e6652355ejd", "schemaId": "ckw3cdy207b6w0zbn2hgp3321", "title": "op1", "value": 
"op_1"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:02.000Z", "Updated At": "2021-11-17T09:49:02.220Z", "Seconds to Label": 5.373, "External ID": "im3", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cec4v78ex0zc3aodwdekw", "DataRow ID": "ckw3cctc41uqs0zo52cy6eus1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4", "Label": {"objects": [{"featureId": "ckw3cefl900033e66k41q6zpc", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 69.58, "y": 42.292}, {"x": 64.932, "y": 74.128}, {"x": 91.888, "y": 64.601}, {"x": 86.775, "y": 41.828}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [{"featureId": "ckw3ceijf00053e669zaplftd", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ceijf00043e665ex22xkp", "schemaId": "ckw3cdy207b6y0zbn77201rux", "title": "op2", "value": "op_2"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:15.000Z", "Updated At": "2021-11-17T09:49:15.785Z", "Seconds to Label": 5, "External ID": "im4", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], 
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3ce1s34c1i0zbp32067q4v", "DataRow ID": "ckw3cctc41uqk0zo52n31egs1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE", "Label": {}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:59.000Z", "Updated At": "2021-11-17T09:49:02.000Z", "Seconds to Label": 3.524, "External ID": "im2", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v", "Has Open Issues": 0, "Skipped": true}]
[{
"ID": "ckw3ce1mc78b50zc30dqf0qhj",
"DataRow ID": "ckw3cctc41uqg0zo5023e59hn",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0",
"Label": {
"objects": [{
"featureId": "ckw3ce58u00003e66w9rh0onm",
"schemaId": "ckw3cdy207b6t0zbn3sh52xoh",
"color": "#1CE6FF",
"title": "obj",
"value": "obj",
"polygon": [{
"x": 99.405,
"y": 56.15
}, {
"x": 111.421,
"y": 99.129
}, {
"x": 146.082,
"y": 80.413
}, {
"x": 118.815,
"y": 47.369
}],
"instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"
}],
"classifications": [],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:48:56.000Z",
"Updated At": "2021-11-17T09:48:56.305Z",
"Seconds to Label": 2.239,
"External ID": "im1",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3cea3f7b9t0zbn2tgp2y83",
"DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc",
"media_type": "image",
"Label": {
"objects": [],
"classifications": [{
"featureId": "ckw3ced5e00023e66236meh70",
"schemaId": "ckw3cdy207b6v0zbn11gp0zz4",
"title": "classification",
"value": "classification",
"answer": {
"featureId": "ckw3ced5e00013e6652355ejd",
"schemaId": "ckw3cdy207b6w0zbn2hgp3321",
"title": "op1",
"value": "op_1"
}
}],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:49:02.000Z",
"Updated At": "2021-11-17T09:49:02.220Z",
"Seconds to Label": 5.373,
"External ID": "im3",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3cec4v78ex0zc3aodwdekw",
"DataRow ID": "ckw3cctc41uqs0zo52cy6eus1",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4",
"Label": {
"objects": [{
"featureId": "ckw3cefl900033e66k41q6zpc",
"schemaId": "ckw3cdy207b6t0zbn3sh52xoh",
"color": "#1CE6FF",
"title": "obj",
"value": "obj",
"polygon": [{
"x": 69.58,
"y": 42.292
}, {
"x": 64.932,
"y": 74.128
}, {
"x": 91.888,
"y": 64.601
}, {
"x": 86.775,
"y": 41.828
}],
"instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"
}],
"classifications": [{
"featureId": "ckw3ceijf00053e669zaplftd",
"schemaId": "ckw3cdy207b6v0zbn11gp0zz4",
"title": "classification",
"value": "classification",
"answer": {
"featureId": "ckw3ceijf00043e665ex22xkp",
"schemaId": "ckw3cdy207b6y0zbn77201rux",
"title": "op2",
"value": "op_2"
}
}],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:49:15.000Z",
"Updated At": "2021-11-17T09:49:15.785Z",
"Seconds to Label": 5,
"External ID": "im4",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3ce1s34c1i0zbp32067q4v",
"DataRow ID": "ckw3cctc41uqk0zo52n31egs1",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE",
"media_type": "image",
"Label": {},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:48:59.000Z",
"Updated At": "2021-11-17T09:49:02.000Z",
"Seconds to Label": 3.524,
"External ID": "im2",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v",
"Has Open Issues": 0,
"Skipped": true
}]
3 changes: 0 additions & 3 deletions tests/integration/test_data_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ def test_data_row_bulk_creation(dataset, rand_gen, image_url):
},
])
assert task in client.get_user().created_tasks()
# TODO make Tasks expandable
with pytest.raises(InvalidQueryError):
assert task.created_by() == client.get_user()
task.wait_till_done()
assert task.status == "COMPLETE"

Expand Down