Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise exceptions from the prefetch generator in the main caller thread #412

Merged
merged 10 commits into from
Feb 7, 2022
6 changes: 5 additions & 1 deletion labelbox/data/annotation_types/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,11 @@ class LabelGenerator(PrefetchGenerator):
than the LabelList but will be much more memory efficient.
"""

def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
def __init__(self,
data: Generator[Label, None, None],
multithread: bool = False,
*args,
**kwargs):
self._fns = {}
super().__init__(data, *args, **kwargs)

Expand Down
44 changes: 30 additions & 14 deletions labelbox/data/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,15 @@ class PrefetchGenerator:
Useful for modifying the generator results based on data from a network
"""

def __init__(self, data: Iterable[Any], prefetch_limit=20, num_executors=4):
# Maybe change num_executors to just 1 and, if it is 1, run synchronously:
# instead of self.queue.get in __next__, it'll just return self._data.next.
# Add a multithread kwarg on export, and on all other things that use prefetch.

def __init__(self,
data: Iterable[Any],
prefetch_limit=20,
num_executors=4,
multithread: bool = False):
jtsodapop marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(data, (list, tuple)):
self._data = (r for r in data)
else:
Expand All @@ -44,14 +52,19 @@ def __init__(self, data: Iterable[Any], prefetch_limit=20, num_executors=4):
self.completed_threads = 0
# Can only be iterated over once; otherwise queue.get hangs forever.
self.done = False
self.num_executors = num_executors
self.threads = [
threading.Thread(target=self.fill_queue)
for _ in range(num_executors)
]
for thread in self.threads:
thread.daemon = True
thread.start()
self.multithread = multithread

if self.multithread:
jtsodapop marked this conversation as resolved.
Show resolved Hide resolved
self.num_executors = num_executors
self.threads = [
threading.Thread(target=self.fill_queue)
for _ in range(num_executors)
]
for thread in self.threads:
thread.daemon = True
thread.start()
else:
self.fill_queue()

def _process(self, value) -> Any:
raise NotImplementedError("Abstract method needs to be implemented")
Expand All @@ -64,19 +77,22 @@ def fill_queue(self):
raise ValueError("Unexpected None")
self.queue.put(value)
except Exception as e:
logger.warning("Unexpected exception while filling the queue. %r",
e)
finally:
self.queue.put(None)
jtsodapop marked this conversation as resolved.
Show resolved Hide resolved
self.queue.put(
ValueError("Unexpected exception while filling queue. %r", e))

def __iter__(self):
return self

def __next__(self) -> Any:
if self.done:
if self.done or self.queue.empty():
raise StopIteration
value = self.queue.get()
if isinstance(value, ValueError):
raise value
jtsodapop marked this conversation as resolved.
Show resolved Hide resolved
while value is None:
if not self.multithread:
value = self.queue.get()
continue
self.completed_threads += 1
if self.completed_threads == self.num_executors:
self.done = True
Expand Down
10 changes: 5 additions & 5 deletions labelbox/data/serialization/coco/panoptic_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def from_common(cls,

for result in results:
images.append(result[0])
all_coco_annotations.extend(result[1])
all_coco_annotations.append(result[1])
coco_categories.update(result[2])
coco_things.update(result[3])

Expand Down Expand Up @@ -175,12 +175,12 @@ def to_common(self, image_root: Path, mask_root: Path):
raise ValueError(
f"Cannot find file {im_path}. Make sure `image_root` is set properly"
)

if not annotation.file_name.endswith('.png'):
if not str(annotation.file_name).endswith('.png'):
raise ValueError(
f"COCO masks must be stored as png files and their extension must be `.png`. Found {annotation.file_name}"
)
mask = MaskData(file_path=Path(mask_root, annotation.file_name))
mask = MaskData(
file_path=str(Path(mask_root, annotation.file_name)))

for segmentation in annotation.segments_info:
category = category_lookup[segmentation.category_id]
Expand All @@ -189,6 +189,6 @@ def to_common(self, image_root: Path, mask_root: Path):
value=Mask(mask=mask,
color=id_to_rgb(
segmentation.id))))
data = ImageData(file_path=im_path)
data = ImageData(file_path=str(im_path))
yield Label(data=data, annotations=annotations)
del annotation_lookup[image.id]
2 changes: 1 addition & 1 deletion labelbox/data/serialization/labelbox_v1/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class LBV1VideoIterator(PrefetchGenerator):
Generator that fetches video annotations in the background to be faster.
"""

def __init__(self, examples, client):
def __init__(self, examples, client, multithread: bool = False):
jtsodapop marked this conversation as resolved.
Show resolved Hide resolved
self.client = client
super().__init__(examples)

Expand Down
2 changes: 1 addition & 1 deletion labelbox/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,4 @@ class MALValidationError(LabelboxError):

class OperationNotAllowedException(Exception):
"""Raised when user does not have permissions to a resource or has exceeded usage limit"""
pass
pass
154 changes: 153 additions & 1 deletion tests/data/assets/labelbox_v1/unkown_media_type_export.json
Original file line number Diff line number Diff line change
@@ -1 +1,153 @@
[{"ID": "ckw3ce1mc78b50zc30dqf0qhj", "DataRow ID": "ckw3cctc41uqg0zo5023e59hn", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0", "Label": {"objects": [{"featureId": "ckw3ce58u00003e66w9rh0onm", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 99.405, "y": 56.15}, {"x": 111.421, "y": 99.129}, {"x": 146.082, "y": 80.413}, {"x": 118.815, "y": 47.369}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:56.000Z", "Updated At": "2021-11-17T09:48:56.305Z", "Seconds to Label": 2.239, "External ID": "im1", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cea3f7b9t0zbn2tgp2y83", "DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc", "Label": {"objects": [], "classifications": [{"featureId": "ckw3ced5e00023e66236meh70", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ced5e00013e6652355ejd", "schemaId": "ckw3cdy207b6w0zbn2hgp3321", "title": "op1", "value": 
"op_1"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:02.000Z", "Updated At": "2021-11-17T09:49:02.220Z", "Seconds to Label": 5.373, "External ID": "im3", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cec4v78ex0zc3aodwdekw", "DataRow ID": "ckw3cctc41uqs0zo52cy6eus1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4", "Label": {"objects": [{"featureId": "ckw3cefl900033e66k41q6zpc", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 69.58, "y": 42.292}, {"x": 64.932, "y": 74.128}, {"x": 91.888, "y": 64.601}, {"x": 86.775, "y": 41.828}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [{"featureId": "ckw3ceijf00053e669zaplftd", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ceijf00043e665ex22xkp", "schemaId": "ckw3cdy207b6y0zbn77201rux", "title": "op2", "value": "op_2"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:15.000Z", "Updated At": "2021-11-17T09:49:15.785Z", "Seconds to Label": 5, "External ID": "im4", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], 
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3ce1s34c1i0zbp32067q4v", "DataRow ID": "ckw3cctc41uqk0zo52n31egs1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE", "Label": {}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:59.000Z", "Updated At": "2021-11-17T09:49:02.000Z", "Seconds to Label": 3.524, "External ID": "im2", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v", "Has Open Issues": 0, "Skipped": true}]
[{
"ID": "ckw3ce1mc78b50zc30dqf0qhj",
"DataRow ID": "ckw3cctc41uqg0zo5023e59hn",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0",
"Label": {
"objects": [{
"featureId": "ckw3ce58u00003e66w9rh0onm",
"schemaId": "ckw3cdy207b6t0zbn3sh52xoh",
"color": "#1CE6FF",
"title": "obj",
"value": "obj",
"polygon": [{
"x": 99.405,
"y": 56.15
}, {
"x": 111.421,
"y": 99.129
}, {
"x": 146.082,
"y": 80.413
}, {
"x": 118.815,
"y": 47.369
}],
"instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"
}],
"classifications": [],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:48:56.000Z",
"Updated At": "2021-11-17T09:48:56.305Z",
"Seconds to Label": 2.239,
"External ID": "im1",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3cea3f7b9t0zbn2tgp2y83",
"DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc",
"media_type": "image",
"Label": {
"objects": [],
"classifications": [{
"featureId": "ckw3ced5e00023e66236meh70",
"schemaId": "ckw3cdy207b6v0zbn11gp0zz4",
"title": "classification",
"value": "classification",
"answer": {
"featureId": "ckw3ced5e00013e6652355ejd",
"schemaId": "ckw3cdy207b6w0zbn2hgp3321",
"title": "op1",
"value": "op_1"
}
}],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:49:02.000Z",
"Updated At": "2021-11-17T09:49:02.220Z",
"Seconds to Label": 5.373,
"External ID": "im3",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3cec4v78ex0zc3aodwdekw",
"DataRow ID": "ckw3cctc41uqs0zo52cy6eus1",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4",
"Label": {
"objects": [{
"featureId": "ckw3cefl900033e66k41q6zpc",
"schemaId": "ckw3cdy207b6t0zbn3sh52xoh",
"color": "#1CE6FF",
"title": "obj",
"value": "obj",
"polygon": [{
"x": 69.58,
"y": 42.292
}, {
"x": 64.932,
"y": 74.128
}, {
"x": 91.888,
"y": 64.601
}, {
"x": 86.775,
"y": 41.828
}],
"instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"
}],
"classifications": [{
"featureId": "ckw3ceijf00053e669zaplftd",
"schemaId": "ckw3cdy207b6v0zbn11gp0zz4",
"title": "classification",
"value": "classification",
"answer": {
"featureId": "ckw3ceijf00043e665ex22xkp",
"schemaId": "ckw3cdy207b6y0zbn77201rux",
"title": "op2",
"value": "op_2"
}
}],
"relationships": []
},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:49:15.000Z",
"Updated At": "2021-11-17T09:49:15.785Z",
"Seconds to Label": 5,
"External ID": "im4",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw",
"Has Open Issues": 0,
"Skipped": false
}, {
"ID": "ckw3ce1s34c1i0zbp32067q4v",
"DataRow ID": "ckw3cctc41uqk0zo52n31egs1",
"Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE",
"media_type": "image",
"Label": {},
"Created By": "msokoloff@labelbox.com",
"Project Name": "test",
"Created At": "2021-11-17T09:48:59.000Z",
"Updated At": "2021-11-17T09:49:02.000Z",
"Seconds to Label": 3.524,
"External ID": "im2",
"Agreement": -1,
"Benchmark Agreement": -1,
"Benchmark ID": null,
"Dataset Name": "no-name",
"Reviews": [],
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v",
"Has Open Issues": 0,
"Skipped": true
}]
3 changes: 0 additions & 3 deletions tests/integration/test_data_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ def test_data_row_bulk_creation(dataset, rand_gen, image_url):
},
])
assert task in client.get_user().created_tasks()
# TODO make Tasks expandable
with pytest.raises(InvalidQueryError):
assert task.created_by() == client.get_user()
task.wait_till_done()
assert task.status == "COMPLETE"

Expand Down