forked from legend-exp/pygama
-
Notifications
You must be signed in to change notification settings - Fork 0
/
processing_chain.py
1538 lines (1299 loc) · 63.8 KB
/
processing_chain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
This module provides routines for setting up and running signal processing
chains on waveform data.
"""
from __future__ import annotations
import ast
import importlib
import itertools as it
import json
import logging
import re
from abc import ABCMeta, abstractmethod
from copy import deepcopy
from dataclasses import dataclass
from typing import Any
import numpy as np
from numba import vectorize
import pygama.lgdo as lgdo
from pygama.dsp.errors import DSPFatal, ProcessingChainError
from pygama.math.units import Quantity, Unit
from pygama.math.units import unit_registry as ureg
# Module-level logger for the processing chain machinery
log = logging.getLogger(__name__)

# Filler value for variables to be automatically deduced later
auto = 'auto'

# Map from ast interpreter operations to functions to call and format string
# used to build a human-readable name for the derived variable
ast_ops_dict = {ast.Add: (np.add, '{}+{}'),
                ast.Sub: (np.subtract, '{}-{}'),
                ast.Mult: (np.multiply, '{}*{}'),
                ast.Div: (np.divide, '{}/{}'),
                ast.FloorDiv: (np.floor_divide, '{}//{}'),
                ast.USub: (np.negative, '-{}')}
@dataclass
class CoordinateGrid:
    """Helper class that describes a system of units, consisting of a period
    and offset.

    `period` is a unitted :class:`pint.Quantity`, `offset` is a scalar in units
    of `period`, a :class:`pint.Unit` or a :class:`ProcChainVar`. In the last
    case, a :class:`ProcChainVar` variable is used to store a different offset
    for each event.
    """
    period: Quantity | Unit | str
    offset: Quantity | ProcChainVar | float | int

    def __post_init__(self) -> None:
        # Normalize period into a pint Quantity
        if isinstance(self.period, str):
            self.period = Quantity(1., self.period)
        elif isinstance(self.period, Unit):
            self.period *= 1  # multiplying a Unit by 1 yields a Quantity
        # A bare number is interpreted as being in units of the period
        if isinstance(self.offset, (int, float)):
            self.offset = self.offset * self.period
        assert isinstance(self.period, Quantity) and isinstance(self.offset, (Quantity, ProcChainVar))

    def __eq__(self, other: CoordinateGrid) -> bool:
        """True if values are equal; if offset is a variable, compares reference"""
        if self.period != other.period:
            return False
        # variable offsets compare by identity, everything else by value
        if isinstance(self.offset, ProcChainVar):
            return self.offset is other.offset
        return self.offset == other.offset

    def unit_str(self) -> str:
        # Prefer pint's compact ("~") representation; fall back to full name
        compact = format(self.period.u, '~')
        return compact if compact != '' else str(self.period.u)

    def get_period(self, unit: str | Unit) -> float:
        # Return the period expressed as a dimensionless multiple of `unit`
        target = ureg.Quantity(unit) if isinstance(unit, str) else unit
        return float(self.period / target)

    def get_offset(self, unit: str | Unit = None) -> float:
        """Get the offset converted to `unit`. If `unit` is ``None`` use period."""
        if unit is None:
            target = self.period
        elif isinstance(unit, str):
            target = ureg.Quantity(unit)
        else:
            target = unit
        # a variable offset is fetched as a per-event buffer in the target grid
        if isinstance(self.offset, ProcChainVar):
            return self.offset.get_buffer(CoordinateGrid(target, 0))
        return float(self.offset / target)

    def __str__(self) -> str:
        if isinstance(self.offset, ProcChainVar):
            offset_repr = self.offset.name
        else:
            offset_repr = str(self.offset)
        return f"({str(self.period)},{offset_repr})"
class ProcChainVar:
    """Helper data class with buffer and information for internal variables in
    :class:`ProcessingChain`.

    Members can be set to ``auto`` to attempt to deduce these when adding this
    variable to a processor for the first time.
    """
    def __init__(self, proc_chain: ProcessingChain, name: str, shape: tuple = auto,
                 dtype: np.dtype = auto, grid: CoordinateGrid = auto,
                 unit: str | Unit = auto, is_coord: bool = auto) -> None:
        """
        Parameters
        ----------
        proc_chain : ProcessingChain
            processing chain that owns this variable
        name : str
            name used to refer to this variable in processor expressions
        shape : tuple, optional, default='auto'
            shape of a single entry, excluding the leading block dimension
        dtype : numpy.dtype or str, optional, default='auto'
            data type of the underlying buffer
        grid : CoordinateGrid
            period and offset describing the variable's coordinate system
        unit : str or pint.Unit, optional, default='auto'
            physical unit associated with the stored values
        is_coord : bool, optional, default='auto'
            if ``True``, values are coordinates that may be converted between
            coordinate grids
        """
        assert isinstance(proc_chain, ProcessingChain) and isinstance(name, str)
        self.proc_chain = proc_chain
        self.name = name
        # ndarray containing data buffer of size block_width x shape;
        # a list of (ndarray, CoordinateGrid) pairs if is_coord is true
        self._buffer: list | np.ndarray = None
        self.shape = shape
        self.dtype = dtype
        self.grid = grid
        self.unit = unit
        self.is_coord = is_coord
        log.debug(f'added variable: {self.description()}')

    # Use this to enforce type constraints and perform conversions
    def __setattr__(self, name: str, value: Any) -> None:
        if value is auto:
            pass
        elif name == 'shape':
            # normalize shape to a tuple of ints
            if hasattr(value, '__iter__'):
                value = tuple(value)
            else:
                value = (int(value),)
            assert all(isinstance(d, int) for d in value)
        elif name == 'dtype' and not isinstance(value, np.dtype):
            value = np.dtype(value)
        elif name == 'grid' and not isinstance(value, CoordinateGrid) and value is not None:
            # accept either a (period, offset) pair or a bare period.
            # NOTE: the parentheses around (value, 0) are essential; without
            # them the conditional expression binds only to `value` and the
            # offset of a supplied tuple is silently replaced by 0
            period, offset = value if isinstance(value, tuple) else (value, 0)
            value = CoordinateGrid(period, offset)
        elif name == 'unit' and value is not None:
            value = str(value)
        elif name == 'is_coord':
            value = bool(value)
            if value:
                # coordinate variables keep one buffer per coordinate grid
                if self._buffer is None:
                    self._buffer = []
                elif isinstance(self._buffer, np.ndarray):
                    self._buffer = [(self._buffer, CoordinateGrid(self.unit, 0))]
        super().__setattr__(name, value)

    def get_buffer(self, unit: str | Unit = None) -> np.array:
        """Return the internal buffer, creating it if needed. For coordinate
        variables, return (allocating and registering a conversion processor
        if necessary) the buffer expressed in the requested coordinate grid.
        """
        # If buffer needs to be created, do so now
        if self._buffer is None:
            if self.shape is auto:
                raise ProcessingChainError(f"cannot deduce shape of {self.name}")
            if self.dtype is auto:
                raise ProcessingChainError(f"cannot deduce dtype of {self.name}")
            self._buffer = np.zeros(shape=(self.proc_chain._block_width,)+self.shape, dtype=self.dtype)
        # if variable isn't a coordinate, we're all set
        if self.is_coord is False or self.is_coord is auto:
            return self._buffer
        # if no unit is given, use the native unit
        if unit is None:
            if isinstance(self.unit, str):
                unit = CoordinateGrid(self.unit, 0.)
        elif not isinstance(unit, CoordinateGrid):
            unit = CoordinateGrid(unit, 0.)
        # if this is our first time accessing, no conversion is needed
        if len(self._buffer) == 0:
            if self.shape is auto:
                raise ProcessingChainError(f"cannot deduce shape of {self.name}")
            if self.dtype is auto:
                raise ProcessingChainError(f"cannot deduce dtype of {self.name}")
            buff = np.zeros(shape=(self.proc_chain._block_width,)+self.shape, dtype=self.dtype)
            self._buffer.append((buff, unit))
            return buff
        # check if coordinate conversion has been done already
        for buff, grid in self._buffer:
            if grid == unit:
                return buff
        # If we get this far, add conversion processor to ProcChain and add
        # new buffer to _buffer
        conversion_manager = UnitConversionManager(self, unit)
        self._buffer.append([conversion_manager.out_buffer, unit])
        self.proc_chain._proc_managers.append(conversion_manager)
        return conversion_manager.out_buffer

    @property
    def buffer(self):
        # buffer in the variable's native coordinate grid
        return self.get_buffer()

    @property
    def period(self):
        return self.grid.period if self.grid else None

    @property
    def offset(self):
        return self.grid.offset if self.grid else None

    def description(self):
        return (f"{self.name}(shape: {self.shape}, "
                f"dtype: {self.dtype}, grid: {self.grid}, "
                f"unit: {self.unit}, is_coord: {self.is_coord})")

    def update_auto(self, shape: tuple = auto, dtype: np.dtype = auto,
                    grid: CoordinateGrid = auto, unit: str | Unit = auto,
                    is_coord: bool = auto, period: Quantity | Unit | str = None,
                    offset: Quantity | str | float | int = 0) -> None:
        """
        Update any variables set to auto; leave the others alone. Emit a message
        only if anything was updated.
        """
        updated = False
        # Construct coordinate grid from period/offset if given
        if grid is auto and period is not None:
            if isinstance(offset, str):
                # a string offset names another chain variable holding a
                # per-event offset; resolve it through the owning chain
                offset = self.proc_chain.get_variable(offset, expr_only=True)
            grid = CoordinateGrid(period, offset)
        if self.shape is auto and shape is not auto:
            self.shape = shape
            updated = True
        if self.dtype is auto and dtype is not auto:
            self.dtype = dtype
            updated = True
        if self.grid is auto and grid is not auto:
            self.grid = grid
            updated = True
        if self.unit is auto and unit is not auto:
            self.unit = unit
            updated = True
        if self.is_coord is auto and is_coord is not auto:
            self.is_coord = is_coord
            updated = True
        if updated:
            log.debug(f'updated variable: {self.description()}')

    def __str__(self) -> str:
        return self.name
class ProcessingChain:
    """A class to efficiently perform a sequence of digital signal processing
    (DSP) transforms.

    It contains a list of DSP functions and a set of constant values and named
    variables contained in fixed memory locations. When executing the
    :class:`ProcessingChain`, processors will act on the internal memory
    without allocating new memory in the process. Furthermore, the memory is
    allocated in blocks, enabling vectorized processing of many entries at
    once. To set up a :class:`ProcessingChain`, use the following methods:

    - :meth:`.link_input_buffer` bind a named variable to an external NumPy
      array to read data from
    - :meth:`.add_processor` add a dsp function and bind its inputs to a set of
      named variables and constant values
    - :meth:`.link_output_buffer` bind a named variable to an external NumPy
      array to write data into

    When calling these methods, the :class:`ProcessingChain` class will use
    available information to allocate buffers to the correct sizes and data
    types. For this reason, transforms will ideally implement the
    :class:`numpy.ufunc` class, enabling broadcasting of array dimensions. If
    not enough information is available to correctly allocate memory, it can be
    provided through the named variable strings or by calling add_vector or
    add_scalar.
    """

    def __init__(self, block_width: int = 8, buffer_len: int = None) -> None:
        """
        Parameters
        ----------
        block_width : int
            number of entries to simultaneously process.
        buffer_len : int
            length of input and output buffers. Should be a multiple of
            `block_width`
        """
        # Dictionary from name to scratch data buffers as ProcChainVar
        self._vars_dict = {}
        # list of processors with variables they are called on
        self._proc_managers = []
        # lists of I/O managers that handle copying data to/from external
        # memory buffers
        self._input_managers = []
        self._output_managers = []
        self._block_width = block_width
        self._buffer_len = buffer_len

    def add_variable(self, name: str, dtype: np.dtype | str = auto, shape: int | tuple = auto,
                     grid: CoordinateGrid = auto, unit: str | Unit = auto, is_coord: bool = auto,
                     period: CoordinateGrid.period = None, offset: CoordinateGrid.offset = 0) -> ProcChainVar:
        """Add a named variable containing a block of values or arrays.

        Parameters
        ----------
        name : str
            name of variable
        dtype : numpy.dtype or str, optional, default='auto'
            data type of variable; ``auto`` means `dtype` will be deduced
            later, if possible
        shape : int or tuple, optional, default='auto'
            length or shape tuple of element; ``auto`` means length will be
            deduced later, if possible
        grid : CoordinateGrid
            for variable, containing period and offset
        unit
            physical unit associated with the variable's values
        period
            unit with period of waveform associated with object. Do not use if
            `grid` is provided
        offset
            unit with offset of waveform associated with object. Requires a
            `period` to be provided
        is_coord : bool
            if ``True``, transform value based on `period` and `offset`

        Returns
        -------
        ProcChainVar
            the newly created variable

        Raises
        ------
        ProcessingChainError
            if `name` is invalid or already registered
        """
        self._validate_name(name, raise_exception=True)
        if name in self._vars_dict:
            raise ProcessingChainError(name+' is already in variable list')
        # Construct coordinate grid from period/offset if given
        if grid is auto and period is not None:
            if isinstance(offset, str):
                # a string offset names another chain variable
                offset = self.get_variable(offset, expr_only=True)
            grid = CoordinateGrid(period, offset)
        var = ProcChainVar(self, name, shape=shape, dtype=dtype, grid=grid,
                           unit=unit, is_coord=is_coord)
        self._vars_dict[name] = var
        return var

    # TODO: type: numpy array or lgdo. need an lgdo type
    def link_input_buffer(self, varname: str, buff=None):
        """Link an input buffer to a variable

        Parameters
        ----------
        varname : str
            name of internal variable to copy into buffer at the end
            of processor execution. If variable does not yet exist, it will
            be created with a similar shape to the provided buffer
        buff : numpy.array or lgdo class, optional, default=None
            object to use as input buffer. If ``None``, create a new buffer
            with a similar shape to the variable

        Returns
        -------
        buffer: numpy.array or lgdo class
            buff or newly allocated input buffer
        """
        self._validate_name(varname, raise_exception=True)
        var = self.get_variable(varname, expr_only=True)
        if var is None:
            var = self.add_variable(varname)
        if not isinstance(var, ProcChainVar):
            raise ProcessingChainError("Must link an input buffer to a processing chain variable")
        # Create input buffer that will be linked and returned if none exists
        if buff is None:
            if var is None:
                raise ProcessingChainError(f"{varname} does not exist and no buffer was provided")
            elif isinstance(var.grid, CoordinateGrid) and len(var.shape) == 1:
                # 1D gridded variables map naturally onto waveform tables
                buff = lgdo.WaveformTable(size=self._buffer_len,
                                          wf_len=var.shape[0],
                                          dtype=var.dtype)
            elif len(var.shape) == 0:
                buff = lgdo.Array(shape=(self._buffer_len), dtype=var.dtype)
            elif len(var.shape) > 0:
                buff = lgdo.ArrayOfEqualSizedArrays(shape=(self._buffer_len, *var.shape), dtype=var.dtype)
            else:
                buff = np.ndarray((self._buffer_len,) + var.shape, var.dtype)
        # Add the buffer to the input buffers list; the manager type is
        # dispatched on the buffer's class. NOTE: the ArrayOfEqualSizedArrays
        # check must precede the Array check if the former subclasses the
        # latter -- TODO confirm against the lgdo class hierarchy
        if isinstance(buff, np.ndarray):
            out_man = NumpyIOManager(buff, var)
        elif isinstance(buff, lgdo.ArrayOfEqualSizedArrays):
            out_man = LGDOArrayOfEqualSizedArraysIOManager(buff, var)
        elif isinstance(buff, lgdo.Array):
            out_man = LGDOArrayIOManager(buff, var)
        elif isinstance(buff, lgdo.WaveformTable):
            out_man = LGDOWaveformIOManager(buff, var)
        else:
            raise ProcessingChainError("Could not link input buffer of unknown type", str(buff))
        log.debug(f"added input buffer: {out_man}")
        self._input_managers.append(out_man)
        return buff

    def link_output_buffer(self, varname: str, buff=None):
        """Link an output buffer to a variable.

        Parameters
        ----------
        varname
            name of internal variable to copy into buffer at the end
            of processor execution. If variable does not yet exist, it will
            be created with a similar shape to the provided buffer
        buff : numpy.array or lgdo class
            object to use as output buffer. If None, create a new buffer with a
            similar shape to the variable

        Returns
        -------
        numpy.array or lgdo object
            buff or newly allocated output buffer
        """
        self._validate_name(varname, raise_exception=True)
        var = self.get_variable(varname, expr_only=True)
        if var is None:
            var = self.add_variable(varname)
        if not isinstance(var, ProcChainVar):
            raise ProcessingChainError("must link an output buffer to a processing chain variable")
        # Create output buffer that will be linked and returned if none exists
        if buff is None:
            if var is None:
                raise ProcessingChainError(varname+" does not exist and no buffer was provided")
            elif isinstance(var.grid, CoordinateGrid) and len(var.shape) == 1:
                buff = lgdo.WaveformTable(size=self._buffer_len,
                                          wf_len=var.shape[0],
                                          dtype=var.dtype)
            elif len(var.shape) == 0:
                buff = lgdo.Array(shape=(self._buffer_len), dtype=var.dtype)
            elif len(var.shape) > 0:
                buff = lgdo.ArrayOfEqualSizedArrays(shape=(self._buffer_len, *var.shape), dtype=var.dtype)
            else:
                buff = np.ndarray((self._buffer_len,) + var.shape, var.dtype)
        # Add the buffer to the output buffers list
        if isinstance(buff, np.ndarray):
            out_man = NumpyIOManager(buff, var)
        elif isinstance(buff, lgdo.ArrayOfEqualSizedArrays):
            out_man = LGDOArrayOfEqualSizedArraysIOManager(buff, var)
        elif isinstance(buff, lgdo.Array):
            out_man = LGDOArrayIOManager(buff, var)
        elif isinstance(buff, lgdo.WaveformTable):
            out_man = LGDOWaveformIOManager(buff, var)
        else:
            raise ProcessingChainError("could not link output buffer of unknown type", str(buff))
        log.debug(f"added output buffer: {out_man}")
        self._output_managers.append(out_man)
        return buff

    def add_processor(self, func: np.ufunc, *args, signature: str = None,
                      types: list[str] = None) -> None:
        """Add a processor to the chain, binding its arguments to chain
        variables and constants.

        Each string in `*args` is parsed with :meth:`get_variable`: plain
        names become chain variables (created as needed), ``kw=expr`` strings
        become keyword arguments, and everything else is passed through as a
        positional value.

        Parameters
        ----------
        func : numpy.ufunc
            callable (ideally a ufunc or gufunc) that processes the data
        *args
            positional/keyword parameter expressions for `func`
        signature : str, optional
            gufunc-style signature; deduced from `func` if not given
        types : list of str, optional
            allowed type signatures; deduced from `func` if not given
        """
        params = []
        kw_params = {}
        for i, param in enumerate(args):
            if isinstance(param, str):
                param = self.get_variable(param)
            if isinstance(param, dict):
                # a dict is a parsed keyword assignment (kw=expr)
                kw_params.update(param)
            else:
                params.append(param)
        proc_man = ProcessorManager(self, func, params, kw_params, signature, types)
        self._proc_managers.append(proc_man)

    def execute(self, start: int = 0, stop: int = None) -> None:
        """Execute the dsp chain on the entire input/output buffers."""
        if stop is None:
            stop = self._buffer_len
        for i in range(start, stop, self._block_width):
            # NOTE(review): the final block is clamped to _buffer_len, not
            # `stop` -- a custom `stop` below _buffer_len may overshoot on the
            # last partial block; confirm intent before changing
            self._execute_procs(i, min(i+self._block_width, self._buffer_len))

    def get_variable(self, expr: str, get_names_only: bool = False,
                     expr_only: bool = False) -> Any:
        r"""Parse string `expr` into a NumPy array or value, using the following
        syntax:

        - numeric values are parsed into ``int``\ s or ``float``\ s
        - units found in the :mod:`pint` package
        - other strings are parsed into variable names. If `get_names_only` is
          ``False``, fetch the internal buffer (creating it as needed). Else,
          return a string of the name
        - if a string is followed by ``(...)``, try parsing into one of the
          following expressions:

          - ``len(expr)``: return the length of the array found with `expr`
          - ``round(expr)``: return the value found with `expr` to the
            nearest integer
          - ``varname(shape, type)``: allocate a new buffer with the
            specified shape and type, using ``varname``. This is used if
            the automatic type and shape deduction for allocating variables
            fails

        - Unary and binary operators :obj:`+`, :obj:`-`, :obj:`*`, :obj:`/`,
          :obj:`//` are available. If a variable name is included in the
          expression, a processor will be added to the
          :class:`ProcessingChain` and a new buffer allocated to store the
          output
        - ``varname[slice]``: return the variable with a slice applied. Slice
          values can be ``float``\ s, and will have round applied to them
        - ``keyword = expr``: return a ``dict`` with a single element
          pointing from keyword to the parsed `expr`. This is used for
          `kwargs`. If `expr_only` is ``True``, raise an exception if we see
          this

        If `get_names_only` is set to ``True``, do not fetch or allocate new
        arrays, instead return a list of variable names found in the expression.
        """
        names = []
        try:
            stmt = ast.parse(expr).body[0]
            var = self._parse_expr(stmt.value, expr, get_names_only, names)
        except Exception as e:
            raise ProcessingChainError("Could not parse expression:\n  " + expr) from e
        # Check if this is an arg (i.e. expr) or kwarg (i.e. assign)
        if not get_names_only:
            if isinstance(stmt, ast.Expr):
                return var
            elif isinstance(stmt, ast.Assign) and len(stmt.targets) == 1:
                if expr_only:
                    raise ProcessingChainError("kwarg assignment is not allowed in this context\n  " + expr)
                return {stmt.targets[0].id: var}
            else:
                raise ProcessingChainError("Could not parse expression:\n  " + expr)
        else:
            return names

    def _parse_expr(self, node: Any, expr: str, dry_run: bool, var_name_list: list[str]) -> Any:
        """
        Helper function for :meth:`ProcessingChain.get_variable` that
        recursively evaluates the AST tree. Whenever we encounter a variable
        name, add it to `var_name_list` (which should begin as an empty list).
        Only add new variables and processors to the chain if `dry_run` is
        ``True``. Based on `this Stackoverflow
        answer <https://stackoverflow.com/a/9558001>`_.
        """
        if node is None:
            return None
        elif isinstance(node, ast.Num):
            return node.n
        elif isinstance(node, ast.Str):
            return node.s
        elif isinstance(node, ast.Constant):
            return node.value
        # look for name in variable dictionary
        elif isinstance(node, ast.Name):
            # check if it is a unit
            if node.id in ureg:
                return ureg(node.id)
            # check if it is a variable
            var_name_list.append(node.id)
            if dry_run:
                return None
            val = self._vars_dict.get(node.id, None)
            if val is None:
                val = self.add_variable(node.id)
            return val
        # define binary operators (+,-,*,/)
        elif isinstance(node, ast.BinOp):
            lhs = self._parse_expr(node.left, expr, dry_run, var_name_list)
            rhs = self._parse_expr(node.right, expr, dry_run, var_name_list)
            if rhs is None or lhs is None:
                return None
            op, op_form = ast_ops_dict[type(node.op)]
            if not (isinstance(lhs, ProcChainVar) or isinstance(rhs, ProcChainVar)):
                # pure constants: evaluate eagerly, no processor needed
                return op(lhs, rhs)
            name = '('+op_form.format(str(lhs), str(rhs))+')'
            if isinstance(lhs, ProcChainVar) and isinstance(rhs, ProcChainVar):
                # TODO: handle units/coords; for now make them match lhs
                out = ProcChainVar(self, name, is_coord=lhs.is_coord)
            elif isinstance(lhs, ProcChainVar):
                out = ProcChainVar(self, name, lhs.shape, lhs.dtype, lhs.grid, lhs.unit, is_coord=lhs.is_coord)
            else:
                out = ProcChainVar(self, name, rhs.shape, rhs.dtype, rhs.grid, rhs.unit, is_coord=rhs.is_coord)
            self._proc_managers.append(ProcessorManager(self, op, [lhs, rhs, out]))
            return out
        # define unary operators (-)
        elif isinstance(node, ast.UnaryOp):
            operand = self._parse_expr(node.operand, expr, dry_run, var_name_list)
            if operand is None:
                return None
            op, op_form = ast_ops_dict[type(node.op)]
            name = '('+op_form.format(str(operand))+')'
            if isinstance(operand, ProcChainVar):
                out = ProcChainVar(self, name, operand.shape, operand.dtype,
                                   operand.grid, operand.unit, operand.is_coord)
                self._proc_managers.append(ProcessorManager(self, op, [operand, out]))
            else:
                # constant operand: apply the operator directly
                # (fixed: previously `op(out)`, referencing `out` before
                # assignment and raising NameError)
                out = op(operand)
            return out
        elif isinstance(node, ast.Subscript):
            val = self._parse_expr(node.value, expr, dry_run, var_name_list)
            if val is None:
                return None
            if not isinstance(val, ProcChainVar):
                raise ProcessingChainError("Cannot apply subscript to", node.value)

            def get_index(slice_value):
                # Parse a slice component into an int; unitted quantities are
                # converted into samples using the variable's period
                ret = self._parse_expr(slice_value, expr, dry_run, var_name_list)
                if ret is None:
                    return ret
                if isinstance(ret, Quantity):
                    ret = float(ret/val.period)
                if isinstance(ret, float):
                    round_ret = int(round(ret))
                    if abs(ret - round_ret) > 0.0001:
                        log.warning(f'slice value {slice_value} is non-integer. Rounding to {round_ret}')
                    return round_ret
                return int(ret)

            if isinstance(node.slice, ast.Index):
                index = get_index(node.slice.value)
                # index the underlying buffer; `val` itself is a ProcChainVar
                # and is not subscriptable (matches the Slice branch below)
                out_buf = val.buffer[..., index]
                # fixed: a trailing comma here previously made out_name a
                # 1-tuple instead of a string
                out_name = f'{str(val)}[{index}]'
                out_grid = None
            elif isinstance(node.slice, ast.Slice):
                sl = slice(get_index(node.slice.lower),
                           get_index(node.slice.upper),
                           get_index(node.slice.step))
                out_buf = val.buffer[..., sl]
                out_name = '{}[{}:{}{}]'.format(
                    str(val),
                    '' if sl.start is None else str(sl.start),
                    '' if sl.stop is None else str(sl.stop),
                    '' if sl.step is None else ':'+str(sl.step))
                if val.grid is None:
                    out_grid = None
                else:
                    # slicing with a step stretches the period; a start shifts
                    # the offset by start*period
                    pd = val.period
                    if sl.step is not None:
                        pd *= sl.step
                    off = val.offset
                    if sl.start is not None:
                        start = sl.start*val.period
                        if isinstance(off, ProcChainVar):
                            # per-event offsets need a processor to do the add
                            new_off = ProcChainVar(self,
                                                   name=f"({str(off)}+{str(start)})",
                                                   is_coord=True)
                            self._proc_managers.append(ProcessorManager(self, np.add, [off, start, new_off]))
                            off = new_off
                        else:
                            off += start
                    out_grid = CoordinateGrid(pd, off)
            elif isinstance(node.slice, ast.ExtSlice):
                # TODO: implement this...
                raise ProcessingChainError("ExtSlice still isn't implemented...")
            # Create our return variable and set the buffer to the slice
            out = ProcChainVar(self, out_name,
                               shape=out_buf.shape[1:],
                               dtype=out_buf.dtype,
                               grid=out_grid,
                               unit=val.unit,
                               is_coord=val.is_coord)
            out._buffer = [(out_buf, val._buffer[0][1])] if out.is_coord else out_buf
            return out
        # for name.attribute
        elif isinstance(node, ast.Attribute):
            val = self._parse_expr(node.value, expr, dry_run, var_name_list)
            if val is None:
                return None
            return getattr(val, node.attr)
        # for func(args, kwargs)
        elif isinstance(node, ast.Call):
            func = self.func_list.get(node.func.id, None)
            args = [self._parse_expr(arg, expr, dry_run, var_name_list)
                    for arg in node.args]
            kwargs = {kwarg.arg: self._parse_expr(kwarg.value, expr, dry_run, var_name_list) for kwarg in node.keywords}
            if func is not None:
                return func(*args, **kwargs)
            elif self._validate_name(node.func.id):
                # varname(shape, dtype, ...): create or update a variable
                var_name = node.func.id
                var_name_list.append(var_name)
                if var_name in self._vars_dict:
                    var = self._vars_dict[var_name]
                    var.update_auto(*args, **kwargs)
                    return self._vars_dict[var_name]
                elif not dry_run:
                    return self.add_variable(var_name, *args, **kwargs)
                else:
                    return None
            else:
                # fixed: report the called name rather than `func`, which is
                # always None on this branch
                raise ProcessingChainError(
                    f"do not recognize call to {node.func.id} with arguments "
                    f"{[str(arg.__dict__) for arg in node.args]}")
        raise ProcessingChainError(f"cannot parse AST nodes of type {node.__dict__}")

    def _validate_name(self, name: str, raise_exception: bool = False) -> bool:
        """Check that name is alphanumeric, and not an already used keyword"""
        isgood = bool(re.match(r"\A\w+$", name)) and name not in self.func_list and name not in ureg
        if raise_exception and not isgood:
            raise ProcessingChainError(f"{name} is not a valid variable name")
        return isgood

    def _execute_procs(self, begin: int, end: int) -> None:
        """Copy from input buffers to variables, call all the processors on
        their paired arg tuples, copy from variables to list of output buffers.
        """
        # Copy input buffers into proc chain buffers
        for in_man in self._input_managers:
            in_man.read(begin, end)
        # Loop through processors and run each one
        for proc_man in self._proc_managers:
            try:
                proc_man.execute()
            except DSPFatal as e:
                # annotate the failure with which processor and which
                # waveform range was being processed
                e.processor = str(proc_man)
                e.wf_range = (begin, end)
                raise e
        # copy from processing chain buffers into output buffers
        for out_man in self._output_managers:
            out_man.write(begin, end)

    def __str__(self):
        return 'Input variables:\n  ' \
            + '\n  '.join([str(in_man) for in_man in self._input_managers]) \
            + '\nProcessors:\n  ' \
            + '\n  '.join([str(proc_man) for proc_man in self._proc_managers]) \
            + '\nOutput variables:\n  ' \
            + '\n  '.join([str(out_man) for out_man in self._output_managers])

    # Define functions that can be parsed by get_variable
    # Get length of ProcChainVar
    def _length(var: ProcChainVar) -> int:
        if var is None:
            return None
        if not isinstance(var, ProcChainVar):
            raise ProcessingChainError(f"cannot call len() on {var}")
        if not len(var.buffer.shape) == 2:
            raise ProcessingChainError(f"{var} has wrong number of dims")
        return var.buffer.shape[1]

    # round value
    def _round(var: ProcChainVar) -> float:
        if var is None:
            return None
        if not isinstance(var, ProcChainVar):
            return round(float(var))
        else:
            raise ProcessingChainError("round() is not implemented for variables, only constants.")

    # dict of functions that can be parsed by get_variable; looked up through
    # the dict (not as bound methods), so _length/_round take no self
    func_list = {'len': _length, 'round': _round}
class ProcessorManager:
"""The class that calls processors and makes sure variables are compatible."""
@dataclass
class DimInfo:
length: int # length of arrays in this dimension
grid: CoordinateGrid # period and offset of arrays in this dimension
def __init__(self, proc_chain: ProcessingChain, func: np.ufunc, params: list[str],
             kw_params: dict = None, signature: str = None, types: list[str] = None) -> None:
    """Set up a single processor call inside a :class:`ProcessingChain`.

    Checks that the array arguments obey numpy's gufunc broadcasting
    rules, deduces shapes/dtypes/grids/units for ``auto`` variables,
    selects a compatible type signature, and builds the final
    ``args``/``kwargs`` that will be handed to ``func`` on execution.

    Parameters
    ----------
    proc_chain
        processing chain that owns the variables referenced by ``params``.
    func
        callable (typically a numpy ufunc/gufunc) used to process data.
    params
        positional arguments: ProcChainVars, scalars/Quantities, or None.
    kw_params
        keyword arguments, keyed by keyword name.
    signature
        gufunc-style dimension signature (e.g. ``"(n),(m)->(n)"``). If
        ``None``, use ``func.signature``; if that is also ``None``,
        build an all-scalar signature from ``func.nin``/``func.nout``.
    types
        list of allowed type signatures (e.g. ``["ff->f"]``). If
        ``None``, use ``func.types``.

    Raises
    ------
    ProcessingChainError
        if the argument count does not match the signature, the arrays
        cannot be broadcast together, a dimension cannot be deduced, or
        no type signature matches the argument types.
    """
    # kw_params previously defaulted to a mutable `{}` shared between
    # calls; use a None sentinel and build a fresh dict per instance
    if kw_params is None:
        kw_params = {}

    assert isinstance(proc_chain, ProcessingChain) and callable(func) \
        and isinstance(params, list)

    # reference back to our processing chain
    self.proc_chain = proc_chain
    # callable function used to process data
    self.processor = func
    # list of parameters prior to converting to internal representation
    self.params = params
    # dict of keyword parameters prior to converting to internal rep
    self.kw_params = kw_params
    # list of raw values and buffers from params; we will fill this soon
    self.args = []
    # dict of kws -> raw values and buffers from params; we will fill this soon
    self.kwargs = {}

    # Get the signature and list of valid types for the function
    self.signature = func.signature if signature is None else signature
    if self.signature is None:
        # plain (non-generalized) ufunc: every argument is a scalar
        self.signature = ','.join(['()']*func.nin) + '->' \
            + ','.join(['()']*func.nout)

    # Get list of allowed type signatures
    if types is None:
        types = func.types.copy()
    if types is None:
        raise ProcessingChainError(
            f"could not find a type signature list for {func.__name__}. "
            f"Please supply a valid list of types.")
    if not isinstance(types, list):
        types = [types]
    # strip '->' so each entry has one type char per argument (in and out)
    found_types = [typestr.replace('->', '') for typestr in types]

    # Make sure arrays obey the broadcasting rules, and make a dictionary
    # of the correct dimensions and unit system
    dims_list = re.findall(r"\((.*?)\)", self.signature)
    n_args = len(params) + len(kw_params)
    if len(dims_list) != n_args:
        # report both positional and keyword parameters in the message
        raise ProcessingChainError(
            f"expected {len(dims_list)} arguments from signature "
            f"{self.signature}; found {n_args}: "
            f"({', '.join(str(par) for par in it.chain(params, kw_params.values()))})")

    dims_dict = {}  # map from dim name -> DimInfo
    outerdims = []  # list of DimInfo
    grid = None     # period/offset to use for unit and coordinate conversions
    for ipar, (dims, param) in enumerate(zip(
            dims_list, it.chain(self.params, self.kw_params.values()))):
        if not isinstance(param, ProcChainVar):
            continue

        # find type signatures that match type of array
        if param.dtype is not auto:
            arr_type = param.dtype.char
            found_types = [type_sig for type_sig in found_types
                           if np.can_cast(arr_type, type_sig[ipar])]

        # fill out dimensions from dim signature and check if it works
        if param.shape is auto:
            continue
        fun_dims = list(outerdims) + \
            [d.strip() for d in dims.split(',') if d]
        arr_dims = list(param.shape)
        arr_grid = param.grid if param.grid is not auto else None
        if not grid:
            grid = arr_grid

        # check if arr_dims can be broadcast to match fun_dims
        for i in range(max(len(fun_dims), len(arr_dims))):
            fd = fun_dims[-i-1] if i < len(fun_dims) else None
            ad = arr_dims[-i-1] if i < len(arr_dims) else None
            if isinstance(fd, str):
                # named dimension from the gufunc signature
                if fd in dims_dict:
                    this_dim = dims_dict[fd]
                    if not ad or this_dim.length != ad:
                        # was `func.__name` (typo -> AttributeError here)
                        raise ProcessingChainError(
                            f"failed to broadcast array dimensions for "
                            f"{func.__name__}. Could not find consistent value "
                            f"for dimension {fd}")
                    if not this_dim.grid:
                        dims_dict[fd].grid = arr_grid
                    elif arr_grid and arr_grid != this_dim.grid:
                        log.debug(
                            f"arrays of dimension {fd} for "
                            f"{func.__name__} do not have consistent period "
                            f"and offset!")
                else:
                    dims_dict[fd] = self.DimInfo(ad, arr_grid)
            elif not fd:
                # if we ran out of function dimensions, add a new outer dim
                outerdims.insert(0, self.DimInfo(ad, arr_grid))
            elif not ad:
                continue
            elif fd.length != ad:
                # If dimensions disagree, either insert a broadcasted array
                # dimension or raise an exception
                if len(fun_dims) > len(arr_dims):
                    arr_dims.insert(len(arr_dims)-i, 1)
                elif len(fun_dims) < len(arr_dims):
                    outerdims.insert(len(fun_dims)-i, self.DimInfo(ad, arr_grid))
                    fun_dims.insert(len(fun_dims)-i, ad)
                else:
                    raise ProcessingChainError(
                        f"failed to broadcast array dimensions for "
                        f"{func.__name__}. Input arrays do not have "
                        f"consistent outer dimensions. Require: "
                        f"{tuple(dim.length for dim in outerdims+fun_dims)}; "
                        f"found {tuple(arr_dims)} for {param}")
            elif not fd.grid:
                outerdims[len(fun_dims)-i].grid = arr_grid
            elif arr_grid and fd.grid != arr_grid:
                log.debug(f"arrays of dimension {fd} for {func.__name__} "
                          f"do not have consistent period and offset!")

            arr_grid = None  # this is only used for inner most dim

    # Get the type signature we are using
    if not found_types:
        # NOTE(review): this loop has no visible effect as written —
        # presumably vestigial; kept for fidelity, verify against history
        for param in it.chain(self.params, self.kw_params.values()):
            if not isinstance(param, ProcChainVar):
                continue
        raise ProcessingChainError(
            f"could not find a type signature matching the types of the "
            f"variables given for {self} (types: {types})")

    # Use the first types in the list that all our types can be cast to
    self.types = [np.dtype(t) for t in found_types[0]]

    # Finish setting up of input parameters for function
    # Iterate through args and then kwargs
    # Reshape variable arrays to add broadcast dimensions
    # Allocate new arrays as needed
    # Convert coords to right system of units as needed
    for (arg_name, param), dims, dtype in zip(
            it.chain(zip(it.repeat(None), self.params), self.kw_params.items()),
            dims_list, self.types):
        # full list of DimInfo for this argument: outer dims + named dims
        dim_list = outerdims.copy()
        for d in dims.split(','):
            if not d:
                continue
            if d not in dims_dict:
                raise ProcessingChainError(f"could not deduce dimension {d} for {param}")
            dim_list.append(dims_dict[d])
        shape = tuple(d.length for d in dim_list)
        this_grid = dim_list[-1].grid if dim_list else None

        if isinstance(param, ProcChainVar):
            # Deduce any automated descriptions of parameter
            unit = None
            is_coord = False
            # `== True` (not `is True`): is_coord may also hold the
            # `auto` sentinel, which must not be treated as set
            if param.is_coord == True and grid is not None:
                unit = str(grid.period.u)
            elif isinstance(param.unit, str) and param.unit in ureg \
                    and grid is not None \
                    and ureg.is_compatible_with(grid.period, param.unit):
                is_coord = True
            param.update_auto(shape=shape, dtype=np.dtype(dtype),
                              grid=this_grid, unit=unit,
                              is_coord=is_coord)
            if param.is_coord and not grid:
                grid = param._buffer[0][1]
            param = param.get_buffer(grid)

            # reshape just in case there are some missing dimensions
            arshape = list(param.shape)
            for idim in range(-1, -1-len(shape), -1):
                if arshape[idim] != shape[idim]:
                    arshape.insert(len(arshape)+idim+1, 1)
            param = param.reshape(tuple(arshape))

        elif param is not None:
            # Convert scalar to right type, including units
            if isinstance(param, (Quantity, Unit)):
                if ureg.is_compatible_with(ureg.dimensionless, param):
                    param = float(param)
                elif not isinstance(grid, CoordinateGrid) \
                        or not ureg.is_compatible_with(grid.period, param):
                    raise ProcessingChainError(
                        f"could not find valid conversion for {param}; "
                        f"CoordinateGrid is {grid}")
                else:
                    # express the quantity in units of the sampling period
                    param = float(param/grid.period)
            if np.issubdtype(dtype, np.integer):
                param = dtype.type(round(param))
            else:
                param = dtype.type(param)

        if arg_name is None:
            self.args.append(param)
        else:
            self.kwargs[arg_name] = param

    log.debug(f'added processor: {self}')
def execute(self) -> None: