From 09fb8ca7a27e11bb9946c7be5e8da9ef294d1cd6 Mon Sep 17 00:00:00 2001 From: Ian Date: Mon, 18 Jul 2022 13:52:54 -0700 Subject: [PATCH 1/9] Improved debug output for processing_chain --- src/pygama/dsp/processing_chain.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/pygama/dsp/processing_chain.py b/src/pygama/dsp/processing_chain.py index e19b115f8..ed69117a2 100644 --- a/src/pygama/dsp/processing_chain.py +++ b/src/pygama/dsp/processing_chain.py @@ -1082,8 +1082,8 @@ def write(self, start: int, end: int) -> None: self.raw_var[0:end-start, ...], 'unsafe') def __str__(self) -> str: - return (f"{self.var} linked to numpy.array({self.io_buf.shape}, " - f"{self.io_buf.dtype})@{self.io_buf.data})") + return (f"{self.var} linked to numpy.array(shape={self.io_buf.shape}, " + f"dtype={self.io_buf.dtype})") class LGDOArrayIOManager(IOManager): @@ -1130,7 +1130,7 @@ def write(self, start: int, end: int) -> None: self.raw_var[0:end-start, ...], 'unsafe') def __str__(self) -> str: - return f'{self.var} linked to {self.io_array}' + return f'{self.var} linked to lgdo.Array(shape={self.io_array.nda.shape}, dtype={self.io_array.nda.dtype}, attrs={self.io_array.attrs})' class LGDOArrayOfEqualSizedArraysIOManager(IOManager): """IO Manager for buffers that are numpy ArrayOfEqualSizedArrays""" @@ -1176,7 +1176,7 @@ def write(self, start: int, end: int) -> None: self.raw_var[0:end-start, ...], 'unsafe') def __str__(self) -> str: - return f'{self.var} linked to {self.io_array}' + return f'{self.var} linked to lgdo.ArrayOfEqualSizedArrays(shape={self.io_array.nda.shape}, dtype={self.io_array.nda.dtype}, attrs={self.io_array.attrs})' class LGDOWaveformIOManager(IOManager): @@ -1243,9 +1243,10 @@ def write(self, start: int, end: int) -> None: self.t0_buf[start:end, ...] = self.t0_var[0:end-start, ...] def __str__(self) -> str: - return (f"{self.var} linked to ") + return (f"{self.var} linked to pygama.lgdo.WaveformTable(" + f"values(shape={self.wf_table.values.nda.shape}, dtype={self.wf_table.values.nda.dtype}, attrs={self.wf_table.values.attrs}), " + f"dt(shape={self.wf_table.dt.nda.shape}, dtype={self.wf_table.dt.nda.dtype}, attrs={self.wf_table.dt.attrs}), " + f"t0(shape={self.wf_table.t0.nda.shape}, dtype={self.wf_table.t0.nda.dtype}, attrs={self.wf_table.t0.attrs}))") def build_processing_chain(lh5_in: lgdo.Table, dsp_config: dict | str, db_dict: dict = None, From 5ceb85a327c8cf664e8b235b1f7adfe5da140074 Mon Sep 17 00:00:00 2001 From: Ian Date: Sun, 24 Jul 2022 10:27:40 -0700 Subject: [PATCH 2/9] Fixed writing of lgdo scalar in overwrite mode --- src/pygama/lgdo/lh5_store.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pygama/lgdo/lh5_store.py b/src/pygama/lgdo/lh5_store.py index b3a636db6..9cfc7ce4b 100644 --- a/src/pygama/lgdo/lh5_store.py +++ b/src/pygama/lgdo/lh5_store.py @@ -589,8 +589,9 @@ def write_object(self, obj, name, lh5_file, group='/', start_row=0, n_rows=None, # scalars elif isinstance(obj, Scalar): - if verbosity > 0 and name in group: - print('overwriting {name} in {group}') + if wo_mode == 'o' and name in group: + log.info(f'overwriting {name} in {group}') + del group[name] ds = group.create_dataset(name, shape=(), data=obj.value) ds.attrs.update(obj.attrs) return From 2dad4d1cf147affa27d61912f84771739a843a0c Mon Sep 17 00:00:00 2001 From: Ian Date: Sun, 24 Jul 2022 10:31:31 -0700 Subject: [PATCH 3/9] Refactored chan_config option in build_dsp --- src/pygama/dsp/build_dsp.py | 61 +++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/pygama/dsp/build_dsp.py b/src/pygama/dsp/build_dsp.py index 583542409..973f98fa3 100644 --- a/src/pygama/dsp/build_dsp.py +++ b/src/pygama/dsp/build_dsp.py @@ -23,10 +23,11 @@ log = logging.getLogger(__name__) -def build_dsp(f_raw: str, f_dsp: str, dsp_config: str | dict, lh5_tables: - list[str] = None, database: str = None, outputs: list[str] = - None, n_max: int = np.inf, write_mode: str = 'r', buffer_len: int - = 3200, block_width: int = 16, chan_config: dict = None) -> None: +def build_dsp(f_raw: str, f_dsp: str, dsp_config: str | dict = None, + lh5_tables: list[str] = None, database: str = None, + outputs: list[str] = None, n_max: int = np.inf, + write_mode: str = 'r', buffer_len: int = 3200, + block_width: int = 16, chan_config: dict = None) -> None: """ Convert raw-tier LH5 data into dsp-tier LH5 data by running a sequence of processors via the :class:`~.processing_chain.ProcessingChain`. @@ -64,6 +65,22 @@ def build_dsp(f_raw: str, f_dsp: str, dsp_config: str | dict, lh5_tables: contains JSON DSP configuration file names for every table in `lh5_tables` """ + + if chan_config is not None: + # clear existing output files + if write_mode == 'r': + if os.path.isfile(f_dsp): + os.remove(f_dsp) + write_mode = 'a' + + for tb, dsp_config in chan_config.items(): + log.debug(f'processing table: {tb} with DSP config file {dsp_config}') + try: + build_dsp(f_raw, f_dsp, dsp_config, [tb], database, + outputs, n_max, write_mode, buffer_len, block_width) + except RuntimeError: + log.debug(f'table {tb} not found') + return if isinstance(dsp_config, str): with open(dsp_config) as config_file: @@ -80,21 +97,15 @@ def build_dsp(f_raw: str, f_dsp: str, dsp_config: str | dict, lh5_tables: # if no group is specified, assume we want to decode every table in the file if lh5_tables is None: - lh5_tables = [] - lh5_keys = lh5.ls(f_raw) - - # sometimes 'raw' is nested, e.g g024/raw - for tb in lh5_keys: - if "raw" not in tb: - tbname = lh5.ls(lh5_file[tb])[0] - if "raw" in tbname: - tb = f'{tb}/{tbname}' # g024 + /raw - lh5_tables.append(tb) - - # make sure every group points to waveforms, if not, remove the group - for tb in lh5_tables: - if 'raw' not in tb: - lh5_tables.remove(tb) + lh5_tables = lh5.ls(f_raw) + + # check if group points to raw data; sometimes 'raw' is nested, e.g g024/raw + for i, tb in enumerate(lh5_tables): + if "raw" not in tb and lh5.ls(lh5_file, f"{tb}/raw"): + lh5_tables[i] = f'{tb}/raw' + elif not lh5.ls(lh5_file, tb): + del lh5_tables[i] + if len(lh5_tables) == 0: raise RuntimeError(f"could not find any valid LH5 table in {f_raw}") @@ -134,16 +145,6 @@ def build_dsp(f_raw: str, f_dsp: str, dsp_config: str | dict, lh5_tables: if n_max and n_max < tot_n_rows: tot_n_rows = n_max - # if we have separate DSP files for each table, read them in here - if chan_config is not None: - f_config = chan_config[tb] - with open(f_config) as config_file: - dsp_config = json.load(config_file) - log.debug(f'processing table: {tb} with DSP config file {f_config}') - - if not isinstance(dsp_config, dict): - raise RuntimeError(f'dsp_config for {tb} must be a dict') - chan_name = tb.split('/')[0] db_dict = database.get(chan_name) if database else None tb_name = tb.replace('/raw', '/dsp') @@ -188,4 +189,4 @@ def build_dsp(f_raw: str, f_dsp: str, dsp_config: str | dict, lh5_tables: if log.level <= logging.INFO: progress_bar.close() - raw_store.write_object(dsp_info, 'dsp_info', f_dsp) + raw_store.write_object(dsp_info, 'dsp_info', f_dsp, wo_mode='o') From 491ed1421a5a85f2a1730fb4ec6cb6ff4a7bd83d Mon Sep 17 00:00:00 2001 From: Ian Date: Sun, 24 Jul 2022 10:32:20 -0700 Subject: [PATCH 4/9] Fixed bug in progress bar --- src/pygama/dsp/build_dsp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pygama/dsp/build_dsp.py b/src/pygama/dsp/build_dsp.py index 973f98fa3..bcd4ca4b6 100644 --- a/src/pygama/dsp/build_dsp.py +++ b/src/pygama/dsp/build_dsp.py @@ -181,9 +181,9 @@ def build_dsp(f_raw: str, f_dsp: str, dsp_config: str | dict = None, write_start=write_offset+start_row) if log.level <= logging.INFO: - progress_bar.update(buffer_len) + progress_bar.update(n_rows) - if start_row+n_rows > tot_n_rows: + if start_row+n_rows >= tot_n_rows: break if log.level <= logging.INFO: From e80285ee485828d5469318c12d171c5f74bc1ea1 Mon Sep 17 00:00:00 2001 From: Ian Date: Sun, 24 Jul 2022 10:32:54 -0700 Subject: [PATCH 5/9] Use linear interpolation for non-integer times in fixed_time_pickoff --- src/pygama/dsp/_processors/fixed_time_pickoff.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pygama/dsp/_processors/fixed_time_pickoff.py b/src/pygama/dsp/_processors/fixed_time_pickoff.py index c40036085..2616258ab 100644 --- a/src/pygama/dsp/_processors/fixed_time_pickoff.py +++ b/src/pygama/dsp/_processors/fixed_time_pickoff.py @@ -10,13 +10,14 @@ def fixed_time_pickoff(w_in, t_in, a_out): """ Pick off the waveform value at the provided index. If the - provided index is out of range, return NaN. + provided index is out of range, return NaN. For non-integer + indices, interpolate linearly between nearest samples Parameters ---------- w_in : array-like The input waveform - t_in : int + t_in : float The waveform index to pick off a_out : float The output pick-off value @@ -38,10 +39,9 @@ def fixed_time_pickoff(w_in, t_in, a_out): if np.isnan(w_in).any() or np.isnan(t_in): return - if np.floor(t_in) != t_in: - raise DSPFatal('The pick-off index must be an integer') - if int(t_in) < 0 or int(t_in) >= len(w_in): return - a_out[0] = w_in[int(t_in)] + i = int(t_in) + w = t_in-i + a_out[0] = (1-2)*w_in[i] + w*w_in[i+1] From 3e31ac98e624ec942cee6dccf8d331624488c147 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 24 Jul 2022 17:52:00 +0000 Subject: [PATCH 6/9] style: pre-commit fixes --- src/pygama/dsp/build_dsp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/dsp/build_dsp.py b/src/pygama/dsp/build_dsp.py index bcd4ca4b6..85a8a708b 100644 --- a/src/pygama/dsp/build_dsp.py +++ b/src/pygama/dsp/build_dsp.py @@ -65,7 +65,7 @@ def build_dsp(f_raw: str, f_dsp: str, dsp_config: str | dict = None, contains JSON DSP configuration file names for every table in `lh5_tables` """ - + if chan_config is not None: # clear existing output files if write_mode == 'r': From bfcc164de4987d6c8b9dd6eac5dc63e49036597f Mon Sep 17 00:00:00 2001 From: iguinn Date: Mon, 25 Jul 2022 08:26:18 -0700 Subject: [PATCH 7/9] Fixed a typo in fixed_time_pickoff --- src/pygama/dsp/_processors/fixed_time_pickoff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pygama/dsp/_processors/fixed_time_pickoff.py b/src/pygama/dsp/_processors/fixed_time_pickoff.py index 2616258ab..fdab940b5 100644 --- a/src/pygama/dsp/_processors/fixed_time_pickoff.py +++ b/src/pygama/dsp/_processors/fixed_time_pickoff.py @@ -42,6 +42,6 @@ def fixed_time_pickoff(w_in, t_in, a_out): if int(t_in) < 0 or int(t_in) >= len(w_in): return - i = int(t_in) + i_in = int(t_in) w = t_in-i - a_out[0] = (1-2)*w_in[i] + w*w_in[i+1] + a_out[0] = (1-w)*w_in[i_in] + w*w_in[i_in+1] From cfb237b830d64a1a03c680941a96a8971d3f19c8 Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Mon, 25 Jul 2022 09:52:13 -0700 Subject: [PATCH 8/9] be careful about overwriting scalars --- src/pygama/lgdo/lh5_store.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/pygama/lgdo/lh5_store.py b/src/pygama/lgdo/lh5_store.py index 54abe2179..a2ff6d8cb 100644 --- a/src/pygama/lgdo/lh5_store.py +++ b/src/pygama/lgdo/lh5_store.py @@ -622,9 +622,12 @@ def write_object(self, # scalars elif isinstance(obj, Scalar): - if wo_mode == 'o' and name in group: - log.debug(f'overwriting {name} in {group}') - del group[name] + if name in group: + if wo_mode in ['o', 'a']: + log.debug(f'overwriting {name} in {group}') + del group[name] + else: + raise RuntimeError(f"tried to overwrite {name} in {group} for wo_mode {wo_mode}") ds = group.create_dataset(name, shape=(), data=obj.value) ds.attrs.update(obj.attrs) return From 081944fff11077c73fc39c927a7eea2fb4cc665a Mon Sep 17 00:00:00 2001 From: Ian Date: Mon, 25 Jul 2022 11:51:40 -0700 Subject: [PATCH 9/9] Revert changes to fixed_time_pickoff --- src/pygama/dsp/_processors/fixed_time_pickoff.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pygama/dsp/_processors/fixed_time_pickoff.py b/src/pygama/dsp/_processors/fixed_time_pickoff.py index fdab940b5..c40036085 100644 --- a/src/pygama/dsp/_processors/fixed_time_pickoff.py +++ b/src/pygama/dsp/_processors/fixed_time_pickoff.py @@ -10,14 +10,13 @@ def fixed_time_pickoff(w_in, t_in, a_out): """ Pick off the waveform value at the provided index. If the - provided index is out of range, return NaN. For non-integer - indices, interpolate linearly between nearest samples + provided index is out of range, return NaN. Parameters ---------- w_in : array-like The input waveform - t_in : float + t_in : int The waveform index to pick off a_out : float The output pick-off value @@ -39,9 +38,10 @@ def fixed_time_pickoff(w_in, t_in, a_out): if np.isnan(w_in).any() or np.isnan(t_in): return + if np.floor(t_in) != t_in: + raise DSPFatal('The pick-off index must be an integer') + if int(t_in) < 0 or int(t_in) >= len(w_in): return - i_in = int(t_in) - w = t_in-i - a_out[0] = (1-w)*w_in[i_in] + w*w_in[i_in+1] + a_out[0] = w_in[int(t_in)]