"""
efficiency_simulation.py
This script is for the simulation of a data set to determine a cut efficiency.
Usage:
- Adapt the section 'Constants and Paths' to your measurement.
- Adapt the section 'Apply Cuts' (around line 240) to your individual cut values.
- If you start the script without command line arguments, it will simulate all files and merge them one after another.
- If you start the script with the flag -f n, if will only simulate the n'th file from the list of files.
- If you start the script with the flag -m, it will only do the merge between all files.
- For time efficient simulation, a good workflow is to write a bash script, that starts the simulation of all files
simultaneously with the -f flags, then call the script again with the -m flag when all files are done.
"""
# ---------------------------------------------
# Imports
# ---------------------------------------------
import cait as ai
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm
import argparse
import pickle
import os
if __name__ == '__main__':
# ---------------------------------------------
# Read command line arguments
# ---------------------------------------------
# Construct the argument parser
ap = argparse.ArgumentParser()
# Add the arguments to the parser
ap.add_argument("-f", "--file", type=int, required=False, default=None,
help="Use only this index from the files list.")
ap.add_argument("-m", "--merge", action='store_true', help="Only merge the files list.")
args = vars(ap.parse_args())
THIS_FILE_ONLY = args['file']
MERGE_ONLY = args['merge']
assert THIS_FILE_ONLY is None or THIS_FILE_ONLY >= 0, "The file number must be integer >= 0!"
assert not (
THIS_FILE_ONLY is not None and MERGE_ONLY), "Attention, you cannot choose a specific file and merge only together!"
# ---------------------------------------------
# Constants and Paths
# ---------------------------------------------
RUN = ... # put an string for the number of the experiments run, e.g. '34'
MODULE = ... # put a name for the detector, e.g. 'DetA'
PATH_HW_DATA = ... # path to the directory in which the RDT and CON files are stored
PATH_STREAM_DATA = ... # path to the directory in which the CSMPL file directories are stored
PATH_DB = ... # path to the SQL data base with information about the CSMPL files
PATH_PROC_DATA = ... # path to where you want to store the HDF5 files
FILE_NMBRS = [] # a list of string, the file number you want to analyse, e.g. ['001', '002', '003']
FNAMING = ... # the naming of the files, typically 'bck', for calibration data 'cal'
RDT_CHANNELS = [] # a list of strings of the channels, e.g. [0, 1] (written in PAR file - attention, the PAR file counts from 1, Cait from 0)
CSMPL_CHANNELS = [0, 1] # the channel numbers of the CDAQ, written in the SQL data base
RECORD_LENGTH = 16384 # the number of samples within a record window (read in PAR file)
SAMPLE_FREQUENCY = 25000 # the sample frequency of the measurement (read in PAR file)
DOWN_SEF = 4 # the downsample rate for the standard event fit
DOWN_BLF = 16 # the downsample rate for the baseline fit
PROCESSES = 8 # the number of processes for parallelization
PCA_COMPONENTS = 2 # the number of pca components to calculate
SKIP_FILE_NMBRS = [] # in case the loop crashed at some point and you want to start from a specific file number, write here the numbers to ignore, e.g. ['001', '002']
THRESHOLDS = [] # a list of the trigger thresholds in V
TRUNCATION_LEVELS = [] # list of the truncation levels
MAXIMAL_EVENT_HEIGHTS = [] # list of the maximal event heights to be included in the simulation
MINIMAL_EVENT_HEIGHTS = [] # list of the minimal event heights to be included in the simulation
PATH_PULSER_MODEL = ... # put an string of the path where you want to store the pulser model
POLY_ORDER = 5 # the order of the polynomial used for the energy calibration
ONLY_ECAL = False # if this is set to true, only the energy calibration is done for the file
CPE_FACTOR = [] # list of the values source_peak_energy/source_peak_equivalent_tpa_value
NMBR_SIMULATED_EVENTS = 20000 # this number of events is simulated for each file in the list
XSCALE = 'log'
BINS = 2000
# typically you do not need to change the values below
FNAME_STREAM = 'stream_{:03d}'.format(len(FILE_NMBRS) - 1)
H5_CHANNELS = list(range(len(RDT_CHANNELS))
print('OF thresholds in V: ', THRESHOLDS)
SEF_APP = '_down{}'.format(DOWN_SEF) if DOWN_SEF > 1 else ''
if XSCALE == 'linear':
discrete_ph = np.array([np.linspace(mi, ma, BINS + 1) for mi,ma in zip(MINIMAL_EVENT_HEIGHTS, MAXIMAL_EVENT_HEIGHTS)])
elif XSCALE == 'log':
if any(np.array(MINIMAL_EVENT_HEIGHTS) <= 0):
print('Changing lower end of non-positive MINIMAL_EVENT_HEIGHTS to 1e-3!')
MINIMAL_EVENT_HEIGHTS[MINIMAL_EVENT_HEIGHTS <= 0] = 1e-3
discrete_ph = np.array([np.logspace(start=np.log10(mi), stop=np.log10(ma), num=BINS + 1) for mi,ma in zip(MINIMAL_EVENT_HEIGHTS, MAXIMAL_EVENT_HEIGHTS)])
else:
raise ValueError('The argument of XSCALE must be either linear or log!')
discrete_ph = discrete_ph[:, :-1] + (discrete_ph[:, 1:] - discrete_ph[:, :-1]) / 2
assert len(FILE_NMBRS) > 0, "Choose some file numbers!"
assert THIS_FILE_ONLY not in SKIP_FILE_NMBRS, "Attention, you chose a file that is in the skip list!"
if THIS_FILE_ONLY is not None:
SKIP_FILE_NMBRS = FILE_NMBRS.copy()
del SKIP_FILE_NMBRS[THIS_FILE_ONLY]
# ---------------------------------------------
# Get Handle to Stream Data
# ---------------------------------------------
dh_stream = ai.DataHandler(run=RUN,
module=MODULE,
channels=RDT_CHANNELS)
dh_stream.set_filepath(path_h5=PATH_PROC_DATA,
fname='stream_{:03d}'.format(len(FILE_NMBRS) - 1),
appendix=False)
start_hours = dh_stream.get('metainfo', 'startstop_hours')[:, 0]
# ---------------------------------------------
# Start the Loop
# ---------------------------------------------
for i, fn in enumerate(FILE_NMBRS):
print('-----------------------------------------------------')
print('>> {} WORKING ON FILE: {}'.format(i, fn))
if fn in SKIP_FILE_NMBRS:
print('Skipping this file.')
else:
if not MERGE_ONLY:
empty_name = 'empty_' + FNAMING + '_' + fn
sim_name = 'sim_' + FNAMING + '_' + fn
if os.path.isfile(PATH_PROC_DATA + empty_name + '.h5'):
os.remove(PATH_PROC_DATA + empty_name + '.h5')
dh_empty = ai.DataHandler(run=RUN,
channels=RDT_CHANNELS,
record_length=RECORD_LENGTH,
sample_frequency=SAMPLE_FREQUENCY)
dh_empty.set_filepath(path_h5=PATH_PROC_DATA,
fname=empty_name,
appendix=False)
csmpl_paths = [
PATH_STREAM_DATA + 'Ch' + str(c + 1) + '/' + 'Run' + RUN + '_' + FNAMING + '_' + fn + '_Ch' + str(
c + 1) + '.csmpl' for c in CSMPL_CHANNELS]
if not ONLY_ECAL:
# --------------------------------------------------
# Include Test Pulse Time Stamps
# --------------------------------------------------
dh_empty.include_test_stamps(path_teststamps=PATH_HW_DATA + FNAMING + '_' + fn + '.test_stamps',
path_dig_stamps=PATH_HW_DATA + FNAMING + '_' + fn + '.dig_stamps',
path_sql=PATH_DB,
csmpl_channels=CSMPL_CHANNELS,
sql_file_label=FNAMING + '_' + fn,
fix_offset=True)
# --------------------------------------------------
# Include the Random Triggers Events
# --------------------------------------------------
dh_empty.include_noise_triggers(
nmbr=NMBR_SIMULATED_EVENTS,
min_distance=0.5,
max_distance=60,
max_attempts=5,
no_pileup=False,
)
dh_empty.include_noise_events(
csmpl_paths,
datatype='float32',
)
# ----------------------------------------------------------
# Include OF, SEV, NPS
# ----------------------------------------------------------
dh_empty.include_sev(sev=dh_stream.get('stdevent', 'event'),
fitpar=dh_stream.get('stdevent', 'fitpar'),
mainpar=dh_stream.get('stdevent', 'mainpar'))
dh_empty.include_nps(nps=dh_stream.get('noise', 'nps'))
dh_empty.include_of(of_real=dh_stream.get('optimumfilter', 'optimumfilter_real'),
of_imag=dh_stream.get('optimumfilter', 'optimumfilter_imag'))
# --------------------------------------------------
# Simulate Events
# --------------------------------------------------
dh_empty.calc_bl_coefficients(down=DOWN_BLF)
dh_empty.simulate_pulses(path_sim=PATH_PROC_DATA + sim_name + '.h5',
size_events=NMBR_SIMULATED_EVENTS,
reuse_bl=True,
# ev_ph_intervals=[(0, m) for m in MAXIMAL_EVENT_HEIGHTS],
ev_discrete_phs=discrete_ph,
t0_interval=[-10, 10], # in ms
rms_thresholds=[1e5, 1e5],
fake_noise=False)
# --------------------------------------------------
# Delete original set
# --------------------------------------------------
# Delete the empty bl hdf5 set
del dh_empty
print('Deleting {}.'.format(PATH_PROC_DATA + empty_name + '.h5'))
os.remove(PATH_PROC_DATA + empty_name + '.h5')
dh_sim = ai.DataHandler(run=RUN,
channels=RDT_CHANNELS,
record_length=RECORD_LENGTH,
sample_frequency=SAMPLE_FREQUENCY)
dh_sim.set_filepath(path_h5=PATH_PROC_DATA,
fname=sim_name,
appendix=False)
# --------------------------------------------------
# Calc Parameters
# --------------------------------------------------
if not ONLY_ECAL:
dh_sim.calc_mp(type='events')
dh_sim.calc_additional_mp()
dh_sim.apply_of()
dh_sim.apply_sev_fit(down=DOWN_SEF, name_appendix='_down{}'.format(DOWN_SEF), processes=PROCESSES,
truncation_level=TRUNCATION_LEVELS, verb=True)
# --------------------------------------------------
# Apply Cuts
# --------------------------------------------------
if not ONLY_ECAL:
# change this to your individual cut values!
surviving = ai.cuts.LogicalCut(
initial_condition=np.abs(dh_sim.get('events', 'mainpar')[0, :, 8]) < 1e-6)
surviving.add_condition(np.abs(dh_sim.get('events', 'mainpar')[1, :, 8]) < 1e-6)
surviving.add_condition(dh_sim.get('events', 'mainpar')[0, :, 0] < 1.6)
surviving.add_condition(dh_sim.get('events', 'mainpar')[1, :, 0] < 0.3)
surviving.add_condition(dh_sim.get('events', 'mainpar')[0, :, 3] < 4500)
surviving.add_condition(dh_sim.get('events', 'mainpar')[0, :, 3] > 3900)
surviving.add_condition(dh_sim.get('events', 'of_ph')[0,:] > THRESHOLDS[0]) # threshold
# typically you need not change anything below here
for c in H5_CHANNELS:
dh_sim.apply_logical_cut(cut_flag=surviving.get_flag(),
naming='surviving',
channel=c,
type='events',
delete_old=False)
# --------------------------------------------------
# PCA
# --------------------------------------------------
dh_sim.apply_pca(nmbr_components=PCA_COMPONENTS,
down=DOWN_SEF,
fit_idx=surviving.get_idx())
# --------------------------------------------------
# Assign Energies
# --------------------------------------------------
with open(PATH_PULSER_MODEL, 'rb') as f:
pm = pickle.load(f)
dh_sim.calc_calibration(starts_saturation=MAXIMAL_EVENT_HEIGHTS,
cpe_factor=CPE_FACTOR,
poly_order=POLY_ORDER,
plot=False,
method='of',
pulser_models=pm,
name_appendix_energy='_reconstructed',
)
dh_sim.calc_calibration(starts_saturation=MAXIMAL_EVENT_HEIGHTS,
cpe_factor=CPE_FACTOR,
poly_order=POLY_ORDER,
plot=False,
method='true_ph',
pulser_models=pm,
name_appendix_energy='_true',
)
else:
print('Doing only the merge.')
# --------------------------------------------------
# Merge the files
# --------------------------------------------------
if i > 0 and THIS_FILE_ONLY is None:
file_name_a = PATH_PROC_DATA + 'sim_' + FNAMING + '_{}.h5'.format(
FILE_NMBRS[0]) if i == 1 else PATH_PROC_DATA + 'efficiency_{:03d}.h5'.format(i - 1)
a_name = 'sim_' + FNAMING + '_{}'.format(FILE_NMBRS[0]) if i == 1 else 'keep'
ai.data.merge_h5_sets(path_h5_a=file_name_a,
path_h5_b=PATH_PROC_DATA + 'sim_' + FNAMING + '_{}.h5'.format(fn),
path_h5_merged=PATH_PROC_DATA + 'efficiency_{:03d}.h5'.format(i),
groups_to_merge=['events', 'testpulses', 'controlpulses', 'stream'],
sets_to_merge=['event', 'mainpar', 'true_ph', 'true_onset', 'of_ph',
'sev_fit_par' + SEF_APP, 'sev_fit_rms' + SEF_APP,
'hours', 'labels', 'testpulseamplitude', 'time_s',
'time_mus', 'pulse_height', 'pca_error', 'pca_projection',
'tp_hours',
'tp_time_mus', 'tp_time_s', 'tpa',
'trigger_hours', 'trigger_time_mus', 'trigger_time_s',
'surviving', 'recoil_energy_true', 'recoil_energy_sigma_true',
'tpa_equivalent_true', 'tpa_equivalent_sigma_true',
'recoil_energy_reconstructed', 'recoil_energy_sigma_reconstructed',
'tpa_equivalent_reconstructed', 'tpa_equivalent_sigma_reconstructed'],
concatenate_axis=[1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
continue_hours=True,
second_file_start=start_hours[i],
keep_original_files=False,
groups_from_a=['optimumfilter', 'optimumfilter_tp', 'stdevent', 'stdevent_tp',
'noise'],
a_name=a_name,
b_name='sim_' + FNAMING + '_{}'.format(fn),
verb=False,
)
# ---------------------------------------------
# Finishing Notes
# ---------------------------------------------
print('-----------------------------------------------------')
print('>> DONE WITH ALL FILES.')