************************* Efficiency Simulation ************************* .. code:: python """ efficiency_simulation.py The main script for an efficiency simulation, based on CSMPL files. All parameters are explained when you run the script with -h flag. You need to have the file trigger_utils in the same directory. """ # --------------------------------------------- # Imports # --------------------------------------------- import cait as ai import matplotlib.pyplot as plt import numpy as np from tqdm.auto import tqdm import argparse import pickle import os from trigger_utils import read_hw_data, get_filestart if __name__ == '__main__': # --------------------------------------------- # Read command line arguments # --------------------------------------------- # Construct the argument parser ap = argparse.ArgumentParser() # Add the REQUIRED arguments to the parser ap.add_argument("-a", "--name_stream_h5", type=str, help="Name of the stream HDF5 file, e.g. stream_008.") ap.add_argument("-l", "--file_list", type=str, nargs='+', default=None, help="List of the file numbers.") ap.add_argument("-p", "--rdt_path", type=str, help="Path to hardware data, e.g. /eos/vbc/group/darkmatter/cresst/gsdata/hwtrig/Run36/bck/") ap.add_argument("-s", "--csmpl_path", type=str, help="Path to stream data, e.g. /eos/vbc/group/darkmatter/cresst/gsdata/cstream/Run36/data/") ap.add_argument("-x", "--xy_files", type=str, help="Path to the folder that contains filter, nps, sev, sev mainpar and sev fitpar files.") ap.add_argument("-y", "--path_pulser_model", type=str, help="Path to the pulser models - the naming must be _.pm ; e.g. /.../Li1_040.pm") ap.add_argument("-z", "--path_h5", type=str, help="Path where the HDF5 files should be stored.") # Add the OPTIONAL arguments to the parser ap.add_argument("-b", "--nmbr_events", type=int, default=40000, help="Number of events to simulate per file.") ap.add_argument("-c", "--rdt_channels", type=int, default=[0, 1], nargs='+', help="List of the rdt channels of this module, e.g. 12 13.") ap.add_argument("-d", "--csmpl_channels", type=int, default=[0, 1], nargs='+', help="List of the csmpl channels of this module, e.g. 12 13.") ap.add_argument("-e", "--max_height", type=float, default=[3.5, 2.5], nargs='+', help="Maximal event height to simulate.") ap.add_argument("-f", "--min_height", type=float, default=[0.0015, 0.0035], nargs='+', help="Minimal event height to simualte.") ap.add_argument("-g", "--no_features", action='store_true', help="Skip the calculation of features and do only the triggering (not recommended).") ap.add_argument("-i", "--run_nmbr", type=str, default='36', help="Number of the Run, e.g. 36.") ap.add_argument("-k", "--no_simulation", action='store_true', help="Skip the file simulation, do only the feature calculations.") ap.add_argument("--no_ecal", action='store_true', help="Skip the energy calibration, e.g. if you dont have CPE factors.") ap.add_argument("-m", "--merge", action='store_true', help="Only merge the files list. You need to call this once you converted all the files.") ap.add_argument("-n", "--naming", type=str, default='bck', help="Naming of the csmpl stream files, e.g. bcl or ncal.") ap.add_argument("-o", "--out_name", type=str, default='efficiency', help="Naming of the output file, e.g. efficiency. This will always be combined with the naming, to obtain unique file names.") ap.add_argument("-q", "--sample_frequency", type=int, default=25000, help="The sample frequency.") ap.add_argument("-r", "--record_length", type=int, default=16384, help="The length of the record window.") ap.add_argument("-t", "--trigger_thresholds", type=float, default=[5.428, 11.017], nargs='+', help="List of the trigger threshold for all channels in mV, e.g. 5.428 11.017.") ap.add_argument("-u", "--truncation_levels", type=float, default=[0.9, 1.5], nargs='+', help="List of the truncation levels for all channels in V, e.g. 0.9 1.5.") ap.add_argument("-v", "--processes", type=int, default=4, help="The number of processes to use for the sev fit.") ap.add_argument("-w", "--uncorrelated", action='store_true', help="Do the height evaluations uncorrelated, if not activated the first channel is dominant, i.e. evaluate the height in the other channels at the maximum position of the first channel.") ap.add_argument("--cpe_factors", type=float, default=[2.956196019429851, 18.24242689975974], nargs='+', help="List of the CPE factors for all channels.") args = vars(ap.parse_args()) # --------------------------------------------- # Constants and Paths # --------------------------------------------- THRESHOLDS = np.array(args['trigger_thresholds']) * 0.001 discrete_ph = np.array([np.logspace(start=np.log10(mi), stop=np.log10(ma), num=args['nmbr_events']) for mi,ma in zip(args['min_height'], args['max_height'])]) datasets = { # 'event': 1, 'mainpar': 1, 'add_mainpar': 1, 'true_ph': 1, 'true_onset': 0, 'of_ph': 1, 'of_ph_direct': 1, 'arr_fit_par': 1, 'arr_fit_rms': 1, 'arr_fit_par_direct': 1, 'arr_fit_rms_direct': 1, 'hours': 0, 'labels': 1, 'testpulseamplitude': 0, 'time_s': 0, 'time_mus': 0, 'pulse_height': 1, 'tp_hours': 0, 'tp_time_mus': 0, 'tp_time_s': 0, 'tpa': 0, 'trigger_hours': 0, 'trigger_time_mus': 0, 'trigger_time_s': 0, 'recoil_energy_true': 1, 'recoil_energy_sigma_true': 1, 'tpa_equivalent_true': 1, 'tpa_equivalent_sigma_true': 1, 'recoil_energy_reconstructed': 1, 'recoil_energy_sigma_reconstructed': 1, 'tpa_equivalent_reconstructed': 1, 'tpa_equivalent_sigma_reconstructed': 1, 'cnn_cut': 1, 'cnn_prob': 1, 'start_s': -1, 'start_mus': -1, 'stop_s': -1, 'stop_mus': -1, 'sample_frequency': -1, 'record_length': -1, 'runtime': -1, } merge_keywords = { 'groups_to_merge': ['events', 'stream', 'metainfo'], 'sets_to_merge': list(datasets.keys()), 'concatenate_axis': list(datasets.values()), 'continue_hours': True, 'keep_original_files': True, 'groups_from_a': ['optimumfilter', 'optimumfilter_tp', 'optimumfilter_direct', 'stdevent', 'stdevent_tp', 'stdevent_direct', 'noise'], } # --------------------------------------------- # Get Handle to Stream Data # --------------------------------------------- dh_stream = ai.DataHandler(channels=args['rdt_channels'], record_length=args['record_length'], sample_frequency=args['sample_frequency']) dh_stream.set_filepath(path_h5=args['path_h5'], fname=args['name_stream_h5'], appendix=False) start_hours = dh_stream.get('metainfo', 'startstop_hours')[:, 0] # --------------------------------------------- # Get Infos from HW Data # --------------------------------------------- xy_files = read_hw_data(args) # --------------------------------------------- # Start the Loop # --------------------------------------------- for i, fn in enumerate(args['file_list']): print('-----------------------------------------------------') print('>> {} WORKING ON FILE: {}'.format(i, fn)) if not args['merge']: empty_name = 'empty_' + args['naming'] + '_' + fn sim_name = args['out_name'] + '_' + args['naming'] + '_' + fn if not args['no_simulation']: dh_empty = ai.DataHandler(channels=args['rdt_channels'], record_length=args['record_length'], sample_frequency=args['sample_frequency']) dh_empty.set_filepath(path_h5=args['path_h5'], fname=empty_name, appendix=False) csmpl_paths = [ args['csmpl_path'] + 'Ch' + str(c + 1) + '/' + 'Run' + args['run_nmbr'] + '_' + args['naming'] + '_' + fn + '_Ch' + str( c + 1) + '.csmpl' for c in args['csmpl_channels']] # -------------------------------------------------- # Include Test Pulse Time Stamps # -------------------------------------------------- # include metadata dh_empty.init_empty() dh_empty.include_metainfo(args['rdt_path'] + args['naming'] + '_' + fn + '.par') dh_empty.include_test_stamps(path_teststamps=args['rdt_path'] + args['naming'] + '_' + fn + '.test_stamps', path_dig_stamps=args['rdt_path'] + args['naming'] + '_' + fn + '.dig_stamps', ) # -------------------------------------------------- # Include the Random Triggers Events # -------------------------------------------------- dh_empty.include_noise_triggers( nmbr=args['nmbr_events'], min_distance=0.5, max_distance=60, max_attempts=5, no_pileup=False, ) dh_empty.include_noise_events( csmpl_paths, datatype='float32', ) # ---------------------------------------------------------- # Include OF, SEV, NPS # ---------------------------------------------------------- dh_empty.include_sev(sev=xy_files['sev'], fitpar=xy_files['sev_fitpar'], mainpar=xy_files['sev_mainpar']) dh_empty.include_nps(nps=xy_files['nps']) dh_empty.include_of(of_real=np.real(xy_files['of']), of_imag=np.imag(xy_files['of'])) # for tp if 'sev_tp' in xy_files: dh_empty.include_sev(sev=xy_files['sev_tp'], fitpar=xy_files['sev_tp_fitpar'], mainpar=xy_files['sev_tp_mainpar'], group_name_appendix='_tp') dh_empty.include_of(of_real=np.real(xy_files['of_tp']), of_imag=np.imag(xy_files['of_tp']), group_name_appendix='_tp') # for direct hits if 'sev_direct' in xy_files: dh_empty.include_sev(sev=xy_files['sev_direct'], fitpar=xy_files['sev_direct_fitpar'], mainpar=xy_files['sev_direct_mainpar'], group_name_appendix='_direct') if 'of_direct' in xy_files: dh_empty.include_of(of_real=np.real(xy_files['of_direct']), of_imag=np.imag(xy_files['of_direct']), group_name_appendix='_direct') # -------------------------------------------------- # Simulate Events # -------------------------------------------------- dh_empty.calc_bl_coefficients() dh_empty.simulate_pulses(path_sim=args['path_h5'] + sim_name + '.h5', size_events=args['nmbr_events'], reuse_bl=True, ev_discrete_phs=discrete_ph, t0_interval=[-10, 0], # in ms rms_thresholds=[1e5, 1e5], fake_noise=False) # -------------------------------------------------- # Delete original empty set # -------------------------------------------------- # Delete the empty bl hdf5 set del dh_empty print('Deleting {}.'.format(args['path_h5'] + empty_name + '.h5')) os.remove(args['path_h5'] + empty_name + '.h5') # -------------------------------------------------- # Include data from PAR and XY files # -------------------------------------------------- dh_sim = ai.DataHandler(channels=args['rdt_channels'], record_length=args['record_length'], sample_frequency=args['sample_frequency']) dh_sim.set_filepath(path_h5=args['path_h5'], fname=sim_name, appendix=False) if not args['no_features']: dh_sim.include_metainfo(args['rdt_path'] + args['naming'] + '_' + fn + '.par') dh_sim.include_sev(sev=xy_files['sev'], fitpar=xy_files['sev_fitpar'], mainpar=xy_files['sev_mainpar']) dh_sim.include_nps(nps=xy_files['nps']) dh_sim.include_of(of_real=np.real(xy_files['of']), of_imag=np.imag(xy_files['of'])) # for tp if 'sev_tp' in xy_files: dh_sim.include_sev(sev=xy_files['sev_tp'], fitpar=xy_files['sev_tp_fitpar'], mainpar=xy_files['sev_tp_mainpar'], group_name_appendix='_tp') dh_sim.include_of(of_real=np.real(xy_files['of_tp']), of_imag=np.imag(xy_files['of_tp']), group_name_appendix='_tp') # for direct hits if 'sev_direct' in xy_files: dh_sim.include_sev(sev=xy_files['sev_direct'], fitpar=xy_files['sev_direct_fitpar'], mainpar=xy_files['sev_direct_mainpar'], group_name_appendix='_direct') if 'of_direct' in xy_files: dh_sim.include_of(of_real=np.real(xy_files['of_direct']), of_imag=np.imag(xy_files['of_direct']), group_name_appendix='_direct') # -------------------------------------------------- # Calc Parameters # -------------------------------------------------- dh_sim.calc_mp(type='events') dh_sim.calc_additional_mp() dh_sim.apply_of() if 'of_direct' in xy_files: dh_sim.apply_of(name_appendix_group='_direct', name_appendix_set='_direct') # get the sevs with the fit parameters t = dh_sim.record_window() sev_array = [] for i,c in enumerate(args['rdt_channels']): sev_array.append(ai.fit.pulse_template(t, *xy_files['sev_fitpar'][c])) if 'sev_direct' in xy_files: sev_direct_array = [] for i,c in enumerate(args['rdt_channels']): sev_direct_array.append(ai.fit.pulse_template(t, *xy_files['sev_direct_fitpar'][c])) # # do the fits # dh_sim.apply_array_fit(processes=args['processes'], # truncation_level=args['truncation_levels'], # first_channel_dominant=not args['uncorrelated'], use_this_array=sev_array) # dh_sim.apply_array_fit(processes=args['processes'], # truncation_level=args['truncation_levels'], # first_channel_dominant=False, use_this_array=sev_array) # # do the fit for the direct hits # if 'sev_direct' in xy_files: # dh_sim.apply_array_fit(group_name_appendix = '_direct', name_appendix = '_direct', # processes=args['processes'], # truncation_level=args['truncation_levels'], only_channels=[1], # use_this_array=sev_direct_array) if not args['no_ecal']: # -------------------------------------------------- # Assign Energies # -------------------------------------------------- with open(args['path_pulser_model'] + args['naming'] + '_' + fn + '.pm', 'rb') as f: pm = pickle.load(f) dh_sim.calc_calibration(starts_saturation=args['max_height'], cpe_factor=args['cpe_factors'], plot=False, method='of', pulser_models=pm, name_appendix_energy='_reconstructed', use_interpolation=True, ) dh_sim.calc_calibration(starts_saturation=args['max_height'], cpe_factor=args['cpe_factors'], plot=False, method='true_ph', pulser_models=pm, name_appendix_energy='_true', use_interpolation=True, ) # -------------------------------------------------- # Apply neural network and other cuts # -------------------------------------------------- ckp_path = ai.resources.get_resource_path('cnn-clf-binary-v0.ckpt') for c in range(len(args['rdt_channels'])): for group in ['events']: ai.models.nn_predict(h5_path=dh_sim.path_h5, model=ai.models.CNNModule.load_from_checkpoint(ckp_path), feature_channel=c, group_name=group, prediction_name='cnn_cut', keys=['event'], no_channel_idx_in_pred=False, use_prob=False) ai.models.nn_predict(h5_path=dh_sim.path_h5, model=ai.models.CNNModule.load_from_checkpoint(ckp_path), feature_channel=c, group_name=group, prediction_name='cnn_prob', keys=['event'], no_channel_idx_in_pred=False, use_prob=True) # -------------------------------------------------- # Delete Raw Events # -------------------------------------------------- dh_sim.drop_raw_data(type='events') # -------------------------------------------------- # Merge the files # -------------------------------------------------- if i > 0 and args['merge']: merge_keywords_ = merge_keywords.copy() merge_keywords_['path_h5_a'] = args['path_h5'] + args['out_name'] + '_' + args['naming'] + '_{}.h5'.format( args['file_list'][0]) if i == 1 else args['path_h5'] + args['out_name'] + '_{:03d}.h5'.format(i - 1) merge_keywords_['a_name'] = args['out_name'] + '_' + args['naming'] + '_{}'.format(args['file_list'][0]) if i == 1 else 'keep' merge_keywords_['path_h5_b'] = args['path_h5'] + args['out_name'] + '_' + args['naming'] + '_{}.h5'.format(fn) merge_keywords_['b_name'] = args['out_name'] + '_' + args['naming'] + '_{}'.format(fn) merge_keywords_['path_h5_merged'] = args['path_h5'] + args['out_name'] + '_{:03d}.h5'.format(i) start_a = get_filestart(merge_keywords_['path_h5_a'], args) start_b = get_filestart(merge_keywords_['path_h5_b'], args) merge_keywords_['second_file_start'] = (start_b[0] + 1e-6*start_b[1] - start_a[0] - 1e-6*start_a[1])/3600 ai.data.merge_h5_sets(verb=False, **merge_keywords_, ) # --------------------------------------------- # Finishing Notes # --------------------------------------------- print('-----------------------------------------------------') print('>> DONE WITH ALL FILES.')