Efficiency Simulation

"""
efficiency_simulation.py

The main script for an efficiency simulation, based on CSMPL files.

All parameters are explained when you run the script with -h flag. You need to have the file trigger_utils in the same directory.

"""

# ---------------------------------------------
# Imports
# ---------------------------------------------

import cait as ai
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm
import argparse
import pickle
import os
from trigger_utils import read_hw_data, get_filestart

if __name__ == '__main__':

    # ---------------------------------------------
    # Read command line arguments
    # ---------------------------------------------

    # Construct the argument parser
    ap = argparse.ArgumentParser()

    # Add the REQUIRED arguments to the parser
    ap.add_argument("-a", "--name_stream_h5", type=str, help="Name of the stream HDF5 file, e.g. stream_008.")
    ap.add_argument("-l", "--file_list", type=str, nargs='+', default=None, help="List of the file numbers.")
    ap.add_argument("-p", "--rdt_path", type=str, help="Path to hardware data, e.g. /eos/vbc/group/darkmatter/cresst/gsdata/hwtrig/Run36/bck/")
    ap.add_argument("-s", "--csmpl_path", type=str, help="Path to stream data, e.g. /eos/vbc/group/darkmatter/cresst/gsdata/cstream/Run36/data/")
    ap.add_argument("-x", "--xy_files", type=str, help="Path to the folder that contains filter, nps, sev, sev mainpar and sev fitpar files.")
    ap.add_argument("-y", "--path_pulser_model", type=str, help="Path to the pulser models - the naming must be <path_you_put>_<file_nmbr>.pm ; e.g. /.../Li1_040.pm")
    ap.add_argument("-z", "--path_h5", type=str, help="Path where the HDF5 files should be stored.")

    # Add the OPTIONAL arguments to the parser
    ap.add_argument("-b", "--nmbr_events", type=int, default=40000, help="Number of events to simulate per file.")
    ap.add_argument("-c", "--rdt_channels", type=int, default=[0, 1],  nargs='+', help="List of the rdt channels of this module, e.g. 12 13.")
    ap.add_argument("-d", "--csmpl_channels", type=int, default=[0, 1], nargs='+', help="List of the csmpl channels of this module, e.g. 12 13.")
    ap.add_argument("-e", "--max_height", type=float, default=[3.5, 2.5],  nargs='+', help="Maximal event height to simulate.")
    ap.add_argument("-f", "--min_height", type=float, default=[0.0015, 0.0035],  nargs='+', help="Minimal event height to simualte.")
    ap.add_argument("-g", "--no_features", action='store_true', help="Skip the calculation of features and do only the triggering (not recommended).")
    ap.add_argument("-i", "--run_nmbr", type=str, default='36', help="Number of the Run, e.g. 36.")
    ap.add_argument("-k", "--no_simulation", action='store_true', help="Skip the file simulation, do only the feature calculations.")
    ap.add_argument("--no_ecal", action='store_true', help="Skip the energy calibration, e.g. if you dont have CPE factors.")
    ap.add_argument("-m", "--merge", action='store_true', help="Only merge the files list. You need to call this once you converted all the files.")
    ap.add_argument("-n", "--naming", type=str, default='bck', help="Naming of the csmpl stream files, e.g. bcl or ncal.")
    ap.add_argument("-o", "--out_name", type=str, default='efficiency', help="Naming of the output file, e.g. efficiency. This will always be combined with the naming, to obtain unique file names.")
    ap.add_argument("-q", "--sample_frequency", type=int, default=25000, help="The sample frequency.")
    ap.add_argument("-r", "--record_length", type=int, default=16384, help="The length of the record window.")
    ap.add_argument("-t", "--trigger_thresholds", type=float, default=[5.428, 11.017], nargs='+', help="List of the trigger threshold for all channels in mV, e.g. 5.428 11.017.")
    ap.add_argument("-u", "--truncation_levels", type=float, default=[0.9, 1.5], nargs='+', help="List of the truncation levels for all channels in V, e.g. 0.9 1.5.")
    ap.add_argument("-v", "--processes", type=int, default=4, help="The number of processes to use for the sev fit.")
    ap.add_argument("-w", "--uncorrelated", action='store_true', help="Do the height evaluations uncorrelated, if not activated the first channel is dominant, i.e. evaluate the height in the other channels at the maximum position of the first channel.")
    ap.add_argument("--cpe_factors", type=float, default=[2.956196019429851, 18.24242689975974],  nargs='+', help="List of the CPE factors for all channels.")

    args = vars(ap.parse_args())

    # ---------------------------------------------
    # Constants and Paths
    # ---------------------------------------------

    THRESHOLDS = np.array(args['trigger_thresholds']) * 0.001

    discrete_ph = np.array([np.logspace(start=np.log10(mi), stop=np.log10(ma), num=args['nmbr_events']) for mi,ma in zip(args['min_height'], args['max_height'])])

    datasets = {
        # 'event': 1,
        'mainpar': 1,
        'add_mainpar': 1,
        'true_ph': 1,
        'true_onset': 0,
        'of_ph': 1,
        'of_ph_direct': 1,
        'arr_fit_par': 1,
        'arr_fit_rms': 1,
        'arr_fit_par_direct': 1,
        'arr_fit_rms_direct': 1,
        'hours': 0,
        'labels': 1,
        'testpulseamplitude': 0,
        'time_s': 0,
        'time_mus': 0,
        'pulse_height': 1,
        'tp_hours': 0,
        'tp_time_mus': 0,
        'tp_time_s': 0,
        'tpa': 0,
        'trigger_hours': 0,
        'trigger_time_mus': 0,
        'trigger_time_s': 0,
        'recoil_energy_true': 1,
        'recoil_energy_sigma_true': 1,
        'tpa_equivalent_true': 1,
        'tpa_equivalent_sigma_true': 1,
        'recoil_energy_reconstructed': 1,
        'recoil_energy_sigma_reconstructed': 1,
        'tpa_equivalent_reconstructed': 1,
        'tpa_equivalent_sigma_reconstructed': 1,
        'cnn_cut': 1,
        'cnn_prob': 1,
        'start_s': -1,
        'start_mus': -1,
        'stop_s': -1,
        'stop_mus': -1,
        'sample_frequency': -1,
        'record_length': -1,
        'runtime': -1,
               }

    merge_keywords = {
        'groups_to_merge': ['events', 'stream', 'metainfo'],
        'sets_to_merge': list(datasets.keys()),
        'concatenate_axis': list(datasets.values()),
        'continue_hours': True,
        'keep_original_files': True,
        'groups_from_a': ['optimumfilter', 'optimumfilter_tp', 'optimumfilter_direct', 'stdevent', 'stdevent_tp', 'stdevent_direct', 'noise'],
                     }

    # ---------------------------------------------
    # Get Handle to Stream Data
    # ---------------------------------------------

    dh_stream = ai.DataHandler(channels=args['rdt_channels'],
                               record_length=args['record_length'],
                               sample_frequency=args['sample_frequency'])

    dh_stream.set_filepath(path_h5=args['path_h5'],
                           fname=args['name_stream_h5'],
                           appendix=False)

    start_hours = dh_stream.get('metainfo', 'startstop_hours')[:, 0]

    # ---------------------------------------------
    # Get Infos from HW Data
    # ---------------------------------------------

    xy_files = read_hw_data(args)

    # ---------------------------------------------
    # Start the Loop
    # ---------------------------------------------

    for i, fn in enumerate(args['file_list']):

        print('-----------------------------------------------------')
        print('>> {} WORKING ON FILE: {}'.format(i, fn))

        if not args['merge']:
            empty_name = 'empty_' + args['naming'] + '_' + fn
            sim_name = args['out_name'] + '_' + args['naming'] + '_' + fn

            if not args['no_simulation']:

                dh_empty = ai.DataHandler(channels=args['rdt_channels'],
                                          record_length=args['record_length'],
                                          sample_frequency=args['sample_frequency'])

                dh_empty.set_filepath(path_h5=args['path_h5'],
                                      fname=empty_name,
                                      appendix=False)

                csmpl_paths = [
                    args['csmpl_path'] + 'Ch' + str(c + 1) + '/' + 'Run' + args['run_nmbr'] + '_' + args['naming'] + '_' + fn + '_Ch' + str(
                        c + 1) + '.csmpl' for c in args['csmpl_channels']]

                # --------------------------------------------------
                # Include Test Pulse Time Stamps
                # --------------------------------------------------

                # include metadata
                dh_empty.init_empty()
                dh_empty.include_metainfo(args['rdt_path'] + args['naming'] + '_' + fn + '.par')

                dh_empty.include_test_stamps(path_teststamps=args['rdt_path'] + args['naming'] + '_' + fn + '.test_stamps',
                                             path_dig_stamps=args['rdt_path'] + args['naming'] + '_' + fn + '.dig_stamps',
                                      )

                # --------------------------------------------------
                # Include the Random Triggers Events
                # --------------------------------------------------

                dh_empty.include_noise_triggers(
                    nmbr=args['nmbr_events'],
                    min_distance=0.5,
                    max_distance=60,
                    max_attempts=5,
                    no_pileup=False,
                )

                dh_empty.include_noise_events(
                    csmpl_paths,
                    datatype='float32',
                )

                # ----------------------------------------------------------
                # Include OF, SEV, NPS
                # ----------------------------------------------------------


                dh_empty.include_sev(sev=xy_files['sev'],
                               fitpar=xy_files['sev_fitpar'],
                               mainpar=xy_files['sev_mainpar'])

                dh_empty.include_nps(nps=xy_files['nps'])

                dh_empty.include_of(of_real=np.real(xy_files['of']),
                              of_imag=np.imag(xy_files['of']))

                # for tp

                if 'sev_tp' in xy_files:

                    dh_empty.include_sev(sev=xy_files['sev_tp'],
                                   fitpar=xy_files['sev_tp_fitpar'],
                                   mainpar=xy_files['sev_tp_mainpar'],
                                   group_name_appendix='_tp')

                    dh_empty.include_of(of_real=np.real(xy_files['of_tp']),
                                  of_imag=np.imag(xy_files['of_tp']),
                                  group_name_appendix='_tp')

                # for direct hits

                if 'sev_direct' in xy_files:

                    dh_empty.include_sev(sev=xy_files['sev_direct'],
                                   fitpar=xy_files['sev_direct_fitpar'],
                                   mainpar=xy_files['sev_direct_mainpar'],
                                   group_name_appendix='_direct')

                if 'of_direct' in xy_files:

                    dh_empty.include_of(of_real=np.real(xy_files['of_direct']),
                                  of_imag=np.imag(xy_files['of_direct']),
                                  group_name_appendix='_direct')


                # --------------------------------------------------
                # Simulate Events
                # --------------------------------------------------

                dh_empty.calc_bl_coefficients()

                dh_empty.simulate_pulses(path_sim=args['path_h5'] + sim_name + '.h5',
                                      size_events=args['nmbr_events'],
                                      reuse_bl=True,
                                      ev_discrete_phs=discrete_ph,
                                      t0_interval=[-10, 0],  # in ms
                                      rms_thresholds=[1e5, 1e5],
                                      fake_noise=False)

                # --------------------------------------------------
                # Delete original empty set
                # --------------------------------------------------

                # Delete the empty bl hdf5 set
                del dh_empty
                print('Deleting {}.'.format(args['path_h5'] + empty_name + '.h5'))
                os.remove(args['path_h5'] + empty_name + '.h5')

            # --------------------------------------------------
            # Include data from PAR and XY files
            # --------------------------------------------------

            dh_sim = ai.DataHandler(channels=args['rdt_channels'],
                                    record_length=args['record_length'],
                                    sample_frequency=args['sample_frequency'])

            dh_sim.set_filepath(path_h5=args['path_h5'],
                                fname=sim_name,
                                appendix=False)

            if not args['no_features']:

                dh_sim.include_metainfo(args['rdt_path'] + args['naming'] + '_' + fn + '.par')

                dh_sim.include_sev(sev=xy_files['sev'],
                   fitpar=xy_files['sev_fitpar'],
                   mainpar=xy_files['sev_mainpar'])

                dh_sim.include_nps(nps=xy_files['nps'])

                dh_sim.include_of(of_real=np.real(xy_files['of']),
                              of_imag=np.imag(xy_files['of']))

                # for tp

                if 'sev_tp' in xy_files:

                    dh_sim.include_sev(sev=xy_files['sev_tp'],
                                   fitpar=xy_files['sev_tp_fitpar'],
                                   mainpar=xy_files['sev_tp_mainpar'],
                                   group_name_appendix='_tp')

                    dh_sim.include_of(of_real=np.real(xy_files['of_tp']),
                                  of_imag=np.imag(xy_files['of_tp']),
                                  group_name_appendix='_tp')

                # for direct hits

                if 'sev_direct' in xy_files:

                    dh_sim.include_sev(sev=xy_files['sev_direct'],
                                   fitpar=xy_files['sev_direct_fitpar'],
                                   mainpar=xy_files['sev_direct_mainpar'],
                                   group_name_appendix='_direct')

                if 'of_direct' in xy_files:

                    dh_sim.include_of(of_real=np.real(xy_files['of_direct']),
                                  of_imag=np.imag(xy_files['of_direct']),
                                  group_name_appendix='_direct')


                # --------------------------------------------------
                # Calc Parameters
                # --------------------------------------------------

                dh_sim.calc_mp(type='events')
                dh_sim.calc_additional_mp()
                dh_sim.apply_of()

                if 'of_direct' in xy_files:
                    dh_sim.apply_of(name_appendix_group='_direct', name_appendix_set='_direct')

                # get the sevs with the fit parameters

                t = dh_sim.record_window()
                sev_array = []
                for i,c in enumerate(args['rdt_channels']):
                    sev_array.append(ai.fit.pulse_template(t, *xy_files['sev_fitpar'][c]))

                if 'sev_direct' in xy_files:
                    sev_direct_array = []
                    for i,c in enumerate(args['rdt_channels']):
                        sev_direct_array.append(ai.fit.pulse_template(t, *xy_files['sev_direct_fitpar'][c]))

    #             # do the fits

    #             dh_sim.apply_array_fit(processes=args['processes'],
    #                                truncation_level=args['truncation_levels'],
    #                                first_channel_dominant=not args['uncorrelated'], use_this_array=sev_array)

    #             dh_sim.apply_array_fit(processes=args['processes'],
    #                    truncation_level=args['truncation_levels'],
    #                    first_channel_dominant=False, use_this_array=sev_array)

    #             # do the fit for the direct hits

    #             if 'sev_direct' in xy_files:
    #                 dh_sim.apply_array_fit(group_name_appendix = '_direct', name_appendix = '_direct',
    #                                    processes=args['processes'],
    #                                    truncation_level=args['truncation_levels'], only_channels=[1],
    #                                    use_this_array=sev_direct_array)

            if not args['no_ecal']:

                # --------------------------------------------------
                # Assign Energies
                # --------------------------------------------------

                with open(args['path_pulser_model'] + args['naming'] + '_' + fn + '.pm', 'rb') as f:
                    pm = pickle.load(f)

                dh_sim.calc_calibration(starts_saturation=args['max_height'],
                                        cpe_factor=args['cpe_factors'],
                                        plot=False,
                                        method='of',
                                        pulser_models=pm,
                                        name_appendix_energy='_reconstructed',
                                        use_interpolation=True,
                                        )

                dh_sim.calc_calibration(starts_saturation=args['max_height'],
                                        cpe_factor=args['cpe_factors'],
                                        plot=False,
                                        method='true_ph',
                                        pulser_models=pm,
                                        name_appendix_energy='_true',
                                        use_interpolation=True,
                                        )

            # --------------------------------------------------
            # Apply neural network and other cuts
            # --------------------------------------------------

            ckp_path = ai.resources.get_resource_path('cnn-clf-binary-v0.ckpt')

            for c in range(len(args['rdt_channels'])):

                for group in ['events']:

                    ai.models.nn_predict(h5_path=dh_sim.path_h5,
                               model=ai.models.CNNModule.load_from_checkpoint(ckp_path),
                               feature_channel=c,
                               group_name=group,
                               prediction_name='cnn_cut',
                               keys=['event'],
                               no_channel_idx_in_pred=False,
                               use_prob=False)

                    ai.models.nn_predict(h5_path=dh_sim.path_h5,
                               model=ai.models.CNNModule.load_from_checkpoint(ckp_path),
                               feature_channel=c,
                               group_name=group,
                               prediction_name='cnn_prob',
                               keys=['event'],
                               no_channel_idx_in_pred=False,
                               use_prob=True)

            # --------------------------------------------------
            # Delete Raw Events
            # --------------------------------------------------

            dh_sim.drop_raw_data(type='events')

        # --------------------------------------------------
        # Merge the files
        # --------------------------------------------------

        if i > 0 and args['merge']:

            merge_keywords_ = merge_keywords.copy()

            merge_keywords_['path_h5_a'] = args['path_h5'] + args['out_name'] + '_' + args['naming'] + '_{}.h5'.format(
                args['file_list'][0]) if i == 1 else args['path_h5'] + args['out_name'] + '_{:03d}.h5'.format(i - 1)
            merge_keywords_['a_name'] = args['out_name'] + '_' + args['naming'] + '_{}'.format(args['file_list'][0]) if i == 1 else 'keep'
            merge_keywords_['path_h5_b'] = args['path_h5'] + args['out_name'] + '_' + args['naming'] + '_{}.h5'.format(fn)
            merge_keywords_['b_name'] = args['out_name'] + '_' + args['naming'] + '_{}'.format(fn)
            merge_keywords_['path_h5_merged'] = args['path_h5'] + args['out_name'] + '_{:03d}.h5'.format(i)

            start_a = get_filestart(merge_keywords_['path_h5_a'], args)
            start_b = get_filestart(merge_keywords_['path_h5_b'], args)

            merge_keywords_['second_file_start'] = (start_b[0] + 1e-6*start_b[1] - start_a[0] - 1e-6*start_a[1])/3600

            ai.data.merge_h5_sets(verb=False,
                                  **merge_keywords_,
                                  )

    # ---------------------------------------------
    # Finishing Notes
    # ---------------------------------------------

    print('-----------------------------------------------------')
    print('>> DONE WITH ALL FILES.')