*************************
Efficiency Simulation
*************************

.. code:: python

    """
    efficiency_simulation.py

    The main script for an efficiency simulation, based on CSMPL files.

    All parameters are explained when you run the script with the -h flag. The trigger_utils module must be located in the same directory as this script.

    """

    # ---------------------------------------------
    # Imports
    # ---------------------------------------------

    import cait as ai
    import matplotlib.pyplot as plt
    import numpy as np
    from tqdm.auto import tqdm
    import argparse
    import pickle
    import os
    from trigger_utils import read_hw_data, get_filestart

    if __name__ == '__main__':

        # ---------------------------------------------
        # Read command line arguments
        # ---------------------------------------------

        # Construct the argument parser
        ap = argparse.ArgumentParser()

        # Add the REQUIRED arguments to the parser.
        # Only --file_list is enforced by argparse: the main loop iterates over it
        # unconditionally, so a missing list is reported up front instead of failing
        # later with a TypeError.
        # NOTE(review): the remaining arguments of this group are not enforced here,
        # because a merge-only run (-m) may not need all of them - confirm which ones
        # can safely be marked required as well.
        ap.add_argument("-a", "--name_stream_h5", type=str, help="Name of the stream HDF5 file, e.g. stream_008.")
        ap.add_argument("-l", "--file_list", type=str, nargs='+', required=True, help="List of the file numbers.")
        ap.add_argument("-p", "--rdt_path", type=str, help="Path to hardware data, e.g. /eos/vbc/group/darkmatter/cresst/gsdata/hwtrig/Run36/bck/")
        ap.add_argument("-s", "--csmpl_path", type=str, help="Path to stream data, e.g. /eos/vbc/group/darkmatter/cresst/gsdata/cstream/Run36/data/")
        ap.add_argument("-x", "--xy_files", type=str, help="Path to the folder that contains filter, nps, sev, sev mainpar and sev fitpar files.")
        ap.add_argument("-y", "--path_pulser_model", type=str, help="Path to the pulser models - the naming must be <path_you_put>_<file_nmbr>.pm ; e.g. /.../Li1_040.pm")
        ap.add_argument("-z", "--path_h5", type=str, help="Path where the HDF5 files should be stored.")

        # Add the OPTIONAL arguments to the parser.
        # The list-valued options (nargs='+') carry two-element defaults, i.e. one
        # entry per channel of a two-channel module.
        ap.add_argument("-b", "--nmbr_events", type=int, default=40000, help="Number of events to simulate per file.")
        ap.add_argument("-c", "--rdt_channels", type=int, default=[0, 1], nargs='+', help="List of the rdt channels of this module, e.g. 12 13.")
        ap.add_argument("-d", "--csmpl_channels", type=int, default=[0, 1], nargs='+', help="List of the csmpl channels of this module, e.g. 12 13.")
        ap.add_argument("-e", "--max_height", type=float, default=[3.5, 2.5], nargs='+', help="Maximal event height to simulate.")
        ap.add_argument("-f", "--min_height", type=float, default=[0.0015, 0.0035], nargs='+', help="Minimal event height to simualte.")
        ap.add_argument("-g", "--no_features", action='store_true', help="Skip the calculation of features and do only the triggering (not recommended).")
        ap.add_argument("-i", "--run_nmbr", type=str, default='36', help="Number of the Run, e.g. 36.")
        ap.add_argument("-k", "--no_simulation", action='store_true', help="Skip the file simulation, do only the feature calculations.")
        ap.add_argument("--no_ecal", action='store_true', help="Skip the energy calibration, e.g. if you dont have CPE factors.")
        ap.add_argument("-m", "--merge", action='store_true', help="Only merge the files list. You need to call this once you converted all the files.")
        ap.add_argument("-n", "--naming", type=str, default='bck', help="Naming of the csmpl stream files, e.g. bcl or ncal.")
        ap.add_argument("-o", "--out_name", type=str, default='efficiency', help="Naming of the output file, e.g. efficiency. This will always be combined with the naming, to obtain unique file names.")
        ap.add_argument("-q", "--sample_frequency", type=int, default=25000, help="The sample frequency.")
        ap.add_argument("-r", "--record_length", type=int, default=16384, help="The length of the record window.")
        ap.add_argument("-t", "--trigger_thresholds", type=float, default=[5.428, 11.017], nargs='+', help="List of the trigger threshold for all channels in mV, e.g. 5.428 11.017.")
        ap.add_argument("-u", "--truncation_levels", type=float, default=[0.9, 1.5], nargs='+', help="List of the truncation levels for all channels in V, e.g. 0.9 1.5.")
        ap.add_argument("-v", "--processes", type=int, default=4, help="The number of processes to use for the sev fit.")
        ap.add_argument("-w", "--uncorrelated", action='store_true', help="Do the height evaluations uncorrelated, if not activated the first channel is dominant, i.e. evaluate the height in the other channels at the maximum position of the first channel.")
        ap.add_argument("--cpe_factors", type=float, default=[2.956196019429851, 18.24242689975974], nargs='+', help="List of the CPE factors for all channels.")

        # Everything below accesses the parsed options through this plain dict,
        # keyed by the long option names.
        args = vars(ap.parse_args())

        # ---------------------------------------------
        # Constants and Paths
        # ---------------------------------------------

        # The thresholds are given in mV on the command line (see the help text);
        # the factor 0.001 rescales them.
        THRESHOLDS = 0.001 * np.array(args['trigger_thresholds'])

        # One logarithmically spaced grid of pulse heights per (min_height, max_height)
        # pair, each grid holding nmbr_events values.
        ph_grids = []
        for lowest_ph, highest_ph in zip(args['min_height'], args['max_height']):
            ph_grids.append(np.logspace(start=np.log10(lowest_ph),
                                        stop=np.log10(highest_ph),
                                        num=args['nmbr_events']))
        discrete_ph = np.array(ph_grids)

        # (data set name, value handed to the merge as that set's 'concatenate_axis' entry).
        # The order of this table fixes the order of both lists in merge_keywords.
        dataset_axes = [
            # ('event', 1),
            ('mainpar', 1),
            ('add_mainpar', 1),
            ('true_ph', 1),
            ('true_onset', 0),
            ('of_ph', 1),
            ('of_ph_direct', 1),
            ('arr_fit_par', 1),
            ('arr_fit_rms', 1),
            ('arr_fit_par_direct', 1),
            ('arr_fit_rms_direct', 1),
            ('hours', 0),
            ('labels', 1),
            ('testpulseamplitude', 0),
            ('time_s', 0),
            ('time_mus', 0),
            ('pulse_height', 1),
            ('tp_hours', 0),
            ('tp_time_mus', 0),
            ('tp_time_s', 0),
            ('tpa', 0),
            ('trigger_hours', 0),
            ('trigger_time_mus', 0),
            ('trigger_time_s', 0),
            ('recoil_energy_true', 1),
            ('recoil_energy_sigma_true', 1),
            ('tpa_equivalent_true', 1),
            ('tpa_equivalent_sigma_true', 1),
            ('recoil_energy_reconstructed', 1),
            ('recoil_energy_sigma_reconstructed', 1),
            ('tpa_equivalent_reconstructed', 1),
            ('tpa_equivalent_sigma_reconstructed', 1),
            ('cnn_cut', 1),
            ('cnn_prob', 1),
            ('start_s', -1),
            ('start_mus', -1),
            ('stop_s', -1),
            ('stop_mus', -1),
            ('sample_frequency', -1),
            ('record_length', -1),
            ('runtime', -1),
        ]
        datasets = dict(dataset_axes)

        # Keyword arguments shared by every merge call; the per-file paths and names
        # are added to a copy of this dict in the merge step of the file loop.
        merge_keywords = {
            'groups_to_merge': ['events', 'stream', 'metainfo'],
            'sets_to_merge': list(datasets),
            'concatenate_axis': [datasets[set_name] for set_name in datasets],
            'continue_hours': True,
            'keep_original_files': True,
            'groups_from_a': ['optimumfilter', 'optimumfilter_tp', 'optimumfilter_direct', 'stdevent', 'stdevent_tp', 'stdevent_direct', 'noise'],
        }

        # ---------------------------------------------
        # Get Handle to Stream Data
        # ---------------------------------------------

        # Handler pointing at the stream HDF5 file <path_h5>/<name_stream_h5>.
        dh_stream = ai.DataHandler(
            channels=args['rdt_channels'],
            record_length=args['record_length'],
            sample_frequency=args['sample_frequency'],
        )
        dh_stream.set_filepath(
            path_h5=args['path_h5'],
            fname=args['name_stream_h5'],
            appendix=False,
        )

        # First column of the start/stop table stored in the 'metainfo' group.
        # NOTE(review): start_hours is not referenced again in this script - confirm
        # whether it is still needed.
        startstop_hours = dh_stream.get('metainfo', 'startstop_hours')
        start_hours = startstop_hours[:, 0]

        # ---------------------------------------------
        # Get Infos from HW Data
        # ---------------------------------------------

        # Dict-like collection; the file loop reads the keys 'sev', 'sev_fitpar',
        # 'sev_mainpar', 'nps' and 'of' from it, plus the optional '*_tp' and
        # '*_direct' variants.
        xy_files = read_hw_data(args)

        # ---------------------------------------------
        # Start the Loop
        # ---------------------------------------------

        # Process the files one by one. `i` is the position of the file in the list
        # and is used by the merge step at the end of the loop body to name the
        # intermediate merged files; `fn` is the file number as a string.
        for i, fn in enumerate(args['file_list']):

            print('-----------------------------------------------------')
            print('>> {} WORKING ON FILE: {}'.format(i, fn))

            # Simulation and feature calculation are skipped entirely in merge mode.
            if not args['merge']:
                # Base name (without '.h5') of the temporary file holding the empty baselines.
                empty_name = 'empty_' + args['naming'] + '_' + fn
                # Base name (without '.h5') of the per-file simulation output.
                sim_name = args['out_name'] + '_' + args['naming'] + '_' + fn

                if not args['no_simulation']:

                    # Handler for the temporary file that collects the empty baselines.
                    dh_empty = ai.DataHandler(
                        channels=args['rdt_channels'],
                        record_length=args['record_length'],
                        sample_frequency=args['sample_frequency'],
                    )
                    dh_empty.set_filepath(
                        path_h5=args['path_h5'],
                        fname=empty_name,
                        appendix=False,
                    )

                    # One csmpl file per channel; folder and file names use the
                    # channel number plus one.
                    csmpl_paths = []
                    for csmpl_channel in args['csmpl_channels']:
                        ch_nmbr = str(csmpl_channel + 1)
                        csmpl_paths.append(
                            args['csmpl_path'] + 'Ch' + ch_nmbr + '/' + 'Run' + args['run_nmbr'] + '_'
                            + args['naming'] + '_' + fn + '_Ch' + ch_nmbr + '.csmpl')

                    # --------------------------------------------------
                    # Include Test Pulse Time Stamps
                    # --------------------------------------------------

                    # include metadata
                    dh_empty.init_empty()

                    # The .par, .test_stamps and .dig_stamps files share one prefix.
                    hw_prefix = args['rdt_path'] + args['naming'] + '_' + fn
                    dh_empty.include_metainfo(hw_prefix + '.par')
                    dh_empty.include_test_stamps(
                        path_teststamps=hw_prefix + '.test_stamps',
                        path_dig_stamps=hw_prefix + '.dig_stamps',
                    )

                    # --------------------------------------------------
                    # Include the Random Triggers Events
                    # --------------------------------------------------

                    dh_empty.include_noise_triggers(nmbr=args['nmbr_events'],
                                                    min_distance=0.5,
                                                    max_distance=60,
                                                    max_attempts=5,
                                                    no_pileup=False)

                    dh_empty.include_noise_events(csmpl_paths, datatype='float32')

                    # ----------------------------------------------------------
                    # Include OF, SEV, NPS
                    # ----------------------------------------------------------

                    dh_empty.include_sev(
                        sev=xy_files['sev'],
                        fitpar=xy_files['sev_fitpar'],
                        mainpar=xy_files['sev_mainpar'],
                    )

                    dh_empty.include_nps(nps=xy_files['nps'])

                    # The filter is stored as separate real and imaginary parts.
                    dh_empty.include_of(
                        of_real=np.real(xy_files['of']),
                        of_imag=np.imag(xy_files['of']),
                    )

                    # for tp

                    if 'sev_tp' in xy_files:

                        dh_empty.include_sev(
                            sev=xy_files['sev_tp'],
                            fitpar=xy_files['sev_tp_fitpar'],
                            mainpar=xy_files['sev_tp_mainpar'],
                            group_name_appendix='_tp',
                        )

                        dh_empty.include_of(
                            of_real=np.real(xy_files['of_tp']),
                            of_imag=np.imag(xy_files['of_tp']),
                            group_name_appendix='_tp',
                        )

                    # for direct hits

                    if 'sev_direct' in xy_files:

                        dh_empty.include_sev(
                            sev=xy_files['sev_direct'],
                            fitpar=xy_files['sev_direct_fitpar'],
                            mainpar=xy_files['sev_direct_mainpar'],
                            group_name_appendix='_direct',
                        )

                    if 'of_direct' in xy_files:

                        dh_empty.include_of(
                            of_real=np.real(xy_files['of_direct']),
                            of_imag=np.imag(xy_files['of_direct']),
                            group_name_appendix='_direct',
                        )

                    # --------------------------------------------------
                    # Simulate Events
                    # --------------------------------------------------

                    dh_empty.calc_bl_coefficients()

                    # The simulated events are written to <path_h5>/<sim_name>.h5,
                    # with the pulse heights taken from the discrete_ph grids.
                    dh_empty.simulate_pulses(
                        path_sim=args['path_h5'] + sim_name + '.h5',
                        size_events=args['nmbr_events'],
                        reuse_bl=True,
                        ev_discrete_phs=discrete_ph,
                        t0_interval=[-10, 0],  # in ms
                        rms_thresholds=[1e5, 1e5],
                        fake_noise=False,
                    )

                    # --------------------------------------------------
                    # Delete original empty set
                    # --------------------------------------------------

                    # Delete the empty bl hdf5 set
                    del dh_empty
                    empty_h5_path = args['path_h5'] + empty_name + '.h5'
                    print('Deleting {}.'.format(empty_h5_path))
                    os.remove(empty_h5_path)

                # --------------------------------------------------
                # Include data from PAR and XY files
                # --------------------------------------------------

                # Handler on the per-file simulation output <path_h5>/<sim_name>.h5;
                # all remaining per-file steps operate on it.
                sim_handler_setup = {
                    'channels': args['rdt_channels'],
                    'record_length': args['record_length'],
                    'sample_frequency': args['sample_frequency'],
                }
                dh_sim = ai.DataHandler(**sim_handler_setup)

                dh_sim.set_filepath(
                    path_h5=args['path_h5'],
                    fname=sim_name,
                    appendix=False,
                )

                if not args['no_features']:

                    # Copy meta information, standard events, noise power spectrum and
                    # optimum filters into the simulated file, so that the feature
                    # calculations below find them there.
                    dh_sim.include_metainfo(args['rdt_path'] + args['naming'] + '_' + fn + '.par')

                    dh_sim.include_sev(sev=xy_files['sev'],
                                       fitpar=xy_files['sev_fitpar'],
                                       mainpar=xy_files['sev_mainpar'])

                    dh_sim.include_nps(nps=xy_files['nps'])

                    dh_sim.include_of(of_real=np.real(xy_files['of']),
                                      of_imag=np.imag(xy_files['of']))

                    # for tp

                    if 'sev_tp' in xy_files:

                        dh_sim.include_sev(sev=xy_files['sev_tp'],
                                           fitpar=xy_files['sev_tp_fitpar'],
                                           mainpar=xy_files['sev_tp_mainpar'],
                                           group_name_appendix='_tp')

                        dh_sim.include_of(of_real=np.real(xy_files['of_tp']),
                                          of_imag=np.imag(xy_files['of_tp']),
                                          group_name_appendix='_tp')

                    # for direct hits

                    if 'sev_direct' in xy_files:

                        dh_sim.include_sev(sev=xy_files['sev_direct'],
                                           fitpar=xy_files['sev_direct_fitpar'],
                                           mainpar=xy_files['sev_direct_mainpar'],
                                           group_name_appendix='_direct')

                    if 'of_direct' in xy_files:

                        dh_sim.include_of(of_real=np.real(xy_files['of_direct']),
                                          of_imag=np.imag(xy_files['of_direct']),
                                          group_name_appendix='_direct')

                    # --------------------------------------------------
                    # Calc Parameters
                    # --------------------------------------------------

                    dh_sim.calc_mp(type='events')
                    dh_sim.calc_additional_mp()
                    dh_sim.apply_of()

                    if 'of_direct' in xy_files:
                        dh_sim.apply_of(name_appendix_group='_direct', name_appendix_set='_direct')

                    # get the sevs with the fit parameters

                    # The templates are built with comprehensions so that no loop
                    # variable is left behind in the enclosing scope: the outer file
                    # loop owns `i` and uses it for the file index and the merge naming.
                    # Both arrays are referenced only by the array-fit code that is
                    # currently commented out below.
                    # NOTE(review): the fit parameters are indexed with the channel
                    # number itself, not with its position in rdt_channels - confirm
                    # that this matches the layout of xy_files['sev_fitpar'] when the
                    # channels are not 0, 1, ...
                    t = dh_sim.record_window()
                    sev_array = [ai.fit.pulse_template(t, *xy_files['sev_fitpar'][c])
                                 for c in args['rdt_channels']]

                    if 'sev_direct' in xy_files:
                        sev_direct_array = [ai.fit.pulse_template(t, *xy_files['sev_direct_fitpar'][c])
                                            for c in args['rdt_channels']]

        #             # do the fits

        #             dh_sim.apply_array_fit(processes=args['processes'],
        #                                truncation_level=args['truncation_levels'],
        #                                first_channel_dominant=not args['uncorrelated'], use_this_array=sev_array)

        #             dh_sim.apply_array_fit(processes=args['processes'],
        #                    truncation_level=args['truncation_levels'],
        #                    first_channel_dominant=False, use_this_array=sev_array)

        #             # do the fit for the direct hits

        #             if 'sev_direct' in xy_files:
        #                 dh_sim.apply_array_fit(group_name_appendix = '_direct', name_appendix = '_direct',
        #                                    processes=args['processes'],
        #                                    truncation_level=args['truncation_levels'], only_channels=[1],
        #                                    use_this_array=sev_direct_array)

                if not args['no_ecal']:

                    # --------------------------------------------------
                    # Assign Energies
                    # --------------------------------------------------

                    # NOTE(review): the file is looked up as <path_pulser_model><naming>_<file_nmbr>.pm,
                    # while the help text of --path_pulser_model describes
                    # <path_you_put>_<file_nmbr>.pm - confirm which naming is intended.
                    # NOTE(review): pickle.load can execute arbitrary code; only point
                    # this at pulser model files from a trusted source.
                    pm_path = args['path_pulser_model'] + args['naming'] + '_' + fn + '.pm'
                    with open(pm_path, 'rb') as f:
                        pm = pickle.load(f)

                    # Calibrate twice with identical settings: once from the optimum
                    # filter heights ('_reconstructed') and once from the true
                    # simulated heights ('_true').
                    calibration_runs = (
                        ('of', '_reconstructed'),
                        ('true_ph', '_true'),
                    )
                    for ph_method, energy_appendix in calibration_runs:
                        dh_sim.calc_calibration(
                            starts_saturation=args['max_height'],
                            cpe_factor=args['cpe_factors'],
                            plot=False,
                            method=ph_method,
                            pulser_models=pm,
                            name_appendix_energy=energy_appendix,
                            use_interpolation=True,
                        )

                # --------------------------------------------------
                # Apply neural network and other cuts
                # --------------------------------------------------

                ckp_path = ai.resources.get_resource_path('cnn-clf-binary-v0.ckpt')

                # Load the checkpoint once per file and reuse the model for every
                # prediction below, instead of re-reading it for each channel and
                # each output.
                cnn_model = ai.models.CNNModule.load_from_checkpoint(ckp_path)

                # For every channel index two data sets are written from the same
                # model: 'cnn_cut' (use_prob=False) and 'cnn_prob' (use_prob=True).
                prediction_outputs = (
                    ('cnn_cut', False),
                    ('cnn_prob', True),
                )

                for channel_idx in range(len(args['rdt_channels'])):

                    for group in ['events']:

                        for prediction_name, use_prob in prediction_outputs:

                            ai.models.nn_predict(h5_path=dh_sim.path_h5,
                                                 model=cnn_model,
                                                 feature_channel=channel_idx,
                                                 group_name=group,
                                                 prediction_name=prediction_name,
                                                 keys=['event'],
                                                 no_channel_idx_in_pred=False,
                                                 use_prob=use_prob)

                # --------------------------------------------------
                # Delete Raw Events
                # --------------------------------------------------

                # This runs only after the network predictions above, which read the
                # 'event' data set.
                dh_sim.drop_raw_data(type='events')

            # --------------------------------------------------
            # Merge the files
            # --------------------------------------------------

            if i > 0 and args['merge']:

                # Per-file outputs are named <out_name>_<naming>_<file number>, the
                # running merge results <out_name>_<three-digit loop index>.
                per_file_stem = args['out_name'] + '_' + args['naming']

                if i == 1:
                    # First merge: file A is the output of the first file in the list.
                    path_h5_a = args['path_h5'] + per_file_stem + '_{}.h5'.format(args['file_list'][0])
                    a_name = per_file_stem + '_{}'.format(args['file_list'][0])
                else:
                    # Later merges: file A is the result of the previous merge.
                    path_h5_a = args['path_h5'] + args['out_name'] + '_{:03d}.h5'.format(i - 1)
                    a_name = 'keep'

                # File B is always the per-file output of the current file.
                path_h5_b = args['path_h5'] + per_file_stem + '_{}.h5'.format(fn)
                b_name = per_file_stem + '_{}'.format(fn)

                merge_keywords_ = merge_keywords.copy()
                merge_keywords_.update({
                    'path_h5_a': path_h5_a,
                    'a_name': a_name,
                    'path_h5_b': path_h5_b,
                    'b_name': b_name,
                    'path_h5_merged': args['path_h5'] + args['out_name'] + '_{:03d}.h5'.format(i),
                })

                start_a = get_filestart(path_h5_a, args)
                start_b = get_filestart(path_h5_b, args)

                # Start of file B relative to the start of file A: element 0 plus 1e-6
                # times element 1 of each start stamp, the difference divided by 3600.
                start_offset = start_b[0] + 1e-6*start_b[1] - start_a[0] - 1e-6*start_a[1]
                merge_keywords_['second_file_start'] = start_offset/3600

                ai.data.merge_h5_sets(verb=False, **merge_keywords_)

        # ---------------------------------------------
        # Finishing Notes
        # ---------------------------------------------

        # Closing separator and message, printed once after the file loop.
        print('-----------------------------------------------------',
              '>> DONE WITH ALL FILES.',
              sep='\n')