diff --git a/setup.cfg b/setup.cfg
index 2dfa61819f1101e341e61622bf5cec59a6e38394..f2d50619d6c48238dfec03ea6001f78e5d28df39 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -38,6 +38,7 @@ install_requires =
     importlib_resources
     prettytable
     astropy
+    setuptools
     setuptools_scm
     uproot
 python_requires = >=3.6
diff --git a/src/km3irf/build_aeff.py b/src/km3irf/build_aeff.py
index 98175012f42c5f978361dacbfe4e5a767fa00961..23890a6e4fffa36d5b7dca19c408ba17344e6082 100644
--- a/src/km3irf/build_aeff.py
+++ b/src/km3irf/build_aeff.py
@@ -15,11 +15,11 @@ import astropy.units as u
 # from gammapy.irf import EnergyDispersion2D
 from scipy.stats import binned_statistic
 
-# from scipy.ndimage import gaussian_filter1d, gaussian_filter
+# from scipy.ndimage import gaussian_filter1d, gaussian_filter
 
 
-# from collections import defaultdict
+# from collections import defaultdict
 
 # import sys
 # sys.path.append('../')
@@ -28,71 +28,68 @@ from scipy.stats import binned_statistic
 # from python_scripts.func import WriteAeff
 # from python_scripts.func import WritePSF
 
+
 def build_aeff(
-    input=(file_nu, file_nubar),
-    no_bdt=False,
-    cuts=False,
-    power_degree=-2.5
-    ):
+    input=(file_nu, file_nubar), no_bdt=False, cuts=False, power_degree=-2.5
+):
     """
-    Create Aeff .fits from dist files
+    Create Aeff .fits from dst files
 
-    input: a tuple with pathes to nu_dst file and nubar_dst file
+    input: a tuple with paths to the nu_dst file and the nubar_dst file
 
-    no_bdt: include or exclude bdt, default False
+    no_bdt: include or exclude bdt, default False
 
-    cuts: apply cuts, default False
+    cuts: apply cuts, default False
 
-    power_degree: re-weight data, default value -2.5
+    power_degree: re-weight data, default value -2.5
 
     """
-    #Read data files using km3io
+    # Read data files using km3io
     f_nu_km3io = OfflineReader(input[0])
     f_nubar_km3io = OfflineReader(input[1])
 
-    #Read data files using uproot
+    # Read data files using uproot
     f_nu_uproot = ur.open(input[0])
     f_nubar_uproot = ur.open(input[1])
 
     def unpack_data(no_bdt):
         """
-        some words
+        unpack event information from the input files
 
-        return tuple with two pandas data frames (nu, nubar)
+        return tuple with two pandas data frames (nu, nubar)
 
         """
         # Access data arrays
         data_km3io = dict()
 
-        for l,f in zip(['nu', 'nubar'], [f_nu_km3io, f_nubar_km3io]):
+        for l, f in zip(["nu", "nubar"], [f_nu_km3io, f_nubar_km3io]):
 
             data_km3io[l] = dict()
 
-            data_km3io[l]['E'] = f.tracks.E[:,0]
-            data_km3io[l]['dir_x'] = f.tracks.dir_x[:,0]
-            data_km3io[l]['dir_y'] = f.tracks.dir_y[:,0]
-            data_km3io[l]['dir_z'] = f.tracks.dir_z[:,0]
+            data_km3io[l]["E"] = f.tracks.E[:, 0]
+            data_km3io[l]["dir_x"] = f.tracks.dir_x[:, 0]
+            data_km3io[l]["dir_y"] = f.tracks.dir_y[:, 0]
+            data_km3io[l]["dir_z"] = f.tracks.dir_z[:, 0]
 
-            data_km3io[l]['energy_mc'] = f.mc_tracks.E[:,0]
-            data_km3io[l]['dir_x_mc'] = f.mc_tracks.dir_x[:,0]
-            data_km3io[l]['dir_y_mc'] = f.mc_tracks.dir_y[:,0]
-            data_km3io[l]['dir_z_mc'] = f.mc_tracks.dir_z[:,0]
+            data_km3io[l]["energy_mc"] = f.mc_tracks.E[:, 0]
+            data_km3io[l]["dir_x_mc"] = f.mc_tracks.dir_x[:, 0]
+            data_km3io[l]["dir_y_mc"] = f.mc_tracks.dir_y[:, 0]
+            data_km3io[l]["dir_z_mc"] = f.mc_tracks.dir_z[:, 0]
 
-            data_km3io[l]['weight_w2'] = f.w[:,1]
-
+            data_km3io[l]["weight_w2"] = f.w[:, 1]
 
-        #extracting bdt information
+        # extracting bdt information
         if not no_bdt:
-            for l,f in zip(['nu', 'nubar'], [f_nu_uproot, f_nubar_uproot]):
-                T = f['T;1']
-                bdt = T['bdt'].array()
-                data_km3io[l]['bdt0'] = bdt[:,0]
-                data_km3io[l]['bdt1'] = bdt[:,1]
-
+            for l, f in zip(["nu", "nubar"], [f_nu_uproot, f_nubar_uproot]):
+                T = f["T;1"]
+                bdt = T["bdt"].array()
+                data_km3io[l]["bdt0"] = bdt[:, 0]
+                data_km3io[l]["bdt1"] = bdt[:, 1]
+
         # create Data Frames
-        df_nu = pd.DataFrame(data_km3io['nu'])
-        df_nubar = pd.DataFrame(data_km3io['nubar'])
+        df_nu = pd.DataFrame(data_km3io["nu"])
+        df_nubar = pd.DataFrame(data_km3io["nubar"])
         data_tuple = (df_nu, df_nubar)
 
-        return data_tuple
\ No newline at end of file
+        return data_tuple
diff --git a/src/km3irf/irf_tools.py b/src/km3irf/irf_tools.py
index 4b0c50240d5de3b2ec63d7a55809e02a08f02741..85d37d7b11289fa4217679c05ce48d74cba88a62 100644
--- a/src/km3irf/irf_tools.py
+++ b/src/km3irf/irf_tools.py
@@ -3,12 +3,13 @@ import numpy as np
 import pandas as pd
 import numba as nb
-from numba import jit, prange
+from numba import jit, prange
 
 from km3pipe.math import azimuth, zenith
 import astropy.coordinates as ac
 from astropy.time import Time
 import astropy.units as u
 
+
 def calc_theta(table, mc=True):
     """
     Calculate the zenith angle theta of events given the direction coordinates x,y,z
@@ -25,56 +26,59 @@ def calc_theta(table, mc=True):
     """
 
     if not mc:
-        dir_x=table['dir_x'].to_numpy()
-        dir_y=table['dir_y'].to_numpy()
-        dir_z=table['dir_z'].to_numpy()
+        dir_x = table["dir_x"].to_numpy()
+        dir_y = table["dir_y"].to_numpy()
+        dir_z = table["dir_z"].to_numpy()
     else:
-        dir_x=table['dir_x_mc'].to_numpy()
-        dir_y=table['dir_y_mc'].to_numpy()
-        dir_z=table['dir_z_mc'].to_numpy()
+        dir_x = table["dir_x_mc"].to_numpy()
+        dir_y = table["dir_y_mc"].to_numpy()
+        dir_z = table["dir_z_mc"].to_numpy()
 
-    nu_directions=np.vstack([dir_x, dir_y, dir_z]).T
+    nu_directions = np.vstack([dir_x, dir_y, dir_z]).T
 
-    theta = zenith(nu_directions) # zenith angles in rad [0:pi]
+    theta = zenith(nu_directions)  # zenith angles in rad [0:pi]
 
     return theta
 
 
 def edisp_3D(e_bins, m_bins, t_bins, dataset, weights=1):
     """
-    Calculate the 3-dimensional energy dispersion matrix.
-    This is just a historgram with the simulated events.
-    The normalization needs to be done afterwards.
+    Calculate the 3-dimensional energy dispersion matrix.
+    This is just a histogram with the simulated events.
+    The normalization needs to be done afterwards.
 
-    e_bins: Array of energy bins in GeV
+    e_bins: Array of energy bins in GeV
 
-    m_bins: Array of energy migration bins (reconstructed energy / true energy)
+    m_bins: Array of energy migration bins (reconstructed energy / true energy)
 
-    t_bins: Array of zenith angle bins in rad
+    t_bins: Array of zenith angle bins in rad
 
-    dataset: pandas.Dataframe with the events
+    dataset: pandas.DataFrame with the events
 
-    weights: Array of weights for each event
+    weights: Array of weights for each event
 
-    ------
+    ------
 
-    Returns: 3-dimensional energy dispersion matrix. Binned in energy, energy migration and zenith angle
+    Returns: 3-dimensional energy dispersion matrix. Binned in energy, energy migration and zenith angle
 
     """
-    if 'theta_mc' not in dataset.keys():
-        dataset['theta_mc'] = calc_theta(dataset, mc=True)
-    if 'migra' not in dataset.keys():
-        dataset['migra'] = dataset.E / dataset.energy_mc
+    if "theta_mc" not in dataset.keys():
+        dataset["theta_mc"] = calc_theta(dataset, mc=True)
+    if "migra" not in dataset.keys():
+        dataset["migra"] = dataset.E / dataset.energy_mc
 
     theta_bins = pd.cut(dataset.theta_mc, t_bins, labels=False).to_numpy()
     energy_bins = pd.cut(dataset.energy_mc, e_bins, labels=False).to_numpy()
     migra_bins = pd.cut(dataset.migra, m_bins, labels=False).to_numpy()
 
-    edisp = fill_edisp_3D(e_bins, m_bins, t_bins, energy_bins, migra_bins, theta_bins, weights)
+    edisp = fill_edisp_3D(
+        e_bins, m_bins, t_bins, energy_bins, migra_bins, theta_bins, weights
+    )
 
     return edisp
 
+
 @jit(nopython=True, fastmath=False, parallel=True)
 def fill_edisp_3D(e_bins, m_bins, t_bins, energy_bins, migra_bins, theta_bins, weights):
     """
@@ -82,70 +86,79 @@ def fill_edisp_3D(e_bins, m_bins, t_bins, energy_bins, migra_bins, theta_bins, w
     Needed because numba does not work with pandas but needs numpy arrays.
     fastmath is disabled because it gives different results.
     """
-    
-    edisp = np.zeros((len(t_bins)-1, len(m_bins)-1, len(e_bins)-1))
-    for i in prange(len(t_bins)-1):
-        for j in range(len(m_bins)-1):
-            for k in range(len(e_bins)-1):
+
+    edisp = np.zeros((len(t_bins) - 1, len(m_bins) - 1, len(e_bins) - 1))
+    for i in prange(len(t_bins) - 1):
+        for j in range(len(m_bins) - 1):
+            for k in range(len(e_bins) - 1):
                 mask = (energy_bins == k) & (migra_bins == j) & (theta_bins == i)
-                edisp[i,j,k] = np.sum(mask * weights)
+                edisp[i, j, k] = np.sum(mask * weights)
     return edisp
 
 
 def psf_3D(e_bins, r_bins, t_bins, dataset, weights=1):
     """
-    Calculate the 3-dimensional PSF matrix.
-    This is a historgram with the simulated evenets.
-    The normalization needs to be done afterwards.
+    Calculate the 3-dimensional PSF matrix.
+    This is a histogram with the simulated events.
+    The normalization needs to be done afterwards.
 
-    e_bins: Array of energy bins in GeV
+    e_bins: Array of energy bins in GeV
 
-    r_bins: Array of radial bins in deg (angle between true and reconstructed direction)
+    r_bins: Array of radial bins in deg (angle between true and reconstructed direction)
 
-    t_bins: Array of zenith angle bins in rad
+    t_bins: Array of zenith angle bins in rad
 
-    dataset: pandas.Dataframe with the events
+    dataset: pandas.DataFrame with the events
 
-    weights: Array of weights for each event
+    weights: Array of weights for each event
 
-    ------
+    ------
 
-    Returns: 3-dimensional PSF matrix. Binned in energy, zenith angle and radial bins
+    Returns: 3-dimensional PSF matrix. Binned in energy, zenith angle and radial bins
 
     """
-    if 'theta_mc' not in dataset.keys():
-        dataset['theta_mc'] = calc_theta(dataset, mc=True)
+    if "theta_mc" not in dataset.keys():
+        dataset["theta_mc"] = calc_theta(dataset, mc=True)
 
-    scalar_prod = dataset.dir_x*dataset.dir_x_mc + dataset.dir_y*dataset.dir_y_mc + dataset.dir_z*dataset.dir_z_mc
-    scalar_prod[scalar_prod>1.0] = 1.0
-    rad = np.arccos(scalar_prod) * 180/np.pi # in deg now
-    dataset['rad'] = rad
+    scalar_prod = (
+        dataset.dir_x * dataset.dir_x_mc
+        + dataset.dir_y * dataset.dir_y_mc
+        + dataset.dir_z * dataset.dir_z_mc
+    )
+    scalar_prod[scalar_prod > 1.0] = 1.0
+    rad = np.arccos(scalar_prod) * 180 / np.pi  # in deg now
+    dataset["rad"] = rad
 
     theta_bins = pd.cut(dataset.theta_mc, t_bins, labels=False).to_numpy()
     energy_bins = pd.cut(dataset.energy_mc, e_bins, labels=False).to_numpy()
     rad_bins = pd.cut(rad, r_bins, labels=False).to_numpy()
 
-    psf = fill_psf_3D(e_bins, r_bins, t_bins, energy_bins, rad_bins, theta_bins, weights)
+    psf = fill_psf_3D(
+        e_bins, r_bins, t_bins, energy_bins, rad_bins, theta_bins, weights
+    )
 
     return psf
 
-@jit(nopython=True, fastmath=False, parallel=True) # do not use fastmath=True -- gives 300 aditional entries and no gain
+
+@jit(
+    nopython=True, fastmath=False, parallel=True
+)  # do not use fastmath=True -- gives 300 additional entries and no gain
 def fill_psf_3D(e_bins, r_bins, t_bins, energy_bins, rad_bins, theta_bins, weights):
-    psf = np.zeros((len(r_bins)-1, len(t_bins)-1, len(e_bins)-1))
-    for j in prange(len(r_bins)-1):
-        for i in range(len(t_bins)-1):
-            for k in range(len(e_bins)-1):
+    psf = np.zeros((len(r_bins) - 1, len(t_bins) - 1, len(e_bins) - 1))
+    for j in prange(len(r_bins) - 1):
+        for i in range(len(t_bins) - 1):
+            for k in range(len(e_bins) - 1):
                 mask = (energy_bins == k) & (rad_bins == j) & (theta_bins == i)
-                psf[j,i,k] = np.sum(mask * weights)
+                psf[j, i, k] = np.sum(mask * weights)
     return psf
 
 
 def aeff_2D(e_bins, t_bins, dataset, gamma=1.4, nevents=2e7):
     """
-    Calculate the effective area in energy and zenith angle bins.
+    Calculate the effective area in energy and zenith angle bins.
 
     e_bins: Array of energy bins in GeV
 
@@ -163,8 +176,8 @@ def aeff_2D(e_bins, t_bins, dataset, gamma=1.4, nevents=2e7):
 
     """
 
-    if 'theta_mc' not in dataset.keys():
-        dataset['theta_mc'] = calc_theta(dataset, mc=True)
+    if "theta_mc" not in dataset.keys():
+        dataset["theta_mc"] = calc_theta(dataset, mc=True)
 
     theta_bins = pd.cut(dataset.theta_mc, t_bins, labels=False).to_numpy()
     energy_bins = pd.cut(dataset.energy_mc, e_bins, labels=False).to_numpy()
@@ -175,15 +188,20 @@ def aeff_2D(e_bins, t_bins, dataset, gamma=1.4, nevents=2e7):
 
     return aeff
 
+
 @jit(nopython=True, fastmath=False, parallel=True)
 def fill_aeff_2D(e_bins, t_bins, energy_bins, theta_bins, w2, E, gamma, nevents):
-    T = 365*24*3600
-    aeff = np.empty((len(e_bins)-1, len(t_bins)-1))
-    for k in prange(len(e_bins)-1):
-        for i in range(len(t_bins)-1):
+    T = 365 * 24 * 3600
+    aeff = np.empty((len(e_bins) - 1, len(t_bins) - 1))
+    for k in prange(len(e_bins) - 1):
+        for i in range(len(t_bins) - 1):
             mask = (energy_bins == k) & (theta_bins == i)
-            d_omega = -(np.cos(t_bins[i+1]) - np.cos(t_bins[i]))
-            d_E = (e_bins[k+1])**(1-gamma) - (e_bins[k])**(1-gamma)
-            aeff[k,i] = (1-gamma) * np.sum(E[mask]**(-gamma)*w2[mask]) / (T*d_omega*d_E*nevents*2*np.pi)
+            d_omega = -(np.cos(t_bins[i + 1]) - np.cos(t_bins[i]))
+            d_E = (e_bins[k + 1]) ** (1 - gamma) - (e_bins[k]) ** (1 - gamma)
+            aeff[k, i] = (
+                (1 - gamma)
+                * np.sum(E[mask] ** (-gamma) * w2[mask])
+                / (T * d_omega * d_E * nevents * 2 * np.pi)
+            )
     return aeff
diff --git a/src/km3irf/utils.py b/src/km3irf/utils.py
index 7a0d6978499a087455680102ea8927de06f2f8eb..2b01fe7065bcd07dbd67e2da7b29feef0b89dc1f 100644
--- a/src/km3irf/utils.py
+++ b/src/km3irf/utils.py
@@ -36,7 +36,7 @@ def merge_fits(
     edisp_fits: path to Edisp .fits file
 
     bkg_fits: path to Background .fits file
-    
+
     output_file: name of the merged .fits file in data foledr of the package
     """
     hdu_list = []
diff --git a/tests/test_calc.py b/tests/test_calc.py
index 4a88655dd6899817aef89b07839e8b25fa692ccb..6b94211e8d4134159fa30780b6b44b84861c8c86 100644
--- a/tests/test_calc.py
+++ b/tests/test_calc.py
@@ -5,10 +5,7 @@ from os import path, listdir, curdir, remove
 import astropy
 
 from km3irf import Calculator
-from km3irf.utils import (
-    merge_fits,
-    list_data
-)
+from km3irf.utils import merge_fits, list_data
 
 
 class TestCalculator(unittest.TestCase):
@@ -40,6 +37,7 @@ class TestCalculator(unittest.TestCase):
         for b in range(100):
             assert 0 == self.calculator.multiply(0, b)
 
+
 class TestUtils(unittest.TestCase):
     def setUp(self):
         self.test_path = path.join(path.abspath(curdir), "src", "km3irf", "data")
@@ -49,4 +47,4 @@ class TestUtils(unittest.TestCase):
         assert "all_in_one.fits" in listdir(self.test_path)
 
     def test_list_data(self):
-        assert len(list_data()) == len(listdir(self.test_path))
\ No newline at end of file
+        assert len(list_data()) == len(listdir(self.test_path))
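
Usage note (not part of the patch): the hunks above reformat irf_tools.py without changing the public signatures of edisp_3D, psf_3D and aeff_2D. The sketch below shows one way these helpers might be exercised with a toy DataFrame whose columns mimic those filled by unpack_data in build_aeff.py. The synthetic events, the bin edges and the random_unit_vectors helper are illustrative assumptions, and it presumes km3irf together with its numba and km3pipe dependencies is installed.

import numpy as np
import pandas as pd

from km3irf.irf_tools import edisp_3D, psf_3D, aeff_2D

rng = np.random.default_rng(42)
n = 1000


def random_unit_vectors(size):
    """Draw isotropic unit vectors (hypothetical helper, not part of km3irf)."""
    v = rng.normal(size=(size, 3))
    return v / np.linalg.norm(v, axis=1, keepdims=True)


# Toy true directions/energies and smeared "reconstructed" counterparts.
d_true = random_unit_vectors(n)
d_reco = d_true + 0.05 * rng.normal(size=(n, 3))
d_reco /= np.linalg.norm(d_reco, axis=1, keepdims=True)
e_true = 10 ** rng.uniform(2, 7, n)            # GeV
e_reco = e_true * rng.lognormal(0.0, 0.3, n)   # smeared reconstructed energy

# Column names follow what unpack_data() puts into its DataFrames,
# including the weight_w2 column used by aeff_2D.
df = pd.DataFrame(
    {
        "E": e_reco,
        "dir_x": d_reco[:, 0], "dir_y": d_reco[:, 1], "dir_z": d_reco[:, 2],
        "energy_mc": e_true,
        "dir_x_mc": d_true[:, 0], "dir_y_mc": d_true[:, 1], "dir_z_mc": d_true[:, 2],
        "weight_w2": np.ones(n),               # dummy generation weights
    }
)

# Bin edges: energy (GeV), migration (E_reco / E_true), zenith (rad), angular error (deg).
e_bins = np.logspace(2, 7, 25)
m_bins = np.logspace(-1, 1, 21)
t_bins = np.linspace(0, np.pi, 13)
r_bins = np.linspace(0, 20, 41)

edisp = edisp_3D(e_bins, m_bins, t_bins, df)   # (theta, migra, energy) counts
psf = psf_3D(e_bins, r_bins, t_bins, df)       # (rad, theta, energy) counts
aeff = aeff_2D(e_bins, t_bins, df)             # (energy, theta) effective area
print(edisp.shape, psf.shape, aeff.shape)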