Issue with concatenate: shape mismatch in dataset 'x' (17 vs. 16 columns) between input files
Hi @sreck and @dguderian,
I am trying to concatenate the ML files I have converted with the master branch of OrcaSong.
However, I get this error:
Checking 185 files ...
Valid input files: 185/185
Datasets: event_info, group_info, x, x_indices, y
Total rows: 792239
complib: gzip
complevel: 1
chunksize: 32
shuffle: True
Creating file /sps/km3net/users/adomi/GNNs/training/Muons_vs_Neutrinos_time_window2.h5
Processing file 1/185: /sps/km3net/users/adomi/GNNs/files_for_training_time_window/ML_mcv5.1.genhen_anueNC.km3_AAv1.jte.jchain.aashower.120.h5
Creating dataset 'event_info' with shape (792239,)
Creating dataset 'group_info' with shape (792239,)
Creating dataset 'x' with shape (792239, 5000, 16)
Creating dataset 'x_indices' with shape (792239,)
Creating dataset 'y' with shape (792239,)
Processing file 2/185: /sps/km3net/users/adomi/GNNs/files_for_training_time_window/ML_mcv5.1.genhen_anueNC.km3_AAv1.jte.jchain.aashower.121.h5
Processing file 3/185: /sps/km3net/users/adomi/GNNs/files_for_training_time_window/ML_mcv5.1.genhen_anueNC.km3_AAv1.jte.jchain.aashower.168.h5
Processing file 4/185: /sps/km3net/users/adomi/GNNs/files_for_training_time_window/ML_mcv5.1.genhen_anumuNC.km3_AAv1.jte.jchain.aashower.2.h5
Traceback (most recent call last):
File "/pbs/home/a/adomi/mypython/bin/concatenate", line 11, in <module>
load_entry_point('orcasong', 'console_scripts', 'concatenate')()
File "/sps/km3net/users/adomi/GNNs/OrcaSong/orcasong/tools/concatenate.py", line 353, in main
append_used_files=not parsed_args.no_used_files,
File "/sps/km3net/users/adomi/GNNs/OrcaSong/orcasong/tools/concatenate.py", line 95, in concatenate
self._conc_file(f_in, f_out, input_file, input_file_nmbr)
File "/sps/km3net/users/adomi/GNNs/OrcaSong/orcasong/tools/concatenate.py", line 163, in _conc_file
self.cumu_rows[input_file_nmbr]:self.cumu_rows[input_file_nmbr + 1]] = folder_data
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "/pbs/home/a/adomi/mypython/lib/python3.7/site-packages/h5py/_hl/dataset.py", line 707, in __setitem__
for fspace in selection.broadcast(mshape):
File "/pbs/home/a/adomi/mypython/lib/python3.7/site-packages/h5py/_hl/selections.py", line 299, in broadcast
raise TypeError("Can't broadcast %s -> %s" % (target_shape, self.mshape))
TypeError: Can't broadcast (7921, 5000, 17) -> (7921, 5000, 16)
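I had a look at the file that was being processed when the error occurred (file 4/185), /sps/km3net/users/adomi/GNNs/files_for_training_time_window/ML_mcv5.1.genhen_anumuNC.km3_AAv1.jte.jchain.aashower.2.h5
Its 'x' dataset has a last dimension of 17, whereas the output dataset 'x' was created with a last dimension of 16. This is what I see in the file: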
/event_info (Table(7921,), fletcher32, shuffle, zlib(1)) 'EventInfo'
/group_info (Table(7921,), fletcher32, shuffle, zlib(1)) 'Group Info'
/x (EArray(7921, 5000, 17), fletcher32, shuffle, zlib(1)) 'nodes'
/x_indices (Table(7921,), fletcher32, shuffle, zlib(1)) 'Indices'
/y (Table(7921,), fletcher32, shuffle, zlib(1)) 'event_info'
or, in more detail:
/ (RootGroup) 'KM3NeT'
/event_info (Table(7921,), fletcher32, shuffle, zlib(1)) 'EventInfo'
description := {
"det_id": Int64Col(shape=(), dflt=0, pos=0),
"event_id": Int64Col(shape=(), dflt=0, pos=1),
"frame_index": Int64Col(shape=(), dflt=0, pos=2),
"mc_run_id": Int64Col(shape=(), dflt=0, pos=3),
"mc_time": Float64Col(shape=(), dflt=0.0, pos=4),
"nanoseconds": Int64Col(shape=(), dflt=0, pos=5),
"overlays": Int64Col(shape=(), dflt=0, pos=6),
"run_id": Int64Col(shape=(), dflt=0, pos=7),
"timestamp": Int64Col(shape=(), dflt=0, pos=8),
"trigger_counter": Int64Col(shape=(), dflt=0, pos=9),
"trigger_mask": Int64Col(shape=(), dflt=0, pos=10),
"weight_w1": Float64Col(shape=(), dflt=0.0, pos=11),
"weight_w2": Float64Col(shape=(), dflt=0.0, pos=12),
"weight_w3": Float64Col(shape=(), dflt=0.0, pos=13),
"weight_w4": Float64Col(shape=(), dflt=0.0, pos=14),
"group_id": Int64Col(shape=(), dflt=0, pos=15),
"n_hits_intime": Int64Col(shape=(), dflt=0, pos=16)}
byteorder := 'little'
chunkshape := (32,)
autoindex := True
colindexes := {
"event_id": Index(6, medium, shuffle, zlib(1)).is_csi=False,
"group_id": Index(6, medium, shuffle, zlib(1)).is_csi=False}
/group_info (Table(7921,), fletcher32, shuffle, zlib(1)) 'Group Info'
description := {
"blob_length": Int64Col(shape=(), dflt=0, pos=0),
"group_id": Int64Col(shape=(), dflt=0, pos=1)}
byteorder := 'little'
chunkshape := (32,)
autoindex := True
colindexes := {
"group_id": Index(6, medium, shuffle, zlib(1)).is_csi=False}
/x (EArray(7921, 5000, 17), fletcher32, shuffle, zlib(1)) 'nodes'
atom := Float32Atom(shape=(), dflt=0.0)
maindim := 0
flavor := 'numpy'
byteorder := 'little'
chunkshape := (32, 5000, 17)
/x_indices (Table(7921,), fletcher32, shuffle, zlib(1)) 'Indices'
description := {
"index": Int64Col(shape=(), dflt=0, pos=0),
"n_items": Int64Col(shape=(), dflt=0, pos=1)}
byteorder := 'little'
chunkshape := (32,)
/y (Table(7921,), fletcher32, shuffle, zlib(1)) 'event_info'
description := {
"event_id": Int64Col(shape=(), dflt=0, pos=0),
"particle_type": Float64Col(shape=(), dflt=0.0, pos=1),
"energy": Float64Col(shape=(), dflt=0.0, pos=2),
"is_cc": Float64Col(shape=(), dflt=0.0, pos=3),
"bjorkeny": Float64Col(shape=(), dflt=0.0, pos=4),
"dir_x": Float64Col(shape=(), dflt=0.0, pos=5),
"dir_y": Float64Col(shape=(), dflt=0.0, pos=6),
"dir_z": Float64Col(shape=(), dflt=0.0, pos=7),
"time_interaction": Float64Col(shape=(), dflt=0.0, pos=8),
"run_id": Float64Col(shape=(), dflt=0.0, pos=9),
"vertex_pos_x": Float64Col(shape=(), dflt=0.0, pos=10),
"vertex_pos_y": Float64Col(shape=(), dflt=0.0, pos=11),
"vertex_pos_z": Float64Col(shape=(), dflt=0.0, pos=12),
"n_hits": Float64Col(shape=(), dflt=0.0, pos=13),
"weight_w1": Float64Col(shape=(), dflt=0.0, pos=14),
"weight_w2": Float64Col(shape=(), dflt=0.0, pos=15),
"weight_w3": Float64Col(shape=(), dflt=0.0, pos=16),
"n_gen": Float64Col(shape=(), dflt=0.0, pos=17),
"jmuon_AASHOWERFIT_ENERGY": Float64Col(shape=(), dflt=0.0, pos=18),
"jmuon_AASHOWERFIT_NUMBER_OF_HITS": Float64Col(shape=(), dflt=0.0, pos=19),
"jmuon_E": Float64Col(shape=(), dflt=0.0, pos=20),
"jmuon_JCOPY_Z_M": Float64Col(shape=(), dflt=0.0, pos=21),
"jmuon_JENERGY_CHI2": Float64Col(shape=(), dflt=0.0, pos=22),
"jmuon_JENERGY_ENERGY": Float64Col(shape=(), dflt=0.0, pos=23),
"jmuon_JENERGY_MUON_RANGE_METRES": Float64Col(shape=(), dflt=0.0, pos=24),
"jmuon_JENERGY_NDF": Float64Col(shape=(), dflt=0.0, pos=25),
"jmuon_JENERGY_NOISE_LIKELIHOOD": Float64Col(shape=(), dflt=0.0, pos=26),
"jmuon_JENERGY_NUMBER_OF_HITS": Float64Col(shape=(), dflt=0.0, pos=27),
"jmuon_JGANDALF_BETA0_RAD": Float64Col(shape=(), dflt=0.0, pos=28),
"jmuon_JGANDALF_BETA1_RAD": Float64Col(shape=(), dflt=0.0, pos=29),
"jmuon_JGANDALF_CHI2": Float64Col(shape=(), dflt=0.0, pos=30),
"jmuon_JGANDALF_COVERAGE_ORIENTATION": Float64Col(shape=(), dflt=0.0, pos=31),
"jmuon_JGANDALF_COVERAGE_POSITION": Float64Col(shape=(), dflt=0.0, pos=32),
"jmuon_JGANDALF_LAMBDA": Float64Col(shape=(), dflt=0.0, pos=33),
"jmuon_JGANDALF_NUMBER_OF_HITS": Float64Col(shape=(), dflt=0.0, pos=34),
"jmuon_JGANDALF_NUMBER_OF_ITERATIONS": Float64Col(shape=(), dflt=0.0, pos=35),
"jmuon_JSHOWERFIT_ENERGY": Float64Col(shape=(), dflt=0.0, pos=36),
"jmuon_JSTART_LENGTH_METRES": Float64Col(shape=(), dflt=0.0, pos=37),
"jmuon_JSTART_NPE_MIP": Float64Col(shape=(), dflt=0.0, pos=38),
"jmuon_JSTART_NPE_MIP_TOTAL": Float64Col(shape=(), dflt=0.0, pos=39),
"jmuon_JVETO_NPE": Float64Col(shape=(), dflt=0.0, pos=40),
"jmuon_JVETO_NUMBER_OF_HITS": Float64Col(shape=(), dflt=0.0, pos=41),
"jmuon_dir_x": Float64Col(shape=(), dflt=0.0, pos=42),
"jmuon_dir_y": Float64Col(shape=(), dflt=0.0, pos=43),
"jmuon_dir_z": Float64Col(shape=(), dflt=0.0, pos=44),
"jmuon_length": Float64Col(shape=(), dflt=0.0, pos=45),
"jmuon_likelihood": Float64Col(shape=(), dflt=0.0, pos=46),
"jmuon_pos_x": Float64Col(shape=(), dflt=0.0, pos=47),
"jmuon_pos_y": Float64Col(shape=(), dflt=0.0, pos=48),
"jmuon_pos_z": Float64Col(shape=(), dflt=0.0, pos=49),
"jmuon_rec_type": Float64Col(shape=(), dflt=0.0, pos=50),
"jmuon_t": Float64Col(shape=(), dflt=0.0, pos=51),
"jmuon_group_id": Float64Col(shape=(), dflt=0.0, pos=52),
"aashower_AASHOWERFIT_ENERGY": Float64Col(shape=(), dflt=0.0, pos=53),
"aashower_AASHOWERFIT_NUMBER_OF_HITS": Float64Col(shape=(), dflt=0.0, pos=54),
"aashower_E": Float64Col(shape=(), dflt=0.0, pos=55),
"aashower_JCOPY_Z_M": Float64Col(shape=(), dflt=0.0, pos=56),
"aashower_JENERGY_CHI2": Float64Col(shape=(), dflt=0.0, pos=57),
"aashower_JENERGY_ENERGY": Float64Col(shape=(), dflt=0.0, pos=58),
"aashower_JENERGY_MUON_RANGE_METRES": Float64Col(shape=(), dflt=0.0, pos=59),
"aashower_JENERGY_NDF": Float64Col(shape=(), dflt=0.0, pos=60),
"aashower_JENERGY_NOISE_LIKELIHOOD": Float64Col(shape=(), dflt=0.0, pos=61),
"aashower_JENERGY_NUMBER_OF_HITS": Float64Col(shape=(), dflt=0.0, pos=62),
"aashower_JGANDALF_BETA0_RAD": Float64Col(shape=(), dflt=0.0, pos=63),
"aashower_JGANDALF_BETA1_RAD": Float64Col(shape=(), dflt=0.0, pos=64),
"aashower_JGANDALF_CHI2": Float64Col(shape=(), dflt=0.0, pos=65),
"aashower_JGANDALF_COVERAGE_ORIENTATION": Float64Col(shape=(), dflt=0.0, pos=66),
"aashower_JGANDALF_COVERAGE_POSITION": Float64Col(shape=(), dflt=0.0, pos=67),
"aashower_JGANDALF_LAMBDA": Float64Col(shape=(), dflt=0.0, pos=68),
"aashower_JGANDALF_NUMBER_OF_HITS": Float64Col(shape=(), dflt=0.0, pos=69),
"aashower_JGANDALF_NUMBER_OF_ITERATIONS": Float64Col(shape=(), dflt=0.0, pos=70),
"aashower_JSHOWERFIT_ENERGY": Float64Col(shape=(), dflt=0.0, pos=71),
"aashower_JSTART_LENGTH_METRES": Float64Col(shape=(), dflt=0.0, pos=72),
"aashower_JSTART_NPE_MIP": Float64Col(shape=(), dflt=0.0, pos=73),
"aashower_JSTART_NPE_MIP_TOTAL": Float64Col(shape=(), dflt=0.0, pos=74),
"aashower_JVETO_NPE": Float64Col(shape=(), dflt=0.0, pos=75),
"aashower_JVETO_NUMBER_OF_HITS": Float64Col(shape=(), dflt=0.0, pos=76),
"aashower_dir_x": Float64Col(shape=(), dflt=0.0, pos=77),
"aashower_dir_y": Float64Col(shape=(), dflt=0.0, pos=78),
"aashower_dir_z": Float64Col(shape=(), dflt=0.0, pos=79),
"aashower_length": Float64Col(shape=(), dflt=0.0, pos=80),
"aashower_likelihood": Float64Col(shape=(), dflt=0.0, pos=81),
"aashower_pos_x": Float64Col(shape=(), dflt=0.0, pos=82),
"aashower_pos_y": Float64Col(shape=(), dflt=0.0, pos=83),
"aashower_pos_z": Float64Col(shape=(), dflt=0.0, pos=84),
"aashower_rec_type": Float64Col(shape=(), dflt=0.0, pos=85),
"aashower_t": Float64Col(shape=(), dflt=0.0, pos=86),
"aashower_group_id": Float64Col(shape=(), dflt=0.0, pos=87),
"group_id": Int64Col(shape=(), dflt=0, pos=88)}
byteorder := 'little'
chunkshape := (32,)
autoindex := True
colindexes := {
"event_id": Index(6, medium, shuffle, zlib(1)).is_csi=False,
"group_id": Index(6, medium, shuffle, zlib(1)).is_csi=False}
I don't understand how to fix this problem. Do you have any idea what might be causing it?