From 04edffc7d8c474a294c7ba53dabfff2dd9e9c93d Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Sat, 3 Jun 2023 15:36:50 +0200 Subject: [PATCH 01/13] Changed distribution from None to "auto", which is the new pytorch_lightning default --- src/graphnet/models/model.py | 7 ++++--- src/graphnet/models/standard_model.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/graphnet/models/model.py b/src/graphnet/models/model.py index 5e95ae917..4c19b25d5 100644 --- a/src/graphnet/models/model.py +++ b/src/graphnet/models/model.py @@ -39,6 +39,7 @@ class Model(Logger, Configurable, LightningModule, ABC): log_every_n_steps: int = 1, gradient_clip_val: Optional[float] = None, distribution_strategy: Optional[str] = "ddp", + inference_strategy: Optional[str] = "auto", **trainer_kwargs: Any, ) -> None: @@ -70,7 +71,7 @@ class Model(Logger, Configurable, LightningModule, ABC): devices=inference_devices, callbacks=callbacks, logger=logger, - strategy=None, + strategy=inference_strategy, **trainer_kwargs, ) @@ -157,7 +158,7 @@ class Model(Logger, Configurable, LightningModule, ABC): self, dataloader: DataLoader, gpus: Optional[Union[List[int], int]] = None, - distribution_strategy: Optional[str] = None, + distribution_strategy: Optional[str] = "auto", ) -> List[Tensor]: """Return predictions for `dataloader`. @@ -195,7 +196,7 @@ class Model(Logger, Configurable, LightningModule, ABC): additional_attributes: Optional[List[str]] = None, index_column: str = "event_no", gpus: Optional[Union[List[int], int]] = None, - distribution_strategy: Optional[str] = None, + distribution_strategy: Optional[str] = "auto", ) -> pd.DataFrame: """Return predictions for `dataloader` as a DataFrame. 
diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py index 41b70bb26..844f4f55b 100644 --- a/src/graphnet/models/standard_model.py +++ b/src/graphnet/models/standard_model.py @@ -179,7 +179,7 @@ class StandardModel(Model): self, dataloader: DataLoader, gpus: Optional[Union[List[int], int]] = None, - distribution_strategy: Optional[str] = None, + distribution_strategy: Optional[str] = "auto", ) -> List[Tensor]: """Return predictions for `dataloader`.""" self.inference() @@ -198,7 +198,7 @@ class StandardModel(Model): additional_attributes: Optional[List[str]] = None, index_column: str = "event_no", gpus: Optional[Union[List[int], int]] = None, - distribution_strategy: Optional[str] = None, + distribution_strategy: Optional[str] = "auto", ) -> pd.DataFrame: """Return predictions for `dataloader` as a DataFrame. -- GitLab From 428b184f5a2bfe8e9ec63fe1185e776b59e599c3 Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Sat, 3 Jun 2023 15:40:20 +0200 Subject: [PATCH 02/13] Changed default devices from None to 1, since None is no longer allowed in pytorch_lightning --- src/graphnet/models/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graphnet/models/model.py b/src/graphnet/models/model.py index 4c19b25d5..2fb1af4d9 100644 --- a/src/graphnet/models/model.py +++ b/src/graphnet/models/model.py @@ -48,7 +48,7 @@ class Model(Logger, Configurable, LightningModule, ABC): devices = gpus else: accelerator = "cpu" - devices = None + devices = 1 self._trainer = Trainer( accelerator=accelerator, -- GitLab From 9ea6172e6685029f1e5e40ed0700b6fcf4ad5510 Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Sat, 3 Jun 2023 15:42:15 +0200 Subject: [PATCH 03/13] Changed main_progress_bar to train_progress_bar, since main_progress_bar no longer exists in pytorch_lightning >= 2.0 --- src/graphnet/training/callbacks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/src/graphnet/training/callbacks.py b/src/graphnet/training/callbacks.py index 04d30933f..a66255ca6 100644 --- a/src/graphnet/training/callbacks.py +++ b/src/graphnet/training/callbacks.py @@ -123,12 +123,12 @@ class ProgressBar(TQDMProgressBar): lightning is to overwrite the progress bar from previous epochs. """ if trainer.current_epoch > 0: - self.main_progress_bar.set_postfix( + self.train_progress_bar.set_postfix( self.get_metrics(trainer, model) ) print("") super().on_train_epoch_start(trainer, model) - self.main_progress_bar.set_description( + self.train_progress_bar.set_description( f"Epoch {trainer.current_epoch:2d}" ) @@ -150,5 +150,5 @@ class ProgressBar(TQDMProgressBar): assert isinstance(h, logging.StreamHandler) level = h.level h.setLevel(logging.ERROR) - logger.info(str(super().main_progress_bar)) + logger.info(str(super().train_progress_bar)) h.setLevel(level) -- GitLab From 4b1f5f55da3e137f8a3f375fceae6b2a41c063ec Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Sat, 3 Jun 2023 15:51:14 +0200 Subject: [PATCH 04/13] Added prefetch_factor in the subclass constructor to prevent prefetch_factor from being passed twice to the base class constructor (default value, and through **kwargs). 
--- src/graphnet/data/dataloader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/graphnet/data/dataloader.py b/src/graphnet/data/dataloader.py index b199f5865..1ded6fa37 100644 --- a/src/graphnet/data/dataloader.py +++ b/src/graphnet/data/dataloader.py @@ -34,6 +34,7 @@ class DataLoader(torch.utils.data.DataLoader): num_workers: int = 10, persistent_workers: bool = True, collate_fn: Callable = collate_fn, + prefetch_factor: int = 2, **kwargs: Any, ) -> None: """Construct `DataLoader`.""" @@ -45,7 +46,7 @@ class DataLoader(torch.utils.data.DataLoader): num_workers=num_workers, collate_fn=collate_fn, persistent_workers=persistent_workers, - prefetch_factor=2, + prefetch_factor=prefetch_factor, **kwargs, ) -- GitLab From 8527a59f71b34e3097814bf4ec9eaa84f025d62e Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Sat, 3 Jun 2023 15:51:40 +0200 Subject: [PATCH 05/13] Added manual test for None type, since those were not caught in the try-except expression. --- src/graphnet/data/dataset.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/graphnet/data/dataset.py b/src/graphnet/data/dataset.py index a0110988b..0feaf6d6b 100644 --- a/src/graphnet/data/dataset.py +++ b/src/graphnet/data/dataset.py @@ -591,6 +591,12 @@ class Dataset(Logger, Configurable, torch.utils.data.Dataset, ABC): add_these_to_graph.append(node_truth_dict) for write_dict in add_these_to_graph: for key, value in write_dict.items(): + if value is None: + self.debug( + f"Could not assign `{key}` with type 'None' " + f"as attribute to graph." + ) + continue try: graph[key] = torch.tensor(value) except TypeError: -- GitLab From ce87bd057b120038005770cd8b5da514a4c733b5 Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Sat, 3 Jun 2023 15:53:39 +0200 Subject: [PATCH 06/13] Updated package requirements for pytorch and pytorch lightning. 
--- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b262f0fa4..8850bc253 100644 --- a/setup.py +++ b/setup.py @@ -47,12 +47,12 @@ EXTRAS_REQUIRE = { "versioneer", ], "torch": [ - "torch>=1.11", + "torch>=2.0", "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", "torch-geometric>=2.0", - "pytorch-lightning>=1.6, <2.0", + "pytorch-lightning>=2.0", ], } -- GitLab From b73954017f0aee88843dd8ed5b1c12cd9f8d1652 Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Thu, 15 Jun 2023 16:13:41 +0200 Subject: [PATCH 07/13] Refactored _construct_trainer to return a trainer, instead of creating a trainer in the class. This removes all references to the object itself in _construct_trainer and as such, it is also made into a static method. --- src/graphnet/models/model.py | 49 ++++++++++++++---------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/src/graphnet/models/model.py b/src/graphnet/models/model.py index 2fb1af4d9..00acf9109 100644 --- a/src/graphnet/models/model.py +++ b/src/graphnet/models/model.py @@ -29,8 +29,8 @@ class Model(Logger, Configurable, LightningModule, ABC): def forward(self, x: Union[Tensor, Data]) -> Union[Tensor, Data]: """Forward pass.""" - def _construct_trainers( - self, + @staticmethod + def _construct_trainer( max_epochs: int = 10, gpus: Optional[Union[List[int], int]] = None, callbacks: Optional[List[Callback]] = None, @@ -39,9 +39,8 @@ class Model(Logger, Configurable, LightningModule, ABC): log_every_n_steps: int = 1, gradient_clip_val: Optional[float] = None, distribution_strategy: Optional[str] = "ddp", - inference_strategy: Optional[str] = "auto", **trainer_kwargs: Any, - ) -> None: + ) -> Trainer: if gpus: accelerator = "gpu" @@ -50,7 +49,7 @@ class Model(Logger, Configurable, LightningModule, ABC): accelerator = "cpu" devices = 1 - self._trainer = Trainer( + trainer = Trainer( accelerator=accelerator, devices=devices, 
max_epochs=max_epochs, @@ -59,21 +58,11 @@ class Model(Logger, Configurable, LightningModule, ABC): logger=logger, gradient_clip_val=gradient_clip_val, strategy=distribution_strategy, + default_root_dir=ckpt_path, **trainer_kwargs, ) - inference_devices = devices - if isinstance(inference_devices, list): - inference_devices = inference_devices[:1] - - self._inference_trainer = Trainer( - accelerator=accelerator, - devices=inference_devices, - callbacks=callbacks, - logger=logger, - strategy=inference_strategy, - **trainer_kwargs, - ) + return trainer def fit( self, @@ -102,7 +91,7 @@ class Model(Logger, Configurable, LightningModule, ABC): ) self.train(mode=True) - self._construct_trainers( + trainer = self._construct_trainer( max_epochs=max_epochs, gpus=gpus, callbacks=callbacks, @@ -115,7 +104,7 @@ class Model(Logger, Configurable, LightningModule, ABC): ) try: - self._trainer.fit( + trainer.fit( self, train_dataloader, val_dataloader, ckpt_path=ckpt_path ) except KeyboardInterrupt: @@ -166,17 +155,17 @@ class Model(Logger, Configurable, LightningModule, ABC): """ self.train(mode=False) - if not hasattr(self, "_inference_trainer"): - self._construct_trainers( - gpus=gpus, distribution_strategy=distribution_strategy - ) - elif gpus is not None: - self.warning( - "A `Trainer` instance has already been constructed, possibly " - "when the model was trained. Will use this to get predictions. " - f"Argument `gpus = {gpus}` will be ignored." 
- ) - predictions_list = self._inference_trainer.predict(self, dataloader) + callbacks = self._create_default_callbacks( + val_dataloader=None, + ) + + inference_trainer = self._construct_trainer( + gpus=gpus, + distribution_strategy=distribution_strategy, + callbacks=callbacks, + ) + + predictions_list = inference_trainer.predict(self, dataloader) assert len(predictions_list), "Got no predictions" nb_outputs = len(predictions_list[0]) -- GitLab From 746061e09d4def625c953e05faab798aab858563 Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Sat, 17 Jun 2023 18:56:58 +0200 Subject: [PATCH 08/13] I was getting an error for torch-geometric 2.0.0, and since the PR will break backwards compatibility anyway I figured it was an easier change to require torch-geometric>=2.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8850bc253..a9dcd4540 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ EXTRAS_REQUIRE = { "torch-cluster>=1.6", "torch-scatter>=2.0", "torch-sparse>=0.6", - "torch-geometric>=2.0", + "torch-geometric>=2.1", "pytorch-lightning>=2.0", ], } -- GitLab From dfa410f9f9c49292806408f50cec6192f4ddcd62 Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Sat, 17 Jun 2023 18:59:42 +0200 Subject: [PATCH 09/13] Updated requirements. 
--- requirements/torch_cpu.txt | 9 ++------- requirements/torch_gpu.txt | 7 +------ requirements/torch_macos.txt | 7 +------ 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/requirements/torch_cpu.txt b/requirements/torch_cpu.txt index 76f533905..6f68e3600 100644 --- a/requirements/torch_cpu.txt +++ b/requirements/torch_cpu.txt @@ -1,7 +1,2 @@ ---find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.11+cpu ---find-links https://data.pyg.org/whl/torch-1.11.0+cpu.html -torch-cluster==1.6.0 -torch_scatter==2.0.9 -torch-sparse==0.6.13 -torch_geometric==2.0.4 \ No newline at end of file +--find-links https://download.pytorch.org/whl/cpu +--find-links https://data.pyg.org/whl/torch-2.0.0+cpu.html \ No newline at end of file diff --git a/requirements/torch_gpu.txt b/requirements/torch_gpu.txt index 4004fd8af..c325f35af 100644 --- a/requirements/torch_gpu.txt +++ b/requirements/torch_gpu.txt @@ -1,8 +1,3 @@ # Contains packages recommended for functional performance --find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.11+cu115 ---find-links https://data.pyg.org/whl/torch-1.11.0+cu115.html -torch-cluster==1.6.0 -torch_scatter==2.0.9 -torch-sparse==0.6.13 -torch_geometric==2.0.4 \ No newline at end of file +--find-links https://data.pyg.org/whl/torch-2.0.0+cu117.html diff --git a/requirements/torch_macos.txt b/requirements/torch_macos.txt index a9e43921c..be7a35257 100644 --- a/requirements/torch_macos.txt +++ b/requirements/torch_macos.txt @@ -1,7 +1,2 @@ --find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.11 ---find-links https://data.pyg.org/whl/torch-1.11.0+cpu.html -torch-cluster==1.6.0 -torch_scatter==2.0.9 -torch-sparse==0.6.13 -torch_geometric==2.0.4 \ No newline at end of file +--find-links https://data.pyg.org/whl/torch-2.0.0+cpu.html \ No newline at end of file -- GitLab From f0a909fe637be142229dad23dd8e9845e2d4aea5 Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: 
Sat, 17 Jun 2023 19:00:26 +0200 Subject: [PATCH 10/13] Revert "Added manaul test for None type, since those were not caught in the try-except expression." This reverts commit 8527a59f71b34e3097814bf4ec9eaa84f025d62e. --- src/graphnet/data/dataset.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/graphnet/data/dataset.py b/src/graphnet/data/dataset.py index 0feaf6d6b..a0110988b 100644 --- a/src/graphnet/data/dataset.py +++ b/src/graphnet/data/dataset.py @@ -591,12 +591,6 @@ class Dataset(Logger, Configurable, torch.utils.data.Dataset, ABC): add_these_to_graph.append(node_truth_dict) for write_dict in add_these_to_graph: for key, value in write_dict.items(): - if value is None: - self.debug( - f"Could not assign `{key}` with type 'None' " - f"as attribute to graph." - ) - continue try: graph[key] = torch.tensor(value) except TypeError: -- GitLab From 50703304ffa8c3bff567e28307ea665356663cdb Mon Sep 17 00:00:00 2001 From: AMHermansen <97125645+AMHermansen@users.noreply.github.com> Date: Thu, 22 Jun 2023 12:20:14 +0200 Subject: [PATCH 11/13] Removed python 3.7 from setup --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index a9dcd4540..67cf25023 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,6 @@ CLASSIFIERS = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Intended Audience :: Science/Research", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", -- GitLab From 5d53779c61a6715e1fb1d6f7ded7f6119a2091fb Mon Sep 17 00:00:00 2001 From: AMHermansen <mail@andreashermansen.dk> Date: Mon, 26 Jun 2023 10:50:06 +0200 Subject: [PATCH 12/13] Removed python 3.7 from build.yml --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b840a4c0c..d36ff97d8 100644 --- a/.github/workflows/build.yml 
+++ b/.github/workflows/build.yml @@ -73,7 +73,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, '3.10'] + python-version: [3.8, 3.9, '3.10'] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} -- GitLab From 1d3b45d4ceba0c5f25694dd5ed8b31544af4f07c Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe <rahn@outlook.dk> Date: Mon, 26 Jun 2023 12:47:47 +0200 Subject: [PATCH 13/13] debug --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d36ff97d8..00f59c0bd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,8 +46,10 @@ jobs: - uses: actions/checkout@v3 - name: Upgrade packages already installed on icecube/icetray run: | + python --version pip install --upgrade astropy # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534] pip install --ignore-installed PyYAML # Distutils installed [https://github.com/pypa/pip/issues/5247] + pip install --upgrade psutil # lets see.. - name: Install package uses: ./.github/actions/install with: -- GitLab