From 04edffc7d8c474a294c7ba53dabfff2dd9e9c93d Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 3 Jun 2023 15:36:50 +0200
Subject: [PATCH 01/13] Changed default distribution_strategy from None to
 "auto", which is the new pytorch_lightning default

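A minimal sketch of the Lightning >= 2.0 behaviour this targets (the values
shown here are illustrative, not taken from this PR):

    from pytorch_lightning import Trainer

    # pytorch_lightning 2.x no longer accepts `strategy=None`; "auto" lets
    # Lightning pick a suitable strategy for the accelerator/devices itself.
    trainer = Trainer(
        accelerator="cpu",
        devices=1,
        strategy="auto",  # was `None` under pytorch_lightning 1.x
    )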
---
 src/graphnet/models/model.py          | 7 ++++---
 src/graphnet/models/standard_model.py | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/graphnet/models/model.py b/src/graphnet/models/model.py
index 5e95ae917..4c19b25d5 100644
--- a/src/graphnet/models/model.py
+++ b/src/graphnet/models/model.py
@@ -39,6 +39,7 @@ class Model(Logger, Configurable, LightningModule, ABC):
         log_every_n_steps: int = 1,
         gradient_clip_val: Optional[float] = None,
         distribution_strategy: Optional[str] = "ddp",
+        inference_strategy: Optional[str] = "auto",
         **trainer_kwargs: Any,
     ) -> None:
 
@@ -70,7 +71,7 @@ class Model(Logger, Configurable, LightningModule, ABC):
             devices=inference_devices,
             callbacks=callbacks,
             logger=logger,
-            strategy=None,
+            strategy=inference_strategy,
             **trainer_kwargs,
         )
 
@@ -157,7 +158,7 @@ class Model(Logger, Configurable, LightningModule, ABC):
         self,
         dataloader: DataLoader,
         gpus: Optional[Union[List[int], int]] = None,
-        distribution_strategy: Optional[str] = None,
+        distribution_strategy: Optional[str] = "auto",
     ) -> List[Tensor]:
         """Return predictions for `dataloader`.
 
@@ -195,7 +196,7 @@ class Model(Logger, Configurable, LightningModule, ABC):
         additional_attributes: Optional[List[str]] = None,
         index_column: str = "event_no",
         gpus: Optional[Union[List[int], int]] = None,
-        distribution_strategy: Optional[str] = None,
+        distribution_strategy: Optional[str] = "auto",
     ) -> pd.DataFrame:
         """Return predictions for `dataloader` as a DataFrame.
 
diff --git a/src/graphnet/models/standard_model.py b/src/graphnet/models/standard_model.py
index 41b70bb26..844f4f55b 100644
--- a/src/graphnet/models/standard_model.py
+++ b/src/graphnet/models/standard_model.py
@@ -179,7 +179,7 @@ class StandardModel(Model):
         self,
         dataloader: DataLoader,
         gpus: Optional[Union[List[int], int]] = None,
-        distribution_strategy: Optional[str] = None,
+        distribution_strategy: Optional[str] = "auto",
     ) -> List[Tensor]:
         """Return predictions for `dataloader`."""
         self.inference()
@@ -198,7 +198,7 @@ class StandardModel(Model):
         additional_attributes: Optional[List[str]] = None,
         index_column: str = "event_no",
         gpus: Optional[Union[List[int], int]] = None,
-        distribution_strategy: Optional[str] = None,
+        distribution_strategy: Optional[str] = "auto",
     ) -> pd.DataFrame:
         """Return predictions for `dataloader` as a DataFrame.
 
-- 
GitLab


From 428b184f5a2bfe8e9ec63fe1185e776b59e599c3 Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 3 Jun 2023 15:40:20 +0200
Subject: [PATCH 02/13] Changed default devices from None to 1, since None is
 no longer allowed in pytorch_lightning

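Sketch of the change in isolation (assuming pytorch_lightning >= 2.0, where a
CPU run needs an explicit device count):

    from pytorch_lightning import Trainer

    # `devices=None` is rejected by Lightning 2.x, so the CPU fallback now
    # pins a single device explicitly.
    trainer = Trainer(accelerator="cpu", devices=1)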
---
 src/graphnet/models/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graphnet/models/model.py b/src/graphnet/models/model.py
index 4c19b25d5..2fb1af4d9 100644
--- a/src/graphnet/models/model.py
+++ b/src/graphnet/models/model.py
@@ -48,7 +48,7 @@ class Model(Logger, Configurable, LightningModule, ABC):
             devices = gpus
         else:
             accelerator = "cpu"
-            devices = None
+            devices = 1
 
         self._trainer = Trainer(
             accelerator=accelerator,
-- 
GitLab


From 9ea6172e6685029f1e5e40ed0700b6fcf4ad5510 Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 3 Jun 2023 15:42:15 +0200
Subject: [PATCH 03/13] Changed main_progress_bar to train_progress_bar, since
 main_progress_bar no longer exists in pytorch_lightning >= 2.0

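For reference, a minimal sketch of the renamed attribute (the subclass name
below is illustrative, not part of this PR):

    from pytorch_lightning.callbacks import TQDMProgressBar

    class EpochProgressBar(TQDMProgressBar):
        """Progress bar that labels each epoch with its number."""

        def on_train_epoch_start(self, trainer, pl_module):
            super().on_train_epoch_start(trainer, pl_module)
            # `main_progress_bar` was removed in Lightning 2.0; the tqdm bar
            # for the training loop is now exposed as `train_progress_bar`.
            self.train_progress_bar.set_description(
                f"Epoch {trainer.current_epoch:2d}"
            )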
---
 src/graphnet/training/callbacks.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/graphnet/training/callbacks.py b/src/graphnet/training/callbacks.py
index 04d30933f..a66255ca6 100644
--- a/src/graphnet/training/callbacks.py
+++ b/src/graphnet/training/callbacks.py
@@ -123,12 +123,12 @@ class ProgressBar(TQDMProgressBar):
         lightning is to overwrite the progress bar from previous epochs.
         """
         if trainer.current_epoch > 0:
-            self.main_progress_bar.set_postfix(
+            self.train_progress_bar.set_postfix(
                 self.get_metrics(trainer, model)
             )
             print("")
         super().on_train_epoch_start(trainer, model)
-        self.main_progress_bar.set_description(
+        self.train_progress_bar.set_description(
             f"Epoch {trainer.current_epoch:2d}"
         )
 
@@ -150,5 +150,5 @@ class ProgressBar(TQDMProgressBar):
             assert isinstance(h, logging.StreamHandler)
             level = h.level
             h.setLevel(logging.ERROR)
-            logger.info(str(super().main_progress_bar))
+            logger.info(str(super().train_progress_bar))
             h.setLevel(level)
-- 
GitLab


From 4b1f5f55da3e137f8a3f375fceae6b2a41c063ec Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 3 Jun 2023 15:51:14 +0200
Subject: [PATCH 04/13] Added prefetch_factor to the subclass constructor to
 prevent prefetch_factor from being passed twice to the base class constructor
 (once through the default value and once through **kwargs).

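Sketch of the duplicate-keyword problem this avoids (the class name is
illustrative):

    from torch.utils.data import DataLoader

    class MyDataLoader(DataLoader):
        def __init__(self, dataset, prefetch_factor: int = 2, **kwargs):
            # Declaring `prefetch_factor` explicitly means a caller-supplied
            # value is consumed here instead of ending up in **kwargs, so the
            # base constructor no longer receives the keyword twice.
            super().__init__(
                dataset,
                prefetch_factor=prefetch_factor,
                **kwargs,
            )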
---
 src/graphnet/data/dataloader.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/graphnet/data/dataloader.py b/src/graphnet/data/dataloader.py
index b199f5865..1ded6fa37 100644
--- a/src/graphnet/data/dataloader.py
+++ b/src/graphnet/data/dataloader.py
@@ -34,6 +34,7 @@ class DataLoader(torch.utils.data.DataLoader):
         num_workers: int = 10,
         persistent_workers: bool = True,
         collate_fn: Callable = collate_fn,
+        prefetch_factor: int = 2,
         **kwargs: Any,
     ) -> None:
         """Construct `DataLoader`."""
@@ -45,7 +46,7 @@ class DataLoader(torch.utils.data.DataLoader):
             num_workers=num_workers,
             collate_fn=collate_fn,
             persistent_workers=persistent_workers,
-            prefetch_factor=2,
+            prefetch_factor=prefetch_factor,
             **kwargs,
         )
 
-- 
GitLab


From 8527a59f71b34e3097814bf4ec9eaa84f025d62e Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 3 Jun 2023 15:51:40 +0200
Subject: [PATCH 05/13] Added manual check for None values, since those were
 not caught by the try-except expression.

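Sketch of why the guard is needed (the helper name is illustrative; the exact
error message may vary between torch versions):

    import torch

    def safe_assign(graph: dict, key: str, value) -> None:
        # On recent torch versions `torch.tensor(None)` raises a RuntimeError
        # ("Could not infer dtype of NoneType"), which the existing
        # `except TypeError` does not catch, so None is filtered out first.
        if value is None:
            print(f"Could not assign `{key}` with type 'None' to graph.")
            return
        try:
            graph[key] = torch.tensor(value)
        except TypeError:
            print(f"Could not assign `{key}` as attribute to graph.")

    graph: dict = {}
    safe_assign(graph, "energy", 1.5)    # stored as a tensor
    safe_assign(graph, "azimuth", None)  # skipped with a message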
---
 src/graphnet/data/dataset.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/graphnet/data/dataset.py b/src/graphnet/data/dataset.py
index a0110988b..0feaf6d6b 100644
--- a/src/graphnet/data/dataset.py
+++ b/src/graphnet/data/dataset.py
@@ -591,6 +591,12 @@ class Dataset(Logger, Configurable, torch.utils.data.Dataset, ABC):
             add_these_to_graph.append(node_truth_dict)
         for write_dict in add_these_to_graph:
             for key, value in write_dict.items():
+                if value is None:
+                    self.debug(
+                        f"Could not assign `{key}` with type 'None' "
+                        f"as attribute to graph."
+                    )
+                    continue
                 try:
                     graph[key] = torch.tensor(value)
                 except TypeError:
-- 
GitLab


From ce87bd057b120038005770cd8b5da514a4c733b5 Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 3 Jun 2023 15:53:39 +0200
Subject: [PATCH 06/13] Updated package requirements for pytorch and pytorch
 lightning.

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index b262f0fa4..8850bc253 100644
--- a/setup.py
+++ b/setup.py
@@ -47,12 +47,12 @@ EXTRAS_REQUIRE = {
         "versioneer",
     ],
     "torch": [
-        "torch>=1.11",
+        "torch>=2.0",
         "torch-cluster>=1.6",
         "torch-scatter>=2.0",
         "torch-sparse>=0.6",
         "torch-geometric>=2.0",
-        "pytorch-lightning>=1.6, <2.0",
+        "pytorch-lightning>=2.0",
     ],
 }
 
-- 
GitLab


From b73954017f0aee88843dd8ed5b1c12cd9f8d1652 Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Thu, 15 Jun 2023 16:13:41 +0200
Subject: [PATCH 07/13] Refactored _construct_trainer to return a Trainer
 instead of storing one on the model. This removes all references to self from
 _construct_trainer, so it is also made a static method.

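Sketch of the resulting factory pattern (argument defaults here are
illustrative):

    from pytorch_lightning import Trainer

    class Model:  # illustrative stand-in for graphnet's Model
        @staticmethod
        def _construct_trainer(
            gpus=None,
            distribution_strategy="auto",
            **trainer_kwargs,
        ) -> Trainer:
            # No reference to `self`: the method only assembles a Trainer
            # from its arguments and returns it, so fit() and the predict
            # methods can each build their own short-lived Trainer.
            accelerator, devices = ("gpu", gpus) if gpus else ("cpu", 1)
            return Trainer(
                accelerator=accelerator,
                devices=devices,
                strategy=distribution_strategy,
                **trainer_kwargs,
            )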
---
 src/graphnet/models/model.py | 49 ++++++++++++++----------------------
 1 file changed, 19 insertions(+), 30 deletions(-)

diff --git a/src/graphnet/models/model.py b/src/graphnet/models/model.py
index 2fb1af4d9..00acf9109 100644
--- a/src/graphnet/models/model.py
+++ b/src/graphnet/models/model.py
@@ -29,8 +29,8 @@ class Model(Logger, Configurable, LightningModule, ABC):
     def forward(self, x: Union[Tensor, Data]) -> Union[Tensor, Data]:
         """Forward pass."""
 
-    def _construct_trainers(
-        self,
+    @staticmethod
+    def _construct_trainer(
         max_epochs: int = 10,
         gpus: Optional[Union[List[int], int]] = None,
         callbacks: Optional[List[Callback]] = None,
@@ -39,9 +39,8 @@ class Model(Logger, Configurable, LightningModule, ABC):
         log_every_n_steps: int = 1,
         gradient_clip_val: Optional[float] = None,
         distribution_strategy: Optional[str] = "ddp",
-        inference_strategy: Optional[str] = "auto",
         **trainer_kwargs: Any,
-    ) -> None:
+    ) -> Trainer:
 
         if gpus:
             accelerator = "gpu"
@@ -50,7 +49,7 @@ class Model(Logger, Configurable, LightningModule, ABC):
             accelerator = "cpu"
             devices = 1
 
-        self._trainer = Trainer(
+        trainer = Trainer(
             accelerator=accelerator,
             devices=devices,
             max_epochs=max_epochs,
@@ -59,21 +58,11 @@ class Model(Logger, Configurable, LightningModule, ABC):
             logger=logger,
             gradient_clip_val=gradient_clip_val,
             strategy=distribution_strategy,
+            default_root_dir=ckpt_path,
             **trainer_kwargs,
         )
 
-        inference_devices = devices
-        if isinstance(inference_devices, list):
-            inference_devices = inference_devices[:1]
-
-        self._inference_trainer = Trainer(
-            accelerator=accelerator,
-            devices=inference_devices,
-            callbacks=callbacks,
-            logger=logger,
-            strategy=inference_strategy,
-            **trainer_kwargs,
-        )
+        return trainer
 
     def fit(
         self,
@@ -102,7 +91,7 @@ class Model(Logger, Configurable, LightningModule, ABC):
             )
 
         self.train(mode=True)
-        self._construct_trainers(
+        trainer = self._construct_trainer(
             max_epochs=max_epochs,
             gpus=gpus,
             callbacks=callbacks,
@@ -115,7 +104,7 @@ class Model(Logger, Configurable, LightningModule, ABC):
         )
 
         try:
-            self._trainer.fit(
+            trainer.fit(
                 self, train_dataloader, val_dataloader, ckpt_path=ckpt_path
             )
         except KeyboardInterrupt:
@@ -166,17 +155,17 @@ class Model(Logger, Configurable, LightningModule, ABC):
         """
         self.train(mode=False)
 
-        if not hasattr(self, "_inference_trainer"):
-            self._construct_trainers(
-                gpus=gpus, distribution_strategy=distribution_strategy
-            )
-        elif gpus is not None:
-            self.warning(
-                "A `Trainer` instance has already been constructed, possibly "
-                "when the model was trained. Will use this to get predictions. "
-                f"Argument `gpus = {gpus}` will be ignored."
-            )
-        predictions_list = self._inference_trainer.predict(self, dataloader)
+        callbacks = self._create_default_callbacks(
+            val_dataloader=None,
+        )
+
+        inference_trainer = self._construct_trainer(
+            gpus=gpus,
+            distribution_strategy=distribution_strategy,
+            callbacks=callbacks,
+        )
+
+        predictions_list = inference_trainer.predict(self, dataloader)
         assert len(predictions_list), "Got no predictions"
 
         nb_outputs = len(predictions_list[0])
-- 
GitLab


From 746061e09d4def625c953e05faab798aab858563 Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 17 Jun 2023 18:56:58 +0200
Subject: [PATCH 08/13] Require torch-geometric>=2.1: version 2.0.0 produced an
 error, and since this PR breaks backwards compatibility anyway, raising the
 minimum version is the simpler fix.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 8850bc253..a9dcd4540 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ EXTRAS_REQUIRE = {
         "torch-cluster>=1.6",
         "torch-scatter>=2.0",
         "torch-sparse>=0.6",
-        "torch-geometric>=2.0",
+        "torch-geometric>=2.1",
         "pytorch-lightning>=2.0",
     ],
 }
-- 
GitLab


From dfa410f9f9c49292806408f50cec6192f4ddcd62 Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 17 Jun 2023 18:59:42 +0200
Subject: [PATCH 09/13] Updated requirements.

---
 requirements/torch_cpu.txt   | 9 ++-------
 requirements/torch_gpu.txt   | 7 +------
 requirements/torch_macos.txt | 7 +------
 3 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/requirements/torch_cpu.txt b/requirements/torch_cpu.txt
index 76f533905..6f68e3600 100644
--- a/requirements/torch_cpu.txt
+++ b/requirements/torch_cpu.txt
@@ -1,7 +1,2 @@
---find-links https://download.pytorch.org/whl/torch_stable.html
-torch==1.11+cpu
---find-links https://data.pyg.org/whl/torch-1.11.0+cpu.html
-torch-cluster==1.6.0
-torch_scatter==2.0.9
-torch-sparse==0.6.13
-torch_geometric==2.0.4
\ No newline at end of file
+--find-links https://download.pytorch.org/whl/cpu
+--find-links https://data.pyg.org/whl/torch-2.0.0+cpu.html
\ No newline at end of file
diff --git a/requirements/torch_gpu.txt b/requirements/torch_gpu.txt
index 4004fd8af..c325f35af 100644
--- a/requirements/torch_gpu.txt
+++ b/requirements/torch_gpu.txt
@@ -1,8 +1,3 @@
 # Contains packages recommended for functional performance
 --find-links https://download.pytorch.org/whl/torch_stable.html
-torch==1.11+cu115
---find-links https://data.pyg.org/whl/torch-1.11.0+cu115.html
-torch-cluster==1.6.0
-torch_scatter==2.0.9
-torch-sparse==0.6.13
-torch_geometric==2.0.4
\ No newline at end of file
+--find-links https://data.pyg.org/whl/torch-2.0.0+cu117.html
diff --git a/requirements/torch_macos.txt b/requirements/torch_macos.txt
index a9e43921c..be7a35257 100644
--- a/requirements/torch_macos.txt
+++ b/requirements/torch_macos.txt
@@ -1,7 +1,2 @@
 --find-links https://download.pytorch.org/whl/torch_stable.html
-torch==1.11
---find-links https://data.pyg.org/whl/torch-1.11.0+cpu.html
-torch-cluster==1.6.0
-torch_scatter==2.0.9
-torch-sparse==0.6.13
-torch_geometric==2.0.4
\ No newline at end of file
+--find-links https://data.pyg.org/whl/torch-2.0.0+cpu.html
\ No newline at end of file
-- 
GitLab


From f0a909fe637be142229dad23dd8e9845e2d4aea5 Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Sat, 17 Jun 2023 19:00:26 +0200
Subject: [PATCH 10/13] Revert "Added manual check for None values, since those
 were not caught by the try-except expression."

This reverts commit 8527a59f71b34e3097814bf4ec9eaa84f025d62e.
---
 src/graphnet/data/dataset.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/graphnet/data/dataset.py b/src/graphnet/data/dataset.py
index 0feaf6d6b..a0110988b 100644
--- a/src/graphnet/data/dataset.py
+++ b/src/graphnet/data/dataset.py
@@ -591,12 +591,6 @@ class Dataset(Logger, Configurable, torch.utils.data.Dataset, ABC):
             add_these_to_graph.append(node_truth_dict)
         for write_dict in add_these_to_graph:
             for key, value in write_dict.items():
-                if value is None:
-                    self.debug(
-                        f"Could not assign `{key}` with type 'None' "
-                        f"as attribute to graph."
-                    )
-                    continue
                 try:
                     graph[key] = torch.tensor(value)
                 except TypeError:
-- 
GitLab


From 50703304ffa8c3bff567e28307ea665356663cdb Mon Sep 17 00:00:00 2001
From: AMHermansen <97125645+AMHermansen@users.noreply.github.com>
Date: Thu, 22 Jun 2023 12:20:14 +0200
Subject: [PATCH 11/13] Removed python 3.7 from setup

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index a9dcd4540..67cf25023 100644
--- a/setup.py
+++ b/setup.py
@@ -61,7 +61,6 @@ CLASSIFIERS = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
     "Intended Audience :: Science/Research",
-    "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
-- 
GitLab


From 5d53779c61a6715e1fb1d6f7ded7f6119a2091fb Mon Sep 17 00:00:00 2001
From: AMHermansen <mail@andreashermansen.dk>
Date: Mon, 26 Jun 2023 10:50:06 +0200
Subject: [PATCH 12/13] Removed python 3.7 from build.yml

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b840a4c0c..d36ff97d8 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -73,7 +73,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9, '3.10']
+        python-version: [3.8, 3.9, '3.10']
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-- 
GitLab


From 1d3b45d4ceba0c5f25694dd5ed8b31544af4f07c Mon Sep 17 00:00:00 2001
From: Rasmus Oersoe <rahn@outlook.dk>
Date: Mon, 26 Jun 2023 12:47:47 +0200
Subject: [PATCH 13/13] debug

---
 .github/workflows/build.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d36ff97d8..00f59c0bd 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -46,8 +46,10 @@ jobs:
       - uses: actions/checkout@v3
       - name: Upgrade packages already installed on icecube/icetray
         run: |
+          python --version
           pip install --upgrade astropy  # Installed version incompatible with numpy 1.23.0 [https://github.com/astropy/astropy/issues/12534]
           pip install --ignore-installed PyYAML  # Distutils installed [https://github.com/pypa/pip/issues/5247]
+          pip install --upgrade psutil # lets see..
       - name: Install package
         uses: ./.github/actions/install
         with:
-- 
GitLab