ufal
diff --git a/‎labs/05/cags_classification.py
Lines changed: 69 additions & 0 deletions b/‎labs/05/cags_classification.py
Lines changed: 69 additions & 0 deletions
diff --git a/‎labs/05/cags_segmentation.py
Lines changed: 86 additions & 0 deletions b/‎labs/05/cags_segmentation.py
Lines changed: 86 additions & 0 deletions
diff --git a/‎labs/05/cnn_manual.py
Lines changed: 176 additions & 0 deletions b/‎labs/05/cnn_manual.py
Lines changed: 176 additions & 0 deletions
diff --git a/‎labs/05/image_classification.py
Lines changed: 59 additions & 0 deletions b/‎labs/05/image_classification.py
Lines changed: 59 additions & 0 deletions
diff --git a/‎labs/05/rottweiler.jpg
65.4 KB b/‎labs/05/rottweiler.jpg
65.4 KB
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+import argparse
+import datetime
+import os
+import re
+
+import numpy as np
+import timm
+import torch
+import torchvision.transforms.v2 as v2
+
+import npfl138
+npfl138.require_version("2425.5")
+from npfl138.datasets.cags import CAGS
+
+# Command-line options of the script. TODO: Define reasonable defaults and optionally more parameters.
+# Also, you can set the number of threads to 0 to use all your CPU cores.
+parser = argparse.ArgumentParser()
+parser.add_argument("--batch_size", default=..., type=int, help="Batch size.")  # `...` is a placeholder to fill in.
+parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.")  # `...` is a placeholder to fill in.
+parser.add_argument("--seed", default=42, type=int, help="Random seed.")
+parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
+
+
+def main(args: argparse.Namespace) -> None:  # Template: set up the run, load CAGS and the backbone, write predictions.
+    # Set the random seed and the number of threads.
+    npfl138.startup(args.seed, args.threads)
+    npfl138.global_keras_initializers()
+
+    # Create logdir name from the script name, a timestamp, and all arguments (keys abbreviated, `batch_size` -> `bs`).
+    args.logdir = os.path.join("logs", "{}-{}-{}".format(
+        os.path.basename(globals().get("__file__", "notebook")),
+        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
+        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items())))
+    ))
+
+    # Load the data. The individual examples are dictionaries with the keys:
+    # - "image", a `[3, 224, 224]` tensor of `torch.uint8` values in [0-255] range,
+    # - "mask", a `[1, 224, 224]` tensor of `torch.float32` values in [0-1] range,
+    # - "label", a scalar of the correct class in `range(CAGS.LABELS)`.
+    # The `decode_on_demand` argument can be set to `True` to save memory and decode
+    # each image only when accessed, but it will most likely slow down training.
+    cags = CAGS(decode_on_demand=False)
+
+    # Load the EfficientNetV2-B0 model without the classification layer. For an
+    # input image, the model returns a tensor of shape `[batch_size, 1280]`.
+    efficientnetv2_b0 = timm.create_model("tf_efficientnetv2_b0.in1k", pretrained=True, num_classes=0)
+
+    # Create a simple preprocessing performing necessary normalization with the mean and std of the pretrained model.
+    preprocessing = v2.Compose([
+        v2.ToDtype(torch.float32, scale=True),  # The `scale=True` also rescales the image to [0, 1].
+        v2.Normalize(mean=efficientnetv2_b0.pretrained_cfg["mean"], std=efficientnetv2_b0.pretrained_cfg["std"]),
+    ])
+
+    # TODO: Create the model and train it.
+    model = ...  # Placeholder; the object must provide the `predict` method called below.
+
+    # Generate test set annotations, but in `args.logdir` to allow parallel execution.
+    os.makedirs(args.logdir, exist_ok=True)
+    with open(os.path.join(args.logdir, "cags_classification.txt"), "w", encoding="utf-8") as predictions_file:
+        # TODO: Perform the prediction on the test data. The line below assumes you have
+        # a dataloader `test` where the individual examples are `(image, target)` pairs.
+        for prediction in model.predict(test, data_with_labels=True):
+            print(np.argmax(prediction), file=predictions_file)  # The argmax index of each prediction, one per line.
+
+
+if __name__ == "__main__":  # Run only as a script; without `__file__` (e.g., in a notebook) parse no CLI arguments.
+    main_args = parser.parse_args([] if "__file__" not in globals() else None)
+    main(main_args)
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+import argparse
+import datetime
+import os
+import re
+
+import numpy as np
+import timm
+import torch
+import torchvision.transforms.v2 as v2
+
+import npfl138
+npfl138.require_version("2425.5")
+from npfl138.datasets.cags import CAGS
+
+# Command-line options of the script. TODO: Define reasonable defaults and optionally more parameters.
+# Also, you can set the number of threads to 0 to use all your CPU cores.
+parser = argparse.ArgumentParser()
+parser.add_argument("--batch_size", default=..., type=int, help="Batch size.")  # `...` is a placeholder to fill in.
+parser.add_argument("--epochs", default=None, type=int, help="Number of epochs.")  # NOTE(review): `None`; see the TODO.
+parser.add_argument("--seed", default=42, type=int, help="Random seed.")
+parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
+
+
+def main(args: argparse.Namespace) -> None:  # Template: set up the run, load CAGS and the backbone, write RLE masks.
+    # Set the random seed and the number of threads.
+    npfl138.startup(args.seed, args.threads)
+    npfl138.global_keras_initializers()
+
+    # Create logdir name from the script name, a timestamp, and all arguments (keys abbreviated, `batch_size` -> `bs`).
+    args.logdir = os.path.join("logs", "{}-{}-{}".format(
+        os.path.basename(globals().get("__file__", "notebook")),
+        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
+        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items())))
+    ))
+
+    # Load the data. The individual examples are dictionaries with the keys:
+    # - "image", a `[3, 224, 224]` tensor of `torch.uint8` values in [0-255] range,
+    # - "mask", a `[1, 224, 224]` tensor of `torch.float32` values in [0-1] range,
+    # - "label", a scalar of the correct class in `range(CAGS.LABELS)`.
+    # The `decode_on_demand` argument can be set to `True` to save memory and decode
+    # each image only when accessed, but it will most likely slow down training.
+    cags = CAGS(decode_on_demand=False)
+
+    # Load the EfficientNetV2-B0 model without the classification layer.
+    # Apart from calling the model as in the classification task, you can call it using
+    #   output, features = efficientnetv2_b0.forward_intermediates(batch_of_images)
+    # obtaining (assuming the input images have 224x224 resolution):
+    # - `output` is a `[N, 1280, 7, 7]` tensor with the final features before global average pooling,
+    # - `features` is a list of intermediate features with resolution 112x112, 56x56, 28x28, 14x14, 7x7.
+    efficientnetv2_b0 = timm.create_model("tf_efficientnetv2_b0.in1k", pretrained=True, num_classes=0)
+
+    # Create a simple preprocessing performing necessary normalization with the mean and std of the pretrained model.
+    preprocessing = v2.Compose([
+        v2.ToDtype(torch.float32, scale=True),  # The `scale=True` also rescales the image to [0, 1].
+        v2.Normalize(mean=efficientnetv2_b0.pretrained_cfg["mean"], std=efficientnetv2_b0.pretrained_cfg["std"]),
+    ])
+
+    # TODO: Create the model and train it.
+    model = ...  # Placeholder; the object must provide the `predict` method called below.
+
+    # Generate test set annotations, but in `args.logdir` to allow parallel execution.
+    os.makedirs(args.logdir, exist_ok=True)
+    with open(os.path.join(args.logdir, "cags_segmentation.txt"), "w", encoding="utf-8") as predictions_file:
+        # TODO: Perform the prediction on the test data. The line below assumes you have
+        # a dataloader `test` where the individual examples are `(image, target)` pairs.
+        for mask in model.predict(test, data_with_labels=True):
+            zeros, ones, runs = 0, 0, []  # Lengths of the currently open run of 0s and of 1s, and the finished runs.
+            for pixel in np.reshape(mask >= 0.5, [-1]):  # Threshold at 0.5 and visit the pixels in flattened order.
+                if pixel:
+                    if zeros or (not zeros and not ones):  # A run of 1s starts here (also at the very first pixel).
+                        runs.append(zeros)  # Close the preceding run of 0s; it is empty if the mask starts with a 1.
+                        zeros = 0
+                    ones += 1
+                else:
+                    if ones:  # A run of 1s has just ended.
+                        runs.append(ones)
+                        ones = 0
+                    zeros += 1
+            runs.append(zeros + ones)  # Close the last run; at most one of the two counters is nonzero here.
+            print(*runs, file=predictions_file)  # One line per mask: alternating run lengths, starting with the 0s.
+
+
+if __name__ == "__main__":  # Run only as a script; without `__file__` (e.g., in a notebook) parse no CLI arguments.
+    main_args = parser.parse_args([] if "__file__" not in globals() else None)
+    main(main_args)
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+import argparse
+
+import numpy as np
+import torch
+
+import npfl138
+npfl138.require_version("2425.5")
+from npfl138.datasets.mnist import MNIST
+
+parser = argparse.ArgumentParser()
+# These arguments will be set appropriately by ReCodEx, even if you change them.
+parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
+parser.add_argument("--cnn", default="5-3-2,10-3-2", type=str, help="CNN architecture.")  # `filters-kernel-stride,...`
+parser.add_argument("--epochs", default=5, type=int, help="Number of epochs.")
+parser.add_argument("--learning_rate", default=0.1, type=float, help="Learning rate.")
+parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
+parser.add_argument("--seed", default=42, type=int, help="Random seed.")
+parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
+parser.add_argument("--verify", default=False, action="store_true", help="Verify the implementation.")
+# If you add more arguments, ReCodEx will keep them with your default values.
+
+
+class Convolution:  # Manually implemented convolution with ReLU; `forward` and `backward` are left as TODOs.
+    def __init__(
+        self, filters: int, kernel_size: int, stride: int, input_shape: list[int], seed: int, verify: bool,
+    ) -> None:
+        # Create a convolutional layer with the given arguments and given input shape (only its channel count is read).
+        # Note that we use NHWC format, so the MNIST images have shape [28, 28, 1]. NOTE(review): `seed` is unused here.
+        self._filters = filters
+        self._kernel_size = kernel_size
+        self._stride = stride
+        self._verify = verify
+
+        # Here the kernel and bias variables are created, the kernel (standard normal values times 0.1) has shape
+        # [kernel_height, kernel_width, input_channels, output_channels], bias (zeros) [output_channels].
+        self._kernel = torch.nn.Parameter(torch.randn(kernel_size, kernel_size, input_shape[2], filters) * 0.1)
+        self._bias = torch.nn.Parameter(torch.zeros(filters))
+
+    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
+        # TODO: Compute the forward propagation through the convolution
+        # with ReLU activation, and return the result.
+        #
+        # In order for the computation to be reasonably fast, you cannot
+        # manually iterate through the individual pixels, batch examples,
+        # input filters, or output filters. However, you can manually
+        # iterate through the kernel size.
+        output = ...  # Placeholder to be replaced by the actual computation.
+
+        # If requested, verify `output` against `conv2d` followed by ReLU (run channels-first and moved back).
+        if self._verify:
+            reference = torch.relu(torch.nn.functional.conv2d(
+                inputs.movedim(-1, 1), self._kernel.permute(3, 2, 0, 1), self._bias, self._stride)).movedim(1, -1)
+            np.testing.assert_allclose(output.detach().numpy(), reference.detach().numpy(), atol=1e-4,
+                                       err_msg="Forward pass differs!")
+
+        return output
+
+    def backward(
+        self, inputs: torch.Tensor, outputs: torch.Tensor, outputs_gradient: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        # TODO: Given this layer's inputs, this layer's outputs,
+        # and the gradient with respect to the layer's outputs,
+        # compute the derivatives of the loss with respect to
+        # - the `inputs` layer,
+        # - `self._kernel`,
+        # - `self._bias`.
+        inputs_gradient, kernel_gradient, bias_gradient = ..., ..., ...  # Placeholders to be replaced.
+
+        # If requested, verify that the three computed gradients match the ones obtained from autograd.
+        if self._verify:
+            with torch.enable_grad():
+                inputs.requires_grad_(True)
+                inputs.grad = self._kernel.grad = self._bias.grad = None  # Drop gradients left from earlier calls.
+                reference = (outputs > 0) * torch.nn.functional.conv2d(  # Zeroed where the layer output is not positive.
+                    inputs.movedim(-1, 1), self._kernel.permute(3, 2, 0, 1), self._bias, self._stride).movedim(1, -1)
+                reference.backward(gradient=outputs_gradient, inputs=[inputs, self._kernel, self._bias])
+                for name, computed, reference in zip(
+                        ["Bias", "Kernel", "Inputs"], [bias_gradient, kernel_gradient, inputs_gradient],
+                        [self._bias.grad, self._kernel.grad, inputs.grad]):
+                    np.testing.assert_allclose(computed.detach().numpy(), reference.detach().numpy(),
+                                               atol=2e-4, err_msg=name + " gradient differs!")
+
+        # Return the inputs gradient, the layer variables, and their gradients. NOTE(review): two lists, unlike the hint.
+        return inputs_gradient, [self._kernel, self._bias], [kernel_gradient, bias_gradient]
+
+
+class Model:  # MNIST classifier: manual `Convolution` layers and a linear head, trained with hand-written SGD.
+    def __init__(self, args: argparse.Namespace) -> None:
+        self._args = args
+
+        # Create the convolutional layers according to `args.cnn`, a comma-separated list of `filters-kernel-stride`.
+        input_shape = [MNIST.H, MNIST.W, MNIST.C]
+        self._convs = []
+        for layer in args.cnn.split(","):
+            filters, kernel_size, stride = map(int, layer.split("-"))
+            self._convs.append(Convolution(filters, kernel_size, stride, input_shape, args.seed, args.verify))
+            input_shape = [(input_shape[0] - kernel_size) // stride + 1,  # Output size of an unpadded convolution.
+                           (input_shape[1] - kernel_size) // stride + 1, filters]
+
+        # Create the classification head operating on the flattened output of the last convolution.
+        self._classifier = torch.nn.Linear(np.prod(input_shape), MNIST.LABELS)
+
+    def train_epoch(self, dataset: MNIST.Dataset) -> None:  # One pass over `dataset` with manual backpropagation.
+        for batch in dataset.batches(self._args.batch_size, shuffle=True):
+            # Forward pass through the convolutions.
+            hidden = batch["images"].to(torch.float32).movedim(1, -1) / 255  # Dim 1 moved last, values divided by 255.
+            conv_values = [hidden]  # The input of the first convolution, followed by the output of every convolution.
+            for conv in self._convs:
+                hidden = conv.forward(hidden)
+                conv_values.append(hidden)
+
+            # Run the classification head.
+            hidden_flat = torch.flatten(hidden, 1)
+            predictions = self._classifier(hidden_flat).softmax(dim=-1)
+
+            # Compute the gradients of the classifier and the convolution output (softmax minus one-hot targets).
+            one_hot_labels = torch.nn.functional.one_hot(batch["labels"].to(torch.int64), MNIST.LABELS)
+            d_logits = (predictions - one_hot_labels) / len(batch["images"])  # Divided by the batch size.
+            variables = [self._classifier.bias, self._classifier.weight]
+            gradients = [d_logits.sum(dim=0), d_logits.T @ hidden_flat]  # In the same order as `variables`.
+            hidden_gradient = (d_logits @ self._classifier.weight).reshape(hidden.shape)
+
+            # Backpropagate the gradient through the convolutions, from the last one to the first.
+            for conv, inputs, outputs in reversed(list(zip(self._convs, conv_values[:-1], conv_values[1:]))):
+                hidden_gradient, conv_variables, conv_gradients = conv.backward(inputs, outputs, hidden_gradient)
+                variables.extend(conv_variables)
+                gradients.extend(conv_gradients)
+
+            # Update the weights using a manual SGD; the in-place updates assume autograd is disabled, as `main` does.
+            for variable, gradient in zip(variables, gradients):
+                variable -= self._args.learning_rate * gradient
+
+    def evaluate(self, dataset: MNIST.Dataset) -> float:  # Return the accuracy of the model on `dataset`.
+        total = correct = 0
+        for batch in dataset.batches(self._args.batch_size):
+            hidden = batch["images"].to(torch.float32).movedim(1, -1) / 255
+            for conv in self._convs:
+                hidden = conv.forward(hidden)
+            hidden = torch.flatten(hidden, 1)
+            predictions = self._classifier(hidden)  # No softmax here; only the argmax is used.
+            correct += torch.sum(predictions.argmax(dim=-1) == batch["labels"])
+            total += len(batch["labels"])
+        return correct / total  # NOTE(review): presumably a 0-dim tensor, not a `float`, as `correct` sums tensors.
+
+
+def main(args: argparse.Namespace) -> tuple[float, float]:  # Train the manual CNN on MNIST and report accuracies.
+    # Set the random seed and the number of threads.
+    npfl138.startup(args.seed, args.threads)
+    npfl138.global_keras_initializers()
+
+    # Do not compute gradients in this assignment; this disables autograd globally.
+    torch.set_grad_enabled(False)
+
+    # Load data, using only 5 000 training images.
+    mnist = MNIST(sizes={"train": 5_000})
+
+    # Create the model.
+    model = Model(args)
+
+    for epoch in range(args.epochs):  # NOTE(review): `epoch` and `dev_accuracy` stay unbound if `args.epochs` is 0.
+        model.train_epoch(mnist.train)
+
+        dev_accuracy = model.evaluate(mnist.dev)
+        print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * dev_accuracy))
+
+    test_accuracy = model.evaluate(mnist.test)
+    print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy))
+
+    # Return dev and test accuracies for ReCodEx to validate (the dev one comes from the last epoch).
+    return dev_accuracy, test_accuracy
+
+
+if __name__ == "__main__":  # Run only as a script; when `__file__` is undefined, parse no CLI arguments.
+    main_args = parser.parse_args([] if "__file__" not in globals() else None)
+    main(main_args)
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+import argparse
+import time
+
+import timm
+import torch
+import torchvision
+import torchvision.transforms.v2 as v2
+
+parser = argparse.ArgumentParser()
+parser.add_argument("images", nargs="+", type=str, help="Files to classify.")  # At least one path is required.
+parser.add_argument("--seed", default=42, type=int, help="Random seed.")  # NOTE(review): not read by `main`.
+parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
+
+
+def main(args: argparse.Namespace) -> None:  # Classify every given image and print its five most probable labels.
+    # Set the number of threads; a value of 0 skips the calls and keeps the PyTorch settings.
+    if args.threads:
+        torch.set_num_threads(args.threads)
+        torch.set_num_interop_threads(args.threads)
+
+    # Load the EfficientNetV2-B0 model (with its classification layer) and switch it to evaluation mode.
+    efficientnetv2_b0 = timm.create_model("tf_efficientnetv2_b0.in1k", pretrained=True).eval()
+
+    # Create a simple preprocessing pipeline using the interpolation, mean, and std of the pretrained model.
+    preprocessing = v2.Compose([
+        v2.ToDtype(torch.float32, scale=True),  # The `scale=True` also rescales the image to [0, 1].
+        v2.Resize(224, interpolation=v2.InterpolationMode(efficientnetv2_b0.pretrained_cfg["interpolation"])),
+        v2.Normalize(mean=efficientnetv2_b0.pretrained_cfg["mean"], std=efficientnetv2_b0.pretrained_cfg["std"]),
+    ])
+
+    # Load the ImageNet labels.
+    imagenet_labels = timm.data.ImageNetInfo().label_descriptions()
+
+    for image_path in args.images:
+        # Load the image.
+        image = torchvision.io.decode_image(image_path, mode="RGB")
+
+        # Resize and normalize. NOTE(review): an int size in `v2.Resize` presumably sets the shorter side only.
+        image = preprocessing(image)
+
+        # Compute the prediction; the measured time covers the forward pass, the softmax, and the top-5 selection.
+        start = time.time()
+
+        with torch.no_grad():
+            predictions = efficientnetv2_b0(image.unsqueeze(0)).squeeze(0)  # Add and remove the batch dimension.
+
+        predictions = torch.topk(predictions.softmax(dim=-1), k=5)  # A pair of the 5 largest values and their indices.
+
+        print("Image {} [{} ms] labels:{}".format(
+            image_path,
+            1000 * (time.time() - start),
+            "".join("\n- {}: {}".format(imagenet_labels[label], prob) for prob, label in zip(*predictions)),
+        ))
+
+
+if __name__ == "__main__":  # NOTE(review): parsing `[]` (no `__file__`) fails, as `images` needs at least one value.
+    main_args = parser.parse_args([] if "__file__" not in globals() else None)
+    main(main_args)