! pip install -q gdown
! pip install -q colorama # for color-based texts
! pip install -q torchviz # for visualizing graphs
! pip install -q torchview # for visualizing graphs
! pip install -q graphviz # for visualizing graphs
! pip install -q torchsummary # for finding the number of parameters of a model

# General imports
import os
import cv2
import glob
import numpy as np
import random
from tqdm import tqdm
from pathlib import Path
from colorama import Fore, Style
from collections import Counter, defaultdict

# Model based imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Viz-based imports
from torchsummary import summary
from torchview import draw_graph
import graphviz
graphviz.set_jupyter_format('png')
import seaborn as sns
import matplotlib.pyplot as plt

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


# Seed every random number generator this notebook draws from.  Layer weights
# and torch.randn tensors come from PyTorch's own generator, which the Python
# and NumPy seeds do not touch, so it is seeded explicitly as well.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

Using device: cpu

class Example1FCL(nn.Module):
    """Single fully connected layer from ``input_size`` to ``output_size`` features.

    Args:
        input_size: Size of the last dimension of the input.
        output_size: Size of the last dimension of the output.
    """

    def __init__(self, input_size=9, output_size=4):
        super().__init__()
        # One weight per (input, output) pair plus one bias per output.
        self.fc1 = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.fc1(x)

# Build the 9 -> 4 linear model directly on the selected device and show its layers.
model_FCL = Example1FCL(input_size=9, output_size=4).to(device)
print(model_FCL)

Example1FCL(
  (fc1): Linear(in_features=9, out_features=4, bias=True)
)

# torchsummary adds the batch dimension on its own (shown as -1 in the table),
# so input_size describes one sample.  Passing (1, 9) therefore gives each
# sample one row of 9 features, which is why the output shape is [-1, 1, 4].
# sample_input = torch.randn(1, 9)
summary(model_FCL, input_size=(1,9))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Linear-1                 [-1, 1, 4]              40
================================================================
Total params: 40
Trainable params: 40
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------

# Trace the linear model with a (1, 9) input and build its computational graph.
model_graph1 = draw_graph(model_FCL, input_size=[(1, 9)],  expand_nested=True)
# Render the graph as a PNG, then leave the graph object as the last
# expression of the cell so the notebook displays it.
model_graph1.visual_graph.render(format='png')
model_graph1.visual_graph

# Count the learnable scalars of a fresh 9 -> 4 linear model
# (9 * 4 weights + 4 biases).
fcl_model = Example1FCL(input_size=9, output_size=4)
fcl_params = sum([weights.numel() for weights in fcl_model.parameters()])
print(f"FCL Parameters: {fcl_params}")

FCL Parameters: 40

class Example1CNN(nn.Module):
    """One 1-D convolution with a single length-4 filter.

    The layer holds 4 kernel weights and 1 bias, i.e. 5 parameters in total.
    """

    def __init__(self):
        super().__init__()
        # (1 input channel * 4 taps + 1 bias) * 1 output channel = 5 parameters
        self.c1 = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=4)

    def forward(self, x):
        """Run ``x`` through the convolution and return the result."""
        return self.c1(x)

# Instantiate the Conv1d demo and report how many learnable scalars it holds.
model_CNN = Example1CNN()
print(f"Total parameters: {sum([weights.numel() for weights in model_CNN.parameters()])}")

Total parameters: 5

# Conv1d is fed (channels, length) samples, so the 9 values are presented as
# 1 channel of length 9.  torchsummary's input_size leaves out the batch
# dimension, which it adds itself (the -1 in the output shape).
summary(model_CNN, input_size=(1, 9))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv1d-1                 [-1, 1, 6]               5
================================================================
Total params: 5
Trainable params: 5
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------

# Draw the computational graph of the model.
# Unlike torchsummary, draw_graph is given the full tensor shape including the
# batch dimension (the Conv2d graphs in this notebook pass (1, 3, 32, 32)), so
# Conv1d gets (batch, channels, length) = (1, 1, 9).  A bare (1, 9) would be
# traced as a single unbatched sample instead.
model_graph1 = draw_graph(model_CNN, input_size=[(1, 1, 9)],  expand_nested=True)
# Render the graph as a PNG, then leave the graph object as the last
# expression of the cell so the notebook displays it.
model_graph1.visual_graph.render(format='png')
model_graph1.visual_graph

# One 3072-feature input and 10 kernels with the same 1 x 3072 layout.
input_vol = torch.randn(1, 1, 1, 3072)
kernel = torch.randn(1, 10, 1, 3072)
# Broadcasting the product yields shape (1, 10, 1, 3072).  Collapsing the
# feature (last) axis while keeping it as size 1 leaves one value per kernel,
# i.e. the convolution result of shape (1, 10, 1, 1).
output = (input_vol * kernel).sum(dim=-1, keepdim=True)
print(output.shape)

torch.Size([1, 10, 1, 1])

import torch

# Layer configuration: 3072 input channels, 10 filters, 1x1 kernels, with bias.
in_channels = 3072
out_channels = 10
kernel_size = 1
bias = True

# Every filter holds in_channels * kernel_size**2 weights; a bias adds one
# extra scalar per filter.
params = out_channels * (in_channels * kernel_size ** 2)
if bias:
    params += out_channels
print(params)

30730

import torch

# One 3-channel 32x32 input and a single 5x5 filter spanning all 3 channels.
input_vol = torch.randn(1, 3, 32, 32)
kernel = torch.randn(1, 3, 5, 5)
# No bias, stride or padding argument is passed to this functional call.
output = torch.conv2d(input_vol, kernel)

# 1 * 3 * 5 * 5 filter weights, plus one bias scalar per filter in the count.
params = kernel.numel() + kernel.shape[0]
print(f"Output shape: {output.shape}, Parameters: {params}")

Output shape: torch.Size([1, 1, 28, 28]), Parameters: 76

class ExampleCNN_Slide1(nn.Module):
    """A single 5x5 convolution mapping a 3-channel input to 1 feature map.

    Parameter count: 3 * 5 * 5 weights + 1 bias = 76.
    """

    def __init__(self):
        super().__init__()
        # Unit stride and no padding.
        self.c1 = nn.Conv2d(3, 1, kernel_size=5, stride=1, padding=0)

    def forward(self, x):
        """Run ``x`` through the convolution and return the result."""
        return self.c1(x)

# Instantiate the one-filter Conv2d demo and report its learnable parameter count.
model_CNN_slide1 = ExampleCNN_Slide1()
print(f"Total parameters: {sum([weights.numel() for weights in model_CNN_slide1.parameters()])}")

Total parameters: 76

# Summarize the model for one 3-channel 32x32 sample; torchsummary adds the
# batch dimension itself (the -1 in the table).
summary(model_CNN_slide1, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1            [-1, 1, 28, 28]              76
================================================================
Total params: 76
Trainable params: 76
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.01
Params size (MB): 0.00
Estimated Total Size (MB): 0.02
----------------------------------------------------------------

# Draw the computational graph of the model, traced with a (1, 3, 32, 32)
# input, i.e. the batch dimension is part of the shape given here.
model_graph1 = draw_graph(model_CNN_slide1, input_size=[(1, 3, 32, 32)],  expand_nested=True)
# Render the graph as a PNG, then leave the graph object as the last
# expression of the cell so the notebook displays it.
model_graph1.visual_graph.render(format='png')
model_graph1.visual_graph

class ExampleCNN_Slide2(nn.Module):
    """A single 5x5 convolution mapping a 3-channel input to 6 feature maps.

    Parameter count: 6 * (3 * 5 * 5 weights + 1 bias) = 456.
    """

    def __init__(self):
        super().__init__()
        # Unit stride and no padding.
        self.c1 = nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0)

    def forward(self, x):
        """Run ``x`` through the convolution and return the result."""
        return self.c1(x)

# Instantiate the six-filter Conv2d demo and report its learnable parameter count.
model_ExampleCNN_Slide2 = ExampleCNN_Slide2()
print(f"Total parameters: {sum([weights.numel() for weights in model_ExampleCNN_Slide2.parameters()])}")

Total parameters: 456

# Summarize the model for one 3-channel 32x32 sample; torchsummary adds the
# batch dimension itself (the -1 in the table).
summary(model_ExampleCNN_Slide2, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1            [-1, 6, 28, 28]             456
================================================================
Total params: 456
Trainable params: 456
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.04
Params size (MB): 0.00
Estimated Total Size (MB): 0.05
----------------------------------------------------------------

# Draw the computational graph of the model, traced with a (1, 3, 32, 32)
# input, i.e. a batch holding one image.
model_graph1 = draw_graph(model_ExampleCNN_Slide2, input_size=[(1, 3, 32, 32)],  expand_nested=True)
# Render the graph as a PNG, then leave the graph object as the last
# expression of the cell so the notebook displays it.
model_graph1.visual_graph.render(format='png')
model_graph1.visual_graph

# Draw the same model's graph again, this time traced with a (2, 3, 32, 32)
# input, i.e. a batch holding two images.
model_graph1 = draw_graph(model_ExampleCNN_Slide2, input_size=[(2, 3, 32, 32)],  expand_nested=True)
# Render the graph as a PNG, then leave the graph object as the last
# expression of the cell so the notebook displays it.
model_graph1.visual_graph.render(format='png')
model_graph1.visual_graph

class ExampleCNN_Slide3(nn.Module):
    """Two stacked 5x5 convolutions: 3 -> 6 channels, then 6 -> 10 channels.

    Parameter count: 456 for the first layer plus
    10 * (6 * 5 * 5 weights + 1 bias) = 1510 for the second, 1966 in total.
    """

    def __init__(self):
        super().__init__()
        # Both layers use unit stride and no padding.
        self.c1 = nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0)
        self.c2 = nn.Conv2d(6, 10, kernel_size=5, stride=1, padding=0)

    def forward(self, x):
        """Apply ``c1`` and then ``c2`` to ``x``."""
        return self.c2(self.c1(x))

# Instantiate the two-layer model and summarize it for one 3-channel 32x32
# sample; torchsummary adds the batch dimension itself.
model_ExampleCNN_Slide3 = ExampleCNN_Slide3()
# Optional manual count (disabled); it must iterate over this cell's instance:
# print(f"Total parameters: {sum(p.numel() for p in model_ExampleCNN_Slide3.parameters())}")
summary(model_ExampleCNN_Slide3, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1            [-1, 6, 28, 28]             456
            Conv2d-2           [-1, 10, 24, 24]           1,510
================================================================
Total params: 1,966
Trainable params: 1,966
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.08
Params size (MB): 0.01
Estimated Total Size (MB): 0.10
----------------------------------------------------------------

# Draw the computational graph of the two-layer model, traced with a
# (1, 3, 32, 32) input, i.e. a batch holding one image.
model_graph1 = draw_graph(model_ExampleCNN_Slide3, input_size=[(1, 3, 32, 32)],  expand_nested=True)
# Render the graph as a PNG, then leave the graph object as the last
# expression of the cell so the notebook displays it.
model_graph1.visual_graph.render(format='png')
model_graph1.visual_graph

class ExampleCNN_Slide4(nn.Module):
    """A 1x1 convolution that reduces 64 channels to 32.

    Parameter count: 32 * (64 * 1 * 1 weights + 1 bias) = 2080.
    """

    def __init__(self):
        super().__init__()
        # A 1x1 kernel with unit stride and no padding.
        self.c1 = nn.Conv2d(64, 32, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Run ``x`` through the convolution and return the result."""
        return self.c1(x)

# Instantiate the 1x1-convolution model and summarize it for one 64-channel
# 56x56 sample; torchsummary adds the batch dimension itself.
model_ExampleCNN_Slide4 = ExampleCNN_Slide4()
# Optional manual count (disabled); it must iterate over this cell's instance:
# print(f"Total parameters: {sum(p.numel() for p in model_ExampleCNN_Slide4.parameters())}")
summary(model_ExampleCNN_Slide4, input_size=(64, 56, 56))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 32, 56, 56]           2,080
================================================================
Total params: 2,080
Trainable params: 2,080
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.77
Forward/backward pass size (MB): 0.77
Params size (MB): 0.01
Estimated Total Size (MB): 1.54
----------------------------------------------------------------

# Draw the computational graph of the 1x1-convolution model, traced with a
# (1, 64, 56, 56) input, i.e. a batch holding one 64-channel sample.
model_graph1 = draw_graph(model_ExampleCNN_Slide4, input_size=[(1,64, 56, 56)],  expand_nested=True)
# Render the graph as a PNG, then leave the graph object as the last
# expression of the cell so the notebook displays it.
model_graph1.visual_graph.render(format='png')
model_graph1.visual_graph

class Example_ffcnn(nn.Module):
    """Small conv -> batch-norm -> pool -> dense network ending in a sigmoid.

    ``fc1`` expects 48 flattened features, which is what a 3-channel 7x7
    input produces (3 channels * 4 * 4 after pooling).  The network emits
    one sigmoid-activated value per sample.
    """

    def __init__(self):
        super(Example_ffcnn, self).__init__()
        # Convolutional stage: a strided 3x3 convolution, then a 3x3
        # convolution with padding 3.
        self.conv1 = nn.Conv2d(3, 3, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=3)
        self.bn2d = nn.BatchNorm2d(num_features=3)
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        # Dense stage: flatten, 48 -> 10, batch-norm, 10 -> 1, sigmoid.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=48, out_features=10)
        self.bn1d = nn.BatchNorm1d(num_features=10)
        self.out = nn.Linear(in_features=10, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Pass ``x`` through every layer in order and return the final activation."""
        pipeline = (
            self.conv1,
            self.conv2,
            self.bn2d,
            self.maxpool,
            self.flatten,
            self.fc1,
            self.bn1d,
            self.out,
            self.sigmoid,
        )
        for layer in pipeline:
            x = layer(x)
        return x

# Instantiate the demo network and summarize it for one 3-channel 7x7 sample,
# the input size that yields the 48 flattened features fc1 expects.
model = Example_ffcnn()
summary(model, input_size=(3,7,7))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1              [-1, 3, 4, 4]              84
            Conv2d-2              [-1, 3, 8, 8]              84
       BatchNorm2d-3              [-1, 3, 8, 8]               6
         MaxPool2d-4              [-1, 3, 4, 4]               0
           Flatten-5                   [-1, 48]               0
            Linear-6                   [-1, 10]             490
       BatchNorm1d-7                   [-1, 10]              20
            Linear-8                    [-1, 1]              11
           Sigmoid-9                    [-1, 1]               0
================================================================
Total params: 695
Trainable params: 695
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.01
----------------------------------------------------------------

# Build a fresh instance and draw its computational graph, traced with a
# (1, 3, 7, 7) input, i.e. a batch holding one 3-channel 7x7 sample.
model = Example_ffcnn()
mg1 = draw_graph(model, input_size=(1,3,7,7), expand_nested=True)
# Render the graph as a PNG, then leave the graph object as the last
# expression of the cell so the notebook displays it.
mg1.visual_graph.render(format='png')
mg1.visual_graph

# Download the sample photograph into the working directory as
# main_building_1958.jpg (loaded below as a single-channel image).
! wget "https://jimut123.github.io/blogs/IITB_OLD/old/main_building_1958.jpg"

--2025-08-27 08:12:49--  https://jimut123.github.io/blogs/IITB_OLD/old/main_building_1958.jpg
Resolving jimut123.github.io (jimut123.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to jimut123.github.io (jimut123.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 602677 (589K) [image/jpeg]
Saving to: ‘main_building_1958.jpg’

main_building_1958. 100%[===================>] 588.55K  2.63MB/s    in 0.2s    

2025-08-27 08:12:50 (2.63 MB/s) - ‘main_building_1958.jpg’ saved [602677/602677]

import cv2
import numpy as np
import matplotlib.pyplot as plt

# IMREAD_UNCHANGED loads the file as stored; for this photo that is a
# single-channel (grayscale) 2-D array.
img_mb = cv2.imread("main_building_1958.jpg",cv2.IMREAD_UNCHANGED)
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of an
# AttributeError on `.shape` below.
if img_mb is None:
    raise FileNotFoundError(
        "could not read 'main_building_1958.jpg'; run the wget cell above first"
    )
print("shape of image = ",img_mb.shape)
print("first 10x10 pixels from top left = \n",img_mb[:10,:10])

shape of image =  (1409, 2835)
first 10x10 pixels from top left = 
 [[174 173 171 170 170 172 173 174 177 178]
 [174 173 172 171 171 172 173 174 178 179]
 [175 174 173 172 172 173 174 174 178 179]
 [176 176 175 175 175 175 175 175 179 180]
 [179 179 179 179 178 177 177 176 180 181]
 [182 183 183 183 182 181 180 179 181 182]
 [185 186 187 187 186 184 182 181 181 182]
 [187 188 189 189 188 186 184 182 182 183]
 [188 188 188 188 188 188 188 188 181 183]
 [187 187 187 188 188 189 189 189 183 184]]

# Show the 10x10 top-left patch printed above, using the gray colormap.
plt.imshow(img_mb[:10,:10],cmap='gray')

<matplotlib.image.AxesImage at 0x7f6eb3b18a70>

# Download the color sample photograph into the working directory as
# this_side_sameer_hills.jpg (loaded below as a 3-channel image).
! wget "https://jimut123.github.io/img/gallery/this_side_sameer_hills.jpg"

--2025-08-27 08:29:16--  https://jimut123.github.io/img/gallery/this_side_sameer_hills.jpg
Resolving jimut123.github.io (jimut123.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to jimut123.github.io (jimut123.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5748752 (5.5M) [image/jpeg]
Saving to: ‘this_side_sameer_hills.jpg’

this_side_sameer_hi 100%[===================>]   5.48M  14.0MB/s    in 0.4s    

2025-08-27 08:29:18 (14.0 MB/s) - ‘this_side_sameer_hills.jpg’ saved [5748752/5748752]

# IMREAD_UNCHANGED loads the file as stored; for this photo that is a
# 3-channel (height, width, 3) array.
# NOTE(review): OpenCV documents imread as returning color channels in BGR
# order — convert before plotting with matplotlib.
img_sh = cv2.imread("this_side_sameer_hills.jpg",cv2.IMREAD_UNCHANGED)
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of an
# AttributeError on `.shape` below.
if img_sh is None:
    raise FileNotFoundError(
        "could not read 'this_side_sameer_hills.jpg'; run the wget cell above first"
    )
print("shape of image = \n",img_sh.shape)
print("first 5x5 pixels \n from top left = \n",img_sh[:5,:5,:])

shape of image = 
 (2088, 4640, 3)
first 5x5 pixels 
 from top left = 
 [[[224 198 128]
  [226 200 130]
  [230 204 134]
  [226 200 130]
  [225 199 129]]

 [[230 204 134]
  [225 199 129]
  [226 200 130]
  [223 197 127]
  [226 200 130]]

 [[232 207 137]
  [227 202 132]
  [230 205 135]
  [228 203 133]
  [232 206 136]]

 [[224 199 129]
  [223 198 128]
  [227 202 132]
  [227 201 131]
  [228 202 132]]

 [[222 197 127]
  [225 200 130]
  [228 202 132]
  [228 202 132]
  [228 202 132]]]

Code accompanying slides for CNN