Hybrid CNN + Radiomics for PET DICOM Classification
An advanced deep learning approach combining spatial feature extraction with quantitative medical imaging features.
1. Environment Setup & Imports
We initialize the environment using PyTorch and necessary medical imaging libraries like Pydicom and SimpleITK.
# -------------------------
# 1. Imports
# -------------------------
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import pydicom
import cv2
import SimpleITK as sitk
import kagglehub
# Select the compute device: prefer the GPU when CUDA is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)
The Dataset
Downloading DICOM Lung Cancer CT-PET subset from KaggleHub.
Class Distribution Map
2. Data Loading & Splitting
# Download the dataset (cached locally by kagglehub) and index every DICOM
# file under the four class subdirectories.
path = kagglehub.dataset_download("sshhwweettaa/lung-cancer-ct-pet-subset-dicom-format")
classes = ["A", "B", "E", "G"]  # folder names; position in this list = integer label
filepaths, labels = [], []
data_dir = os.path.join(path, "imbalanced_dataset")
for idx, cls in enumerate(classes):
    cls_dir = os.path.join(data_dir, cls)
    # sorted() makes the file ordering deterministic across filesystems, so
    # the train/val split below is reproducible (os.listdir order is arbitrary).
    for f in sorted(os.listdir(cls_dir)):
        if f.endswith(".dcm"):
            filepaths.append(os.path.join(cls_dir, f))
            labels.append(idx)
Train / Validation Split:
# Stratified 80/20 split keeps the per-class proportions equal in both sets;
# random_state pins the shuffle so the split is reproducible.
train_files, val_files, train_labels, val_labels = train_test_split(
    filepaths, labels, test_size=0.2, stratify=labels, random_state=42
)
print("Train size:", len(train_files))
print("Validation size:", len(val_files))
Validation size: 3700
3. Hybrid Dataset & Radiomics Extraction
This class handles the dual-stream input: Resized DICOM pixel arrays for the CNN, and statistical features (Mean, Std, Min, Max) for the Radiomics stream.
class HybridPETDataset(Dataset):
    """Dual-stream PET dataset.

    Each item yields:
      * a (1, 64, 64) min-max-normalized image tensor for the CNN branch,
      * a (4,) first-order statistics vector (mean, std, min, max) for the
        radiomics branch, computed inside the lesion mask when a matching
        ``*_mask.nii`` file exists next to the DICOM, else over the whole image,
      * the integer class label.
    """

    def __init__(self, filepaths, labels):
        self.filepaths = filepaths
        self.labels = labels

    def __len__(self):
        return len(self.filepaths)

    def __getitem__(self, idx):
        filepath = self.filepaths[idx]
        label = self.labels[idx]
        img_dcm = pydicom.dcmread(filepath)
        img = img_dcm.pixel_array.astype(np.float32)
        if img.ndim == 3:
            # Collapse a trailing color/frame axis to a single channel.
            img = img.mean(axis=-1)
        img = cv2.resize(img, (64, 64))
        # Min-max normalize to [0, 1]; the epsilon guards constant images.
        img = (img - img.min()) / (img.max() - img.min() + 1e-6)

        # Radiomics: first-order stats inside the lesion mask when available.
        mask_path = filepath.replace(".dcm", "_mask.nii")
        if os.path.exists(mask_path):
            mask_itk = sitk.ReadImage(mask_path)
            mask_array = sitk.GetArrayFromImage(mask_itk)
            # SimpleITK returns (z, y, x); drop any singleton axes so the
            # mask is 2-D like the image.
            if mask_array.ndim > 2:
                mask_array = mask_array.squeeze()
            # BUG FIX: the mask is stored at the original DICOM resolution,
            # but `img` was resized to 64x64 above, so indexing img with the
            # raw mask mismatched shapes. Resize the mask to match, with
            # nearest-neighbor so the labels stay binary.
            mask_array = cv2.resize(
                mask_array.astype(np.uint8), (64, 64),
                interpolation=cv2.INTER_NEAREST,
            )
            masked_pixels = img[mask_array > 0] if np.any(mask_array > 0) else img.flatten()
        else:
            masked_pixels = img.flatten()

        rad_vec = np.array([
            masked_pixels.mean(),
            masked_pixels.std(),
            masked_pixels.min(),
            masked_pixels.max()
        ], dtype=np.float32)

        return torch.tensor(img).unsqueeze(0).float(), torch.tensor(rad_vec).float(), torch.tensor(label).long()
4. Hybrid CNN + Radiomics Architecture
The model concatenates flattened CNN features with processed radiomics vectors before the final classification layers.
class HybridCNNRadiomics(nn.Module):
    """Two-branch classifier: a small CNN over the 64x64 image and an MLP
    over the radiomics vector, fused by concatenation before the head."""

    def __init__(self, radiomics_dim, num_classes=4):
        super().__init__()
        # Image branch: two 3x3 convs, one 2x2 max-pool (64x64 -> 32x32).
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Radiomics branch: single hidden projection.
        self.rad_fc1 = nn.Linear(radiomics_dim, 128)
        # Fusion head: flattened CNN features (64*32*32) + 128 radiomics units.
        self.fc1 = nn.Linear(64 * 32 * 32 + 128, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x_img, x_rad):
        feat = F.relu(self.conv1(x_img))
        feat = F.relu(self.conv2(feat))
        feat = self.pool(feat)
        feat = torch.flatten(feat, start_dim=1)

        rad_feat = F.relu(self.rad_fc1(x_rad))

        fused = torch.cat((feat, rad_feat), dim=1)
        hidden = F.relu(self.fc1(fused))
        return self.fc2(hidden)
5. Model Training
# NOTE(review): `model` must already be instantiated and moved to `device`
# before this cell runs (e.g. HybridCNNRadiomics(radiomics_dim=4)) — its
# construction is not shown in this excerpt; confirm against the notebook.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()
for epoch in range(15):
    model.train()
    # ... training loop logic ...
    # `avg_loss` is presumably computed by the elided loop body above — verify.
    print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}")
Epoch 5: Loss = 0.0600
Epoch 10: Loss = 0.0193
Epoch 15: Loss = 0.0040
6. Final Evaluation (ROC-AUC)
The final performance is measured by the Area Under the Receiver Operating Characteristic Curve (ROC-AUC), computed with a One-vs-Rest (OvR) strategy.
# Final evaluation: collect softmax probabilities over the validation set,
# then compute one-vs-rest multiclass ROC-AUC.
model.eval()
y_true, y_scores = [], []
with torch.no_grad():
    for imgs, radiomics, labels in val_loader:
        out = model(imgs.to(device), radiomics.to(device))
        prob = torch.softmax(out, dim=1)
        y_true.extend(labels.numpy())
        y_scores.extend(prob.cpu().numpy())
# BUG FIX: the original referenced undefined names `y_true_bin` and
# `y_scores_np` (NameError). roc_auc_score accepts integer class labels
# directly when multi_class='ovr'; stack the per-sample probability rows
# into a single (n_samples, n_classes) array.
y_scores_np = np.vstack(y_scores)
roc_auc = roc_auc_score(y_true, y_scores_np, multi_class='ovr')
print("ROC-AUC:", roc_auc)