Source code for UM2N.loader.dataset

# Author: Chunyang Wang
# GitHub Username: acse-cw1722

import glob
import os
import sys

import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torch_geometric.data import Data

cur_dir = os.path.dirname(__file__)
sys.path.append(cur_dir)
from cluster_utils import get_new_edges  # noqa

# from torch_geometric.loader import DataLoader as geoDataLoader

__all__ = ["MeshDataset", "MeshLoader", "MeshData", "normalise", "AggreateDataset"]


class AggreateDataset(Dataset):
    """Aggregate multiple datasets into a single dataset.

    Attributes:
        datasets (list): List of datasets.
        datasets_len (list): Length of each dataset in `datasets`.
    """

    def __init__(self, datasets):
        self.datasets = datasets
        self.datasets_len = [len(dataset) for dataset in datasets]

    def __len__(self):
        """Return the total number of samples in all datasets."""
        return sum(self.datasets_len)

    def __getitem__(self, idx):
        """Fetch an individual data point from the aggregated dataset.

        Args:
            idx (int): The index of the sample to fetch.

        Returns:
            tuple: The sample fetched from one of the aggregated datasets.
        """
        dataset_idx = 0
        while idx >= self.datasets_len[dataset_idx]:
            idx -= self.datasets_len[dataset_idx]
            dataset_idx += 1
        return self.datasets[dataset_idx][idx]
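# Usage sketch (added for illustration, not part of the original module):
# combining two dataset directories into one training set. The directory
# paths and the helper name are placeholder assumptions.
def _example_aggregate(dir_a="data/train_a", dir_b="data/train_b"):
    """Hypothetical helper: wrap two MeshDataset folders in AggreateDataset.

    Indexing is contiguous: indices [0, len(first)) map to the first dataset
    and the remainder to the second, exactly as __getitem__ above walks
    `datasets_len`.
    """
    combined = AggreateDataset([MeshDataset(dir_a), MeshDataset(dir_b)])
    return combined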
class MeshDataset(Dataset):
    """Dataset for mesh-based data.

    Attributes:
        x_feature (list): List of feature names for node features.
        mesh_feature (list): List of feature names for mesh features.
        conv_feature (list): List of feature names for convolution features.
        file_names (list): List of filenames containing mesh data.
    """

    def __init__(
        self,
        file_dir,
        transform=None,
        target_transform=None,
        x_feature=[
            "coord",
            "bd_mask",
            "bd_left_mask",
            "bd_right_mask",
            "bd_down_mask",
            "bd_up_mask",
        ],
        mesh_feature=[
            "coord",
            "u",
        ],
        conv_feature=[
            "conv_uh",
        ],
        conv_feature_fix=[
            "conv_uh_fix",
        ],
        load_analytical=False,
        load_jacobian=False,
        use_cluster=False,
        use_run_time_cluster=False,
        r=0.35,
        M=25,
        dist_weight=False,
        add_nei=True,
    ):
        # x_feature contains the coordinate-related node features
        self.x_feature = x_feature
        # mesh_feature is used to construct the edge-related features
        self.mesh_feature = mesh_feature
        # conv_feature: list of features passed to a CNN
        self.conv_feature = conv_feature
        # conv_feature_fix: list of fixed-mesh features passed to a CNN
        self.conv_feature_fix = conv_feature_fix
        self.file_dir = file_dir
        file_path = os.path.join(self.file_dir, "data_*.npy")
        self.file_names = glob.glob(file_path)
        self.file_names = sorted(
            self.file_names, key=lambda x: int(x.split("_")[-1].split(".")[0])
        )
        self.transform = transform
        self.target_transform = target_transform
        # if True, load the params used to generate the data
        self.load_analytical = load_analytical
        # if True, load the jacobian and jacobian det
        self.load_jacobian = load_jacobian
        # if True, use the cluster to sample the neighbors
        self.use_cluster = use_cluster
        # if True, use the run-time cluster to sample the neighbors
        self.use_run_time_cluster = use_run_time_cluster
        # params for the run-time cluster
        self.r = r
        self.M = M
        self.dist_weight = dist_weight
        self.add_nei = add_nei
        # phi of the MA solution is loaded per sample in __getitem__
    def get_x_feature(self, data):
        """
        Extracts and concatenates the x_features for each node from the data.

        Args:
            data (dict): The data dictionary loaded from a .npy file.

        Returns:
            tensor: The concatenated x_features for each node.
        """
        x_list = []
        for key in self.x_feature:
            feat = data.item().get(key)
            if len(feat.shape) == 1:
                feat = feat.reshape(-1, 1)
            x_list.append(feat)
        x = np.concatenate(x_list, axis=1)
        x = torch.from_numpy(x).float()
        return x
    def get_mesh_feature(self, data):
        """
        Extracts and concatenates the mesh_features from the data.

        Args:
            data (dict): The data dictionary loaded from a .npy file.

        Returns:
            tensor: The concatenated mesh_features.
        """
        mesh_list = []
        for key in self.mesh_feature:
            feat = data.item().get(key)
            if len(feat.shape) == 1:
                feat = feat.reshape(-1, 1)
            mesh_list.append(feat)
        mesh = np.concatenate(mesh_list, axis=1)
        mesh = torch.from_numpy(mesh).float()
        return mesh
    def get_conv_feature(self, data):
        """
        Extracts and concatenates the conv_features from the data.

        Args:
            data (dict): The data dictionary loaded from a .npy file.

        Returns:
            tensor: The concatenated conv_features.
        """
        conv_list = []
        for key in self.conv_feature:
            feat = data.item().get(key)
            conv_list.append(feat)
        conv = np.concatenate(conv_list, axis=0)
        conv = torch.from_numpy(conv).float()
        return conv
    def get_conv_feature_fix(self, data):
        """
        Extracts and concatenates the conv_feature_fix features from the data.

        Args:
            data (dict): The data dictionary loaded from a .npy file.

        Returns:
            tensor: The concatenated conv_feature_fix features.
        """
        conv_list = []
        for key in self.conv_feature_fix:
            feat = data.item().get(key)
            conv_list.append(feat)
        conv = np.concatenate(conv_list, axis=0)
        conv = torch.from_numpy(conv).float()
        return conv
    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, idx):
        """
        Loads and returns a mesh data sample and its target from a .npy file.

        Args:
            idx (int): The index of the .npy file to load.

        Returns:
            MeshData: A MeshData object containing the sample and target.
        """
        data_path = self.file_names[idx]
        data = np.load(data_path, allow_pickle=True)
        num_nodes = torch.tensor([data.item().get("x").shape[0]])
        # advanced version
        train_data = MeshData(
            x=self.get_x_feature(data),  # noqa: x here is the coordinate-related features
            bd_mask=torch.from_numpy(data.item().get("bd_mask")).int(),
            conv_feat=self.get_conv_feature(data),
            # conv_feat_fix=self.get_conv_feature_fix(data),
            conv_feat_fix=self.get_conv_feature(data),
            mesh_feat=self.get_mesh_feature(data),
            edge_index=torch.from_numpy(data.item().get("edge_index_bi")).to(
                torch.int64
            ),
            y=torch.from_numpy(data.item().get("y")).float(),
            face=(
                torch.from_numpy(data.item().get("face_idxs")).to(torch.long).T
                if data.item().get("face_idxs") is not None
                else None
            ),  # noqa: E501
            phi=(
                torch.from_numpy(data.item().get("phi")).float()
                if data.item().get("phi") is not None
                else None
            ),  # noqa: E501
            grad_phi=(
                torch.from_numpy(data.item().get("grad_phi")).float()
                if data.item().get("grad_phi") is not None
                else None
            ),  # noqa: E501
            f=(
                torch.from_numpy(data.item().get("f")).float()
                if data.item().get("f") is not None
                else None
            ),  # noqa
            monitor_val=(
                torch.from_numpy(data.item().get("monitor_val")).float()
                if data.item().get("monitor_val") is not None
                else None
            ),  # noqa: E501
            node_num=num_nodes,
            poly_mesh=(
                data.item().get("poly_mesh")
                if data.item().get("poly_mesh") is not None
                else False
            ),  # noqa: E501
        )
        if self.load_analytical:
            train_data.dist_params = {
                "σ_x": data.item().get("σ_x"),
                "σ_y": data.item().get("σ_y"),
                "μ_x": data.item().get("μ_x"),
                "μ_y": data.item().get("μ_y"),
                "z": data.item().get("z"),
                "w": data.item().get("w"),
                "simple_u": data.item().get("use_iso"),
                "n_dist": data.item().get("n_dist"),
            }
        if self.load_jacobian:
            train_data.jacobian = torch.from_numpy(data.item().get("jacobian"))
            train_data.jacobian_det = torch.from_numpy(data.item().get("jacobian_det"))
        if self.transform:
            train_data = self.transform(train_data)
        if self.use_cluster:
            train_data.edge_index = data.item().get("cluster_edges").to(torch.int64)  # noqa
        if self.use_run_time_cluster:
            train_data.edge_index = get_new_edges(
                num_nodes,
                train_data.x[:, :2],
                train_data.edge_index,
                r=self.r,
                M=self.M,
                dist_weight=self.dist_weight,
                add_nei=self.add_nei,
            )
        return train_data
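# Usage sketch (added for illustration, not part of the original module):
# loading one sample from a directory of data_*.npy files and inspecting its
# fields. The directory path and helper name are placeholder assumptions.
def _example_inspect_sample(file_dir="data/train"):
    """Hypothetical helper: build a MeshDataset and inspect one sample."""
    dataset = MeshDataset(file_dir)
    sample = dataset[0]  # a MeshData object assembled in __getitem__ above
    print(sample.x.shape)           # node features built from x_feature keys
    print(sample.mesh_feat.shape)   # coord + u, per mesh_feature
    print(sample.conv_feat.shape)   # CNN input built from conv_feature keys
    print(sample.edge_index.shape)  # bidirectional edge list
    return sample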
class MeshData(Data):
    """
    Custom PyTorch Data object designed to handle mesh data features.

    This class is intended to be used as the base class of data samples
    returned by the MeshDataset.
    """

    def __cat_dim__(self, key, value, *args, **kwargs):
        # conv_feat is fed into a CNN, so an extra batch dim is needed
        if key == "conv_feat":
            return None
        if key == "conv_feat_fix":
            return None
        if key == "node_num":
            return None
        return super().__cat_dim__(key, value, *args, **kwargs)
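# Illustration (added, not part of the original module): returning None from
# __cat_dim__ tells PyTorch Geometric's collation to stack `conv_feat`,
# `conv_feat_fix` and `node_num` along a new leading batch dimension instead
# of concatenating them along an existing one, which is what a CNN input
# expects. The helper name is a placeholder assumption.
def _example_batch_meshdata(samples):
    """Hypothetical helper: batch a list of MeshData objects with PyG."""
    from torch_geometric.data import Batch

    batch = Batch.from_data_list(samples)
    # batch.conv_feat has a leading dimension of len(samples) because
    # __cat_dim__ above returns None for that key.
    return batch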
def MeshLoader(dataset, batch_size=10, shuffle=True):
    """Wrap a dataset in a DataLoader whose batches are plain lists of samples."""

    def collate_fn(batch):
        return [item for item in batch]

    return DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn
    )
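# Usage sketch (added for illustration, not part of the original module):
# because MeshLoader uses a pass-through collate_fn, each batch is a Python
# list of MeshData samples rather than a collated tensor batch. The directory
# path and helper name are placeholder assumptions.
def _example_iterate_batches(file_dir="data/train"):
    """Hypothetical helper: iterate over list-of-MeshData batches."""
    loader = MeshLoader(MeshDataset(file_dir), batch_size=4, shuffle=False)
    for batch in loader:
        for sample in batch:
            _ = sample.x  # each item is an individual MeshData object
        break  # illustrate a single batch only
    return loader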
def normalise(data):
    """
    Normalises the mesh and convolution features of a given MeshData object.

    Args:
        data (MeshData): The MeshData object containing features to normalise.

    Returns:
        MeshData: The MeshData object with normalised features.
    """
    # normalise mesh features (only the value dims; the first 2 dims are coordinates)
    # Compute minimum and maximum values along the second axis
    mesh_val_feat = data.mesh_feat[:, 2:]  # value features (no coords)
    min_val = torch.min(mesh_val_feat, dim=0).values
    max_val = torch.max(mesh_val_feat, dim=0).values
    max_abs_val = torch.max(torch.abs(min_val), torch.abs(max_val))
    data.mesh_feat[:, 2:] = data.mesh_feat[:, 2:] / max_abs_val

    # normalise conv features, that is, uh and the hessian norm
    conv_feat_shape = data.conv_feat.shape
    conv_feat = data.conv_feat
    conv_feat = conv_feat.reshape(conv_feat_shape[0], -1)
    min_val = torch.min(conv_feat, dim=1).values
    max_val = torch.max(conv_feat, dim=1).values
    max_abs_val = torch.max(torch.abs(min_val), torch.abs(max_val))
    max_abs_val = max_abs_val.reshape(-1, 1)
    conv_feat[:, :] = conv_feat[:, :] / max_abs_val[:, :]
    data.conv_feat = conv_feat.reshape(conv_feat_shape)

    # normalise conv_feat_fix features
    conv_feat_fix_shape = data.conv_feat_fix.shape
    conv_feat_fix = data.conv_feat_fix
    conv_feat_fix = conv_feat_fix.reshape(conv_feat_fix_shape[0], -1)
    min_val = torch.min(conv_feat_fix, dim=1).values
    max_val = torch.max(conv_feat_fix, dim=1).values
    max_abs_val = torch.max(torch.abs(min_val), torch.abs(max_val))
    max_abs_val = max_abs_val.reshape(-1, 1)
    conv_feat_fix[:, :] = conv_feat_fix[:, :] / max_abs_val[:, :]
    data.conv_feat_fix = conv_feat_fix.reshape(conv_feat_fix_shape)

    return data
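# Usage sketch (added for illustration, not part of the original module):
# `normalise` can be applied to a loaded sample directly, or equivalently
# passed as MeshDataset's `transform` argument so it runs inside __getitem__.
# The directory path and helper name are placeholder assumptions.
def _example_normalise_sample(file_dir="data/train"):
    """Hypothetical helper: normalise one sample after loading it."""
    sample = MeshDataset(file_dir)[0]
    # scales mesh_feat[:, 2:], conv_feat and conv_feat_fix by their max |value|
    sample = normalise(sample)
    return sample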