| | import torch.utils.data as data |
| | import os |
| | import os.path |
| | import torch |
| | import numpy as np |
| | import pandas as pd |
| | import sys |
| | import pickle |
| | import time |
| | import torchvision.datasets as datasets |
| | import torchvision.transforms as transforms |
| | from PIL import Image |
| | from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union |
| | from torchvision.datasets import VisionDataset |
| | from torch.utils.data import Dataset |
| | from datetime import date, timedelta,datetime |
| | import random |
| | import pickle as pkl |
| | import string |
| |
|
# The ten letters that may appear in a building label.
valid_chars = 'EFHILOTUYZ'


# Every ordered two-letter combination of the valid characters, in
# lexicographic order, with forward/backward maps to contiguous int ids.
alphabetic_labels = sorted(first + second
                           for first in valid_chars
                           for second in valid_chars)
label_mapping = {name: index for index, name in enumerate(alphabetic_labels)}
reverse_label_mapping = {index: name for name, index in label_mapping.items()}


# Same id scheme for single-letter labels.
single_alphabetic_labels = sorted(valid_chars)
single_label_mapping = {name: index
                        for index, name in enumerate(single_alphabetic_labels)}
single_reverse_label_mapping = {index: name
                                for name, index in single_label_mapping.items()}
| |
|
def get_mnist_dataset(data_dir='data/multi_mnist.pkl', Seed=0, test_ratio=0.2):
    """Load the pickled multi-MNIST dataset and split it into train/val/test.

    The pickle at ``data_dir`` must hold a list of entries carrying a ``y``
    attribute. Labels are shifted down by 10 so they start at zero, the
    list is shuffled with the seeded NumPy RNG, and a fraction
    ``test_ratio`` goes to validation plus the same fraction to test.

    Returns:
        (train_ds, val_ds, test_ds) lists of dataset entries.
    """
    # Seed every RNG so the shuffle (and hence the split) is reproducible.
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    # Raw labels start at 10; re-base them to start at zero.
    for entry in dataset:
        entry.y -= 10

    np.random.shuffle(dataset)

    n_holdout = int(np.around(test_ratio * len(dataset)))
    n_train = int(len(dataset) - 2 * n_holdout)

    train_ds = dataset[:n_train]
    val_ds = dataset[n_train:n_train + n_holdout]
    test_ds = dataset[n_train + n_holdout:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
| |
|
def get_building_dataset(data_dir='data/building_with_index.pkl', Seed=0, test_ratio=0.2):
    """Load the pickled building dataset and split it into train/val/test.

    Each entry's two-letter string label ``y`` is converted to its integer
    id via the module-level ``label_mapping``. The list is shuffled with
    the seeded NumPy RNG; a fraction ``test_ratio`` goes to validation and
    the same fraction to test.

    Returns:
        (train_ds, val_ds, test_ds) lists of dataset entries.
    """
    # Seed every RNG so the shuffle (and hence the split) is reproducible.
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    # String labels -> contiguous integer class ids.
    for entry in dataset:
        entry.y = label_mapping[entry.y]

    np.random.shuffle(dataset)

    n_holdout = int(np.around(test_ratio * len(dataset)))
    n_train = int(len(dataset) - 2 * n_holdout)

    train_ds = dataset[:n_train]
    val_ds = dataset[n_train:n_train + n_holdout]
    test_ds = dataset[n_train + n_holdout:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
| |
|
def get_mbuilding_dataset(data_dir='data/mp_building.pkl', Seed=0, test_ratio=0.2):
    """Load the pickled multi-part building dataset and split it.

    Identical pipeline to ``get_building_dataset`` but with a different
    default pickle path: labels are mapped through the module-level
    ``label_mapping``, the list is shuffled with the seeded NumPy RNG,
    and a fraction ``test_ratio`` each goes to validation and test.

    Returns:
        (train_ds, val_ds, test_ds) lists of dataset entries.
    """
    # Seed every RNG so the shuffle (and hence the split) is reproducible.
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    # String labels -> contiguous integer class ids.
    for entry in dataset:
        entry.y = label_mapping[entry.y]

    np.random.shuffle(dataset)

    n_holdout = int(np.around(test_ratio * len(dataset)))
    n_train = int(len(dataset) - 2 * n_holdout)

    train_ds = dataset[:n_train]
    val_ds = dataset[n_train:n_train + n_holdout]
    test_ds = dataset[n_train + n_holdout:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
| |
|
def get_sbuilding_dataset(data_dir='data/single_building.pkl', Seed=0, test_ratio=0.2):
    """Load the pickled single-letter building dataset and split it.

    Each entry's one-letter string label ``y`` is converted to its
    integer id via the module-level ``single_label_mapping``. The list is
    shuffled with the seeded NumPy RNG; a fraction ``test_ratio`` each
    goes to validation and test.

    Returns:
        (train_ds, val_ds, test_ds) lists of dataset entries.
    """
    # Seed every RNG so the shuffle (and hence the split) is reproducible.
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    # Single-character string labels -> contiguous integer class ids.
    for entry in dataset:
        entry.y = single_label_mapping[entry.y]

    np.random.shuffle(dataset)

    n_holdout = int(np.around(test_ratio * len(dataset)))
    n_train = int(len(dataset) - 2 * n_holdout)

    train_ds = dataset[:n_train]
    val_ds = dataset[n_train:n_train + n_holdout]
    test_ds = dataset[n_train + n_holdout:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
| |
|
def get_smnist_dataset(data_dir='data/single_mnist.pkl', Seed=0, test_ratio=0.2):
    """Load the pickled single-digit MNIST dataset and split it.

    Entries are used as-is (no label remapping). The list is shuffled
    with the seeded NumPy RNG; a fraction ``test_ratio`` each goes to
    validation and test, with the remainder as training data.

    Returns:
        (train_ds, val_ds, test_ds) lists of dataset entries.
    """
    # Seed every RNG so the shuffle (and hence the split) is reproducible.
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    np.random.shuffle(dataset)

    n_holdout = int(np.around(test_ratio * len(dataset)))
    n_train = int(len(dataset) - 2 * n_holdout)

    train_ds = dataset[:n_train]
    val_ds = dataset[n_train:n_train + n_holdout]
    test_ds = dataset[n_train + n_holdout:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
| |
|
def get_dbp_dataset(data_dir='data/triple_building.pkl', Seed=0, test_ratio=0.2):
    """Load the pickled triple-building dataset as a binary problem.

    Labels are binarized in place: any ``y >= 1`` becomes 1, everything
    else becomes 0. The list is shuffled with the seeded NumPy RNG; a
    fraction ``test_ratio`` each goes to validation and test.

    Returns:
        (train_ds, val_ds, test_ds) lists of dataset entries.
    """
    # Seed every RNG so the shuffle (and hence the split) is reproducible.
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    # Collapse multi-class labels to a binary target.
    for entry in dataset:
        entry.y = 1 if entry.y >= 1 else 0

    np.random.shuffle(dataset)

    n_holdout = int(np.around(test_ratio * len(dataset)))
    n_train = int(len(dataset) - 2 * n_holdout)

    train_ds = dataset[:n_train]
    val_ds = dataset[n_train:n_train + n_holdout]
    test_ds = dataset[n_train + n_holdout:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
| |
|
def affine_transform_to_range(ds, target_range=(-1, 1)):
    """Rescale each item's 2-D node coordinates into ``target_range``.

    Every item in ``ds`` must carry a ``pos`` tensor of shape (N, 2).
    The x and y axes are normalized independently so that each axis
    exactly spans ``target_range``. The transform is applied in place;
    ``ds`` is also returned for call-chaining convenience.

    Fix: when an axis had zero extent (all coordinates equal), the
    original code divided by zero and produced inf/NaN positions. Such
    degenerate axes are now mapped to the midpoint of ``target_range``.
    """
    lo, hi = target_range
    span = hi - lo
    mid = (hi + lo) / 2.0

    for item in ds:
        for axis in (0, 1):
            coords = item.pos[:, axis]
            c_min = torch.min(coords)
            c_max = torch.max(coords)
            extent = c_max - c_min

            if extent > 0:
                # Affine map sending [c_min, c_max] onto [lo, hi].
                scale = span / extent
                item.pos[:, axis] = coords * scale + (lo - c_min * scale)
            else:
                # Degenerate axis: place all points at the range midpoint
                # instead of dividing by zero.
                item.pos[:, axis] = torch.full_like(coords, mid)
    return ds
| |
|
class CustomDataset(Dataset):
    """Thin ``torch.utils.data.Dataset`` wrapper around an in-memory list.

    Fix: the original class only defined ``len``/``get`` (the
    torch_geometric-style accessor names), so with the
    ``torch.utils.data.Dataset`` base imported here, ``len(ds)``,
    ``ds[i]`` and DataLoader iteration all failed. The standard
    ``__len__``/``__getitem__`` protocol is implemented below, and the
    old method names are kept as backward-compatible aliases.
    """

    def __init__(self, data_list):
        super(CustomDataset, self).__init__()
        # data_list: the samples served verbatim by index.
        self.data_list = data_list

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        return self.data_list[idx]

    # Backward-compatible aliases for the original accessor names.
    def len(self):
        return len(self.data_list)

    def get(self, idx):
        return self.data_list[idx]
| | |
if __name__ == '__main__':
    # Smoke-test: build the default multi-MNIST split when run as a script.
    train_ds, val_ds, test_ds = get_mnist_dataset()
    print("")