Source code for metamorphic_relations.Data

import numpy as np
import random


[docs]class Data: """ Stores the data :param train_x: a numpy array, the first dimension is the index of elements for training :param train_y: a 1D numpy array of label indices for training :param test_x: a numpy array, the first dimension is the index of elements for testing :param test_y: a 1D numpy array of label indices for testing :param max_y: the largest y value possible """ def __init__(self, train_x: np.array, train_y: np.array, test_x: np.array, test_y: np.array, max_y: int): self.train_x = train_x self.train_y = train_y self.test_x = test_x self.test_y = test_y self.train = (train_x, train_y) self.test = (test_x, test_y) self.max_y = max_y self.shuffle_train()
[docs] def update_train(self, train_x: np.array, train_y: np.array): """ Updates the training data :param train_x: new training x data :param train_y: new training y data """ if len(train_x) != len(train_y): raise Exception("train_x and train_y must be of the same length") self.train_x = train_x self.train_y = train_y self.train = (train_x, train_y)
[docs] def shuffle_train(self): """ Rearranges the order of the training data """ train_data = list(zip(self.train_x, self.train_y)) random.shuffle(train_data) train_x = [train[0] for train in train_data] train_y = [train[1] for train in train_data] train_x = np.array(train_x) train_y = np.array(train_y) self.update_train(train_x, train_y)
[docs] def update_test(self, test_x: np.array, test_y: np.array): """ Updates the testing data :param test_x: new testing x data :param test_y: new testing y data """ if len(test_x) != len(test_y): raise Exception("test_x and test_y must be of the same length") self.test_x = test_x self.test_y = test_y self.train = (test_x, test_y)
[docs] @staticmethod def concat_lists(lists: list[tuple[np.array, np.array]]) -> tuple[np.array, np.array]: """ Takes a list of pairs of numpy arrays of xs and ys and makes them a single xs and ys list :param lists: a numpy array of pairs of numpy arrays of xs and ys e.g. [[xs1, ys1], [xs2, ys2]] :return: a tuple of xs and ys e.g. (xs1 + xs2, ys1 + ys2) """ if len(lists[0]) != 2: raise Exception("The input must have exactly 2 elements in the second dimension") xs = np.zeros(tuple([0] + list(lists[0][0].shape)[1:])) ys = np.zeros((0,), dtype=int) for i in range(len(lists)): if len(lists[i][0]) != len(lists[i][1]): raise Exception("Each pair of xs and ys must have the same length") xs = np.concatenate((xs, lists[i][0])) ys = np.concatenate((ys, lists[i][1])) return np.array(xs), np.array(ys)
[docs] @staticmethod def group_by_label(y: np.array, max_y: int) -> list[list[int]]: """ Groups an array of ints by their values. E.g. ([3, 3, 2, 3, 1, 0], 5) -> [[5], [4], [2], [0, 1, 3], []] :param y: a numpy array of ints :param max_y: the maximum possible number of values :return: a list of y indices for each possible y value """ group_indices = [[] for _ in range(max_y)] for i in range(y.shape[0]): if y[i] < 0: raise Exception("y values must be positive") elif y[i] > max_y: raise Exception("max y must be at least as large as the largest y value given") group_indices[y[i]].append(i) return group_indices
[docs] def get_train_subset(self, i_min: int = 0, i_max: int = 9999999) -> tuple[np.array, np.array]: """ Gets a subset of the training data :param i_min: the lower bound index (inclusive) :param i_max: the upper bound index (not inclusive) :return: train_x_subset, train_y_subset """ return self.train_x[i_min:i_max], self.train_y[i_min:i_max]