Source code for pept.utilities.misc.aggregate

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : aggregate.py
# License: GNU v3.0
# Author : Andrei Leonard Nicusan <a.l.nicusan@bham.ac.uk>
# Date   : 10.06.2020


import numpy as np


[docs]def group_by_column(data_array, column_to_separate): '''Group the rows in a 2D `data_array` based on the unique values in a given `column_to_separate`, returning the groups as a list of numpy arrays. Parameters ---------- data_array : (M, N) numpy.ndarray A generic 2D numpy array-like (will be converted using numpy.asarray). column_to_separate : int The column index in `data_array` from which the unique values will be used for grouping. Returns ------- groups : list of numpy.ndarray A list whose elements are 2D numpy arrays - these are sub-arrays from `data_array` for which the entries in the column `column_to_separate` are the same. Raises ------ ValueError If data_array does not have exactly 2 dimensions. Examples -------- Separate a 6x3 numpy array based on the last column: >>> x = np.array([ >>> [1, 2, 1], >>> [5, 3, 1], >>> [1, 1, 2], >>> [5, 2, 1], >>> [2, 4, 2] >>> ]) >>> x_sep = pept.utilities.group_by_column(x, -1) >>> x_sep >>> [array([[1, 2, 1], >>> [5, 3, 1], >>> [5, 2, 1]]), >>> array([[1, 1, 2], >>> [2, 4, 2]])] ''' data_array = np.asarray(data_array) if data_array.ndim != 2: raise ValueError(( "\n[ERROR]: `data_array` should have exactly 2 dimensions. " f"Received {data_array} with {data_array.ndim} dimensions.\n" )) data_col = data_array[:, column_to_separate] labels = np.unique(data_col) groups = [data_array[data_col == label] for label in labels] return groups