Source code for pept.tracking.peptml.minpoints

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


#    pept is a Python library that unifies Positron Emission Particle
#    Tracking (PEPT) research, including tracking, simulation, data analysis
#    and visualisation tools.
#
#    If you used this codebase or any software making use of it in a scientific
#    publication, you must cite the following paper:
#        Nicuşan AL, Windows-Yule CR. Positron emission particle tracking
#        using machine learning. Review of Scientific Instruments.
#        2020 Jan 1;91(1):013329.
#        https://doi.org/10.1063/1.5129251
#
#    Copyright (C) 2019-2021 the pept developers
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.


# File   : minpoints.py
# License: GNU v3.0
# Author : Andrei Leonard Nicusan <a.l.nicusan@bham.ac.uk>
# Date   : 20.10.2020


import  numpy               as      np

import  pept
from    .cutpoints          import  get_cutoffs


def find_minpoints(
    sample_lines,
    num_lines,
    max_distance,
    cutoffs = None,
    append_indices = False
):
    '''Compute the minimum distance points (MDPs) from all combinations of
    `num_lines` lines given in an array of lines `sample_lines`.

    Given a sample of lines, this functions computes the minimum distance
    points (MDPs) for every possible combination of `num_lines` lines. The
    returned numpy array contains all MDPs that satisfy the following:

    1. Are within the `cutoffs`.
    2. Are closer to all the constituent LoRs than `max_distance`.

    Parameters
    ----------
    sample_lines : (M, N) numpy.ndarray
        A 2D array of lines, where each line is defined by two points such that
        every row is formatted as `[t, x1, y1, z1, x2, y2, z2, etc.]`. It
        *must* have at least 2 lines and the combination size `num_lines`
        *must* be smaller or equal to the number of lines. Put differently:
        2 <= num_lines <= len(sample_lines).

    num_lines : int
        The number of lines in each combination of LoRs used to compute the
        MDP. This function considers every combination of `numlines` from the
        input `sample_lines`. It must be smaller or equal to the number of
        input lines `sample_lines`.

    max_distance : float
        The maximum allowed distance between an MDP and its constituent lines.
        If any distance from the MDP to one of its lines is larger than
        `max_distance`, the MDP is thrown away.

    cutoffs : (6,) numpy.ndarray, optional
        An array of spatial cutoff coordinates with *exactly 6 elements* as
        [x_min, x_max, y_min, y_max, z_min, z_max]. If any MDP lies outside
        this region, it is thrown away. If it is `None`, they are computed
        automatically by calling `get_cutoffs`. The default is `None`.

    append_indices : bool, default False
        A boolean specifying whether to include the indices of the lines used
        to compute each MDP. If `False`, the output array will only contain the
        [time, x, y, z] of the MDPs. If `True`, the output array will have
        extra columns [time, x, y, z, line_idx(1), ..., line_idx(n)] where
        n = `num_lines`.

    Returns
    -------
    minpoints : (M, N) numpy.ndarray
        A 2D array of `float`s containing the time and coordinates of the MDPs
        [time, x, y, z]. The time is computed as the average of the constituent
        lines. If `append_indices` is `True`, then `num_lines` indices of the
        constituent lines are appended as extra columns:
        [time, x, y, z, line_idx1, line_idx2, ..]. The first column (for time)
        is sorted.

    Raises
    ------
    ValueError
        If `sample_lines` is not a numpy array with shape (N, M >= 7).

    ValueError
        If 2 <= num_lines <= len(sample_lines) is not satisfied.

    ValueError
        If `cutoffs` is not a one-dimensional array with values
        `[min_x, max_x, min_y, max_y, min_z, max_z]`

    See Also
    --------
    pept.tracking.peptml.Minpoints : Compute minpoints from `pept.LineData`.
    pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
    '''

    if not isinstance(sample_lines, pept.LineData):
        sample_lines = pept.LineData(sample_lines)

    lines = sample_lines.lines

    lines = np.asarray(lines, order = 'C', dtype = float)

    num_lines = int(num_lines)
    max_distance = float(max_distance)

    if cutoffs is None:
        cutoffs = get_cutoffs(sample_lines)
    else:
        cutoffs = np.asarray(cutoffs, order = 'C', dtype = float)
        if cutoffs.ndim != 1 or len(cutoffs) != 6:
            raise ValueError((
                "\n[ERROR]: cutoffs should be a one-dimensional array with "
                "values [min_x, max_x, min_y, max_y, min_z, max_z]. Received "
                f"{cutoffs}.\n"
            ))

    sample_minpoints = pept.utilities.find_minpoints(
        lines,
        num_lines,
        max_distance,
        cutoffs,
        append_indices = append_indices
    )

    columns = ["t", "x", "y", "z"]
    if append_indices:
        columns += [f"line_index{i + 1}" for i in range(num_lines)]

    points = pept.PointData(sample_minpoints, columns = columns)

    # Add optional metadata to the points; because they have an underscore,
    # they won't be propagated when new objects are constructed
    points._max_distance = max_distance
    points._cutoffs = cutoffs
    points._num_lines = num_lines

    if append_indices:
        points._lines = sample_lines

    return points




[docs]class Minpoints(pept.base.LineDataFilter): '''Transform LoRs (a `pept.LineData` instance) into *minpoints* (a `pept.PointData` instance) for clustering, in parallel. Given a sample of lines, the minpoints are the minimum distance points (MDPs) for every possible combination of `num_lines` lines that satisfy the following conditions: 1. Are within the `cutoffs`. 2. Are closer to all the constituent LoRs than `max_distance`. Under typical usage, the `Minpoints` class is initialised with a `pept.LineData` instance, automatically calculating the minpoints from the samples of lines. The `Minpoints` class inherits from `pept.PointData`, such that once the cutpoints have been computed, all the methods from the parent class `pept.PointData` can be used on them (such as visualisation functionality). For more control over the operations, `pept.tracking.peptml.find_minpoints` can be used - it receives a generic numpy array of LoRs (one 'sample') and returns a numpy array of cutpoints. Attributes ---------- line_data : instance of pept.LineData The LoRs for which the cutpoints will be computed. It must be an instance of `pept.LineData`. num_lines : int The number of lines in each combination of LoRs used to compute the MDP. This function considers every combination of `num_lines` from the input `sample_lines`. It must be smaller or equal to the number of input lines `sample_lines`. max_distance : float The maximum allowed distance between an MDP and its constituent lines. If any distance from the MDP to one of its lines is larger than `max_distance`, the MDP is thrown away. A good starting value would be 0.1 mm for small tracers and/or clean data, or 0.2 mm for larger tracers and/or noisy data. cutoffs : list-like of length 6 A list (or equivalent) of the cutoff distances for every axis, formatted as `[x_min, x_max, y_min, y_max, z_min, z_max]`. Only the minpoints which fall within these cutoff distances are considered. The default is None, in which case they are automatically computed using `pept.tracking.peptml.get_cutoffs`. See Also -------- pept.LineData : Encapsulate LoRs for ease of iteration and plotting. pept.tracking.peptml.HDBSCANClusterer : Efficient, parallel HDBSCAN-based clustering of cutpoints. pept.scanners.ParallelScreens : Read in and initialise a `pept.LineData` instance from parallel screens PET/PEPT detectors. pept.utilities.read_csv : Fast CSV file reading into numpy arrays. Examples -------- Compute the minpoints for a `LineData` instance for all triplets of lines that are less than 0.1 from those lines: >>> line_data = pept.LineData(example_data) >>> minpts = peptml.Minpoints(line_data, 3, 0.1) Compute the minpoints for a single sample: >>> sample = line_data[0] >>> cutpts_sample = peptml.find_minpoints(sample, 3, 0.1) '''
[docs] def __init__( self, num_lines, max_distance, cutoffs = None, append_indices = False, ): '''Minpoints class constructor. Parameters ---------- num_lines : int The number of lines in each combination of LoRs used to compute the MDP. This function considers every combination of `num_lines` from the input `sample_lines`. It must be smaller or equal to the number of input lines `sample_lines`. max_distance : float The maximum allowed distance between an MDP and its constituent lines. If any distance from the MDP to one of its lines is larger than `max_distance`, the MDP is thrown away. A good starting value would be 0.1 mm for small tracers and/or clean data, or 0.2 mm for larger tracers and/or noisy data. cutoffs : list-like of length 6, optional A list (or equivalent) of the cutoff distances for every axis, formatted as `[x_min, x_max, y_min, y_max, z_min, z_max]`. Only the minpoints which fall within these cutoff distances are considered. The default is None, in which case they are automatically computed using `pept.tracking.peptml.get_cutoffs`. append_indices : bool, default False If set to `True`, the indices of the individual LoRs that were used to compute each minpoint are also appended to the returned array. Raises ------ TypeError If `line_data` is not an instance of `pept.LineData`. ValueError If 2 <= num_lines <= len(sample_lines) is not satisfied. ValueError If `cutoffs` is not a one-dimensional array with values formatted as `[min_x, max_x, min_y, max_y, min_z, max_z]`. ''' # Setting class attributes. The ones below call setters which do type # checking self.num_lines = num_lines self.max_distance = max_distance self.cutoffs = cutoffs self.append_indices = append_indices
@property def num_lines(self): return self._num_lines @num_lines.setter def num_lines(self, num_lines): self._num_lines = int(num_lines) @property def max_distance(self): return self._max_distance @max_distance.setter def max_distance(self, max_distance): self._max_distance = float(max_distance) @property def cutoffs(self): return self._cutoffs @cutoffs.setter def cutoffs(self, cutoffs): if cutoffs is not None: cutoffs = np.asarray(cutoffs, order = 'C', dtype = float) if cutoffs.ndim != 1 or len(cutoffs) != 6: raise ValueError(( "\n[ERROR]: cutoffs should be a one-dimensional array " "with values [min_x, max_x, min_y, max_y, min_z, max_z]. " f"Received {cutoffs}.\n" )) self._cutoffs = cutoffs else: self._cutoffs = None @property def append_indices(self): return self._append_indices @append_indices.setter def append_indices(self, append_indices): self._append_indices = bool(append_indices)
[docs] def fit_sample(self, sample_lines): if not isinstance(sample_lines, pept.LineData): sample_lines = pept.LineData(sample_lines) # If cutoffs were not defined, automatically compute them if self.cutoffs is not None: cutoffs = self.cutoffs else: cutoffs = get_cutoffs(sample_lines.lines) # Only compute minpoints if there are at least num_lines LoRs if len(sample_lines.lines) >= self.num_lines: sample_minpoints = pept.utilities.find_minpoints( sample_lines.lines, self.num_lines, self.max_distance, cutoffs, append_indices = self.append_indices, ) else: ncols = 4 + self.num_lines if self.append_indices else 4 sample_minpoints = np.empty((0, ncols)) # Column names columns = ["t", "x", "y", "z"] if self.append_indices: columns += [f"line_index{i + 1}" for i in range(self.num_lines)] # Encapsulate minpoints in a PointData points = pept.PointData(sample_minpoints, columns = columns) # Add optional metadata to the points; because they have an underscore, # they won't be propagated when new objects are constructed points.attrs["_num_lines"] = self.num_lines points.attrs["_max_distance"] = self.max_distance points.attrs["_cutoffs"] = cutoffs # If LoR indices were appended, also include the constituent LoRs if self.append_indices: points.attrs["_lines"] = sample_lines return points