#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# pept is a Python library that unifies Positron Emission Particle
# Tracking (PEPT) research, including tracking, simulation, data analysis
# and visualisation tools.
#
# If you used this codebase or any software making use of it in a scientific
# publication, you must cite the following paper:
# Nicuşan AL, Windows-Yule CR. Positron emission particle tracking
# using machine learning. Review of Scientific Instruments.
# 2020 Jan 1;91(1):013329.
# https://doi.org/10.1063/1.5129251
#
# Copyright (C) 2019-2021 the pept developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# File : cutpoints.py
# License: GNU v3.0
# Author : Andrei Leonard Nicusan <a.l.nicusan@bham.ac.uk>
# Date : 13.04.2020
import numpy as np
import pept
def find_cutpoints(
sample_lines,
max_distance,
cutoffs = None,
append_indices = False
):
'''Find the cutpoints from a sample / array of LoRs.
A cutpoint is the point in 3D space that minimises the distance between any
two lines. For any two non-parallel 3D lines, this point corresponds to the
midpoint of the unique segment that is perpendicular to both lines.
This function considers every pair of lines in `sample_lines` and returns
all the cutpoints that satisfy the following conditions:
1. The distance between the two lines is smaller than `max_distance`.
2. The cutpoint is within the `cutoffs`.
Parameters
----------
sample_lines : (N, M >= 7) numpy.ndarray
A sample of LoRs, where each row is `[time, x1, y1, z1, x2, y2, z2]`,
such that every line is defined by the points `[x1, y1, z1]` and
`[x2, y2, z2]`.
max_distance : float
The maximum distance between any two lines for their cutpoint to be
considered. A good starting value would be 0.1 mm for small tracers
and/or clean data, or 0.2 mm for larger tracers and/or noisy data.
cutoffs : list, optional
The cutoffs for each dimension, formatted as `[x_min, x_max,
y_min, y_max, z_min, z_max]`. If it is `None`, they are computed
automatically by calling `get_cutoffs`. The default is `None`.
append_indices : bool, optional
If set to `True`, the indices of the individual LoRs that were used
to compute each cutpoint are also appended to the returned array.
Default is `False`.
Returns
-------
cutpoints : (M, 4) or (M, 6) numpy.ndarray
A numpy array of the calculated cutpoints. If `append_indices` is
`False`, then the columns are [time, x, y, z]. If `append_indices` is
`True`, then the columns are [time, x, y, z, i, j], where `i` and `j`
are the LoR indices from `sample_lines` that were used to compute the
weighted cutpoints. The time is the average between the timestamps of
the two LoRs that were used to compute the cutpoint. The first column
(for time) is sorted.
Raises
------
ValueError
If `sample_lines` is not a numpy array with shape (N, M >= 7).
ValueError
If `cutoffs` is not a one-dimensional array with values
`[min_x, max_x, min_y, max_y, min_z, max_z]`
See Also
--------
pept.tracking.peptml.Cutpoints : Compute cutpoints from `pept.LineData`.
pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
'''
if not isinstance(sample_lines, pept.LineData):
sample_lines = pept.LineData(sample_lines)
lines = sample_lines.lines
lines = np.asarray(lines, order = 'C', dtype = float)
max_distance = float(max_distance)
# If cutoffs were not defined, automatically compute them
if cutoffs is None:
cutoffs = get_cutoffs(lines)
else:
cutoffs = np.asarray(cutoffs, order = 'C', dtype = float)
if cutoffs.ndim != 1 or len(cutoffs) != 6:
raise ValueError((
"\n[ERROR]: cutoffs should be a one-dimensional array with "
"values [min_x, max_x, min_y, max_y, min_z, max_z]. Received "
f"{cutoffs}.\n"
))
sample_cutpoints = pept.utilities.find_cutpoints(
lines,
max_distance,
cutoffs,
append_indices = append_indices
)
columns = ["t", "x", "y", "z"]
if append_indices:
columns += ["line_index1", "line_index2"]
points = pept.PointData(sample_cutpoints, columns = columns)
# Add optional metadata to the points; because they have an underscore,
# they won't be propagated when new objects are constructed
points._max_distance = max_distance
points._cutoffs = cutoffs
if append_indices:
points._lines = sample_lines
return points
def get_cutoffs(sample):
'''Compute the cutoffs from a sample of LoR data.
It computes the cutoffs from the minimum and maximum values of the LoRs in
`sample` in each dimension (e.g. the x-dimension is defined by data in
columns 1 and 4).
Parameters
----------
sample : (N, M >= 7) numpy.ndarray
A sample of LoRs, where each row is `[time, x1, y1, z1, x2, y2, z2]`,
such that every line is defined by the points `[x1, y1, z1]` and
`[x2, y2, z2]`.
Returns
-------
cutoffs : (6,) numpy.ndarray
The computed cutoffs for each dimension, formatted as
`[x_min, x_max, y_min, y_max, z_min, z_max]`.
Raises
------
ValueError
If `sample` is not a numpy array with shape (N, M >= 7).
See Also
--------
pept.tracking.peptml.Cutpoints : Compute cutpoints from `pept.LineData`.
pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
'''
# Check sample has shape (N, M >= 7)
if sample.ndim != 2 or sample.shape[1] < 7:
raise ValueError((
"\n[ERROR]: `sample_lines` should have dimensions (M, N), "
f" where N >= 7. Received {sample.shape}.\n"
))
# Compute cutoffs for cutpoints as the (min, max) values of the lines
# Minimum value of the two points that define a line
min_x = min(sample[:, 1].min(),
sample[:, 4].min())
# Maximum value of the two points that define a line
max_x = max(sample[:, 1].max(),
sample[:, 4].max())
# Minimum value of the two points that define a line
min_y = min(sample[:, 2].min(),
sample[:, 5].min())
# Maximum value of the two points that define a line
max_y = max(sample[:, 2].max(),
sample[:, 5].max())
# Minimum value of the two points that define a line
min_z = min(sample[:, 3].min(),
sample[:, 6].min())
# Maximum value of the two points that define a line
max_z = max(sample[:, 3].max(),
sample[:, 6].max())
cutoffs = np.array([min_x, max_x, min_y, max_y, min_z, max_z],
dtype = float)
return cutoffs
[docs]class Cutpoints(pept.base.LineDataFilter):
'''Transform LoRs (a `pept.LineData` instance) into *cutpoints* (a
`pept.PointData` instance) for clustering, in parallel.
Under typical usage, the `Cutpoints` class is initialised with a
`pept.LineData` instance, automatically calculating the cutpoints from the
samples of lines. The `Cutpoints` class inherits from `pept.PointData`,
such that once the cutpoints have been computed, all the methods from the
parent class `pept.PointData` can be used on them (such as visualisation
functionality).
For more control over the operations, `pept.tracking.peptml.find_cutpoints`
can be used - it receives a generic numpy array of LoRs (one 'sample') and
returns a numpy array of cutpoints.
Attributes
----------
max_distance : float
The maximum distance between any two lines for their cutpoint to be
considered. A good starting value would be 0.1 mm for small tracers
and/or clean data, or 0.2 mm for larger tracers and/or noisy data.
cutoffs : list-like of length 6
A list (or equivalent) of the cutoff distances for every axis,
formatted as `[x_min, x_max, y_min, y_max, z_min, z_max]`. Only the
cutpoints which fall within these cutoff distances are considered. The
default is None, in which case they are automatically computed using
`pept.tracking.peptml.get_cutoffs`.
See Also
--------
pept.LineData : Encapsulate LoRs for ease of iteration and plotting.
pept.tracking.HDBSCAN : Efficient, parallel HDBSCAN-based clustering of
(cut)points.
pept.read_csv : Fast CSV file reading into numpy arrays.
Examples
--------
Compute the cutpoints for a `LineData` instance between lines that are less
than 0.1 apart:
>>> line_data = pept.LineData(example_data)
>>> cutpts = peptml.Cutpoints(0.1).fit(line_data)
Compute the cutpoints for a single sample:
>>> sample = line_data[0]
>>> cutpts_sample = peptml.Cutpoints(0.1).fit_sample(sample)
'''
[docs] def __init__(
self,
max_distance,
cutoffs = None,
append_indices = False,
):
'''Cutpoints class constructor.
Parameters
----------
max_distance : float
The maximum distance between any two lines for their cutpoint to be
considered. A good starting value would be 0.1 mm for small tracers
and/or clean data, or 0.5 mm for larger tracers and/or noisy data.
cutoffs : list-like of length 6, optional
A list (or equivalent) of the cutoff distances for every axis,
formatted as `[x_min, x_max, y_min, y_max, z_min, z_max]`. Only the
cutpoints which fall within these cutoff distances are considered.
The default is None, in which case they are automatically computed
using `pept.tracking.peptml.get_cutoffs`.
append_indices : bool, default False
If set to `True`, the indices of the individual LoRs that were used
to compute each cutpoint are also appended to the returned array.
Raises
------
ValueError
If `cutoffs` is not a one-dimensional array with values formatted
as `[min_x, max_x, min_y, max_y, min_z, max_z]`.
'''
# Setting class attributes. The ones below call setters which do type
# checking
self.cutoffs = cutoffs
self.append_indices = append_indices
self.max_distance = max_distance
@property
def max_distance(self):
return self._max_distance
@max_distance.setter
def max_distance(self, max_distance):
self._max_distance = float(max_distance)
@property
def cutoffs(self):
return self._cutoffs
@cutoffs.setter
def cutoffs(self, cutoffs):
if cutoffs is not None:
cutoffs = np.asarray(cutoffs, order = 'C', dtype = float)
if cutoffs.ndim != 1 or len(cutoffs) != 6:
raise ValueError((
"\n[ERROR]: cutoffs should be a one-dimensional array "
"with values [min_x, max_x, min_y, max_y, min_z, max_z]. "
f"Received {cutoffs}.\n"
))
self._cutoffs = cutoffs
else:
self._cutoffs = None
@property
def append_indices(self):
return self._append_indices
@append_indices.setter
def append_indices(self, append_indices):
self._append_indices = bool(append_indices)
[docs] def fit_sample(self, sample_lines):
if not isinstance(sample_lines, pept.LineData):
sample_lines = pept.LineData(sample_lines)
# If cutoffs were not defined, automatically compute them
if self.cutoffs is not None:
cutoffs = self.cutoffs
else:
cutoffs = get_cutoffs(sample_lines.lines)
# Only compute cutpoints if there are at least 2 LoRs
if len(sample_lines.lines) >= 2:
sample_cutpoints = pept.utilities.find_cutpoints(
sample_lines.lines,
self.max_distance,
cutoffs,
append_indices = self.append_indices,
)
else:
sample_cutpoints = np.empty((0, 6 if self.append_indices else 4))
# Column names
columns = ["t", "x", "y", "z"]
if self.append_indices:
columns += ["line_index1", "line_index2"]
# Encapsulate points in a PointData
points = pept.PointData(sample_cutpoints, columns = columns)
# Add optional metadata to the points; because they have an underscore,
# they won't be propagated when new objects are constructed
points.attrs["_max_distance"] = self.max_distance
points.attrs["_cutoffs"] = cutoffs
# If LoR indices were appended, also include the cutpoints' constituent
# lines
if self.append_indices:
points.attrs["_lines"] = sample_lines
return points