Source code for porepy.utils.setmembership

"""
Various functions with set operations.
"""
from __future__ import annotations

from typing import Any, Tuple

import numba
import numpy as np
from scipy.spatial import KDTree


def unique_rows(
    data: np.ndarray[Any, np.dtype[np.float64]]
) -> Tuple[
    np.ndarray[Any, np.dtype[np.float64]],
    np.ndarray[Any, np.dtype[np.int64]],
    np.ndarray[Any, np.dtype[np.int64]],
]:
    """
    Function similar to Matlab's unique(..., 'rows').

    See also the function unique_columns_tol in this module; that one is
    likely slower, but it is understandable, documented, and has a tolerance
    option.

    Copied from
    http://stackoverflow.com/questions/16970982/find-unique-rows-in-numpy-array/
    (summary pretty far down on the page).

    Note: I have no idea what happens here.

    """
    b = np.ascontiguousarray(data).view(
        np.dtype((np.void, data.dtype.itemsize * data.shape[1]))
    )
    _, ia = np.unique(b, return_index=True)
    _, ic = np.unique(b, return_inverse=True)
    # return np.unique(b).view(data.dtype).reshape(-1, data.shape[1]), ia, ic
    return data[ia], ia, ic

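# Note on the view-based trick in unique_rows (a minimal sketch of the
# mechanism, with illustrative values): viewing the C-contiguous array as
# np.void scalars of itemsize * n_cols bytes makes np.unique compare whole
# rows as single opaque values, rather than element by element.
#
#     >>> data = np.array([[1, 2], [3, 4], [1, 2]])
#     >>> u, ia, ic = unique_rows(data)
#     >>> u
#     array([[1, 2],
#            [3, 4]])
#     >>> ia
#     array([0, 1])
#     >>> ic
#     array([0, 1, 0])
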
def ismember_rows(
    a: np.ndarray[Any, np.dtype[np.int64]],
    b: np.ndarray[Any, np.dtype[np.int64]],
    sort: bool = True,
) -> Tuple[np.ndarray[Any, np.dtype[np.bool_]], np.ndarray[Any, np.dtype[np.int64]]]:
    """
    Find *columns* of a that are also members of *columns* of b.

    The function mimics Matlab's function ismember(..., 'rows').

    TODO: Rename function, this is confusing!

    Parameters:
        a (np.array): Each column in a will search for an equal in b.
        b (np.array): Array in which we will look for a twin.
        sort (boolean, optional): If true, the arrays will be sorted before
            searching, increasing the chances for a match. Defaults to True.

    Returns:
        np.array (boolean): For each column in a, true if there is a
            corresponding column in b.
        np.array (int): Indices ia such that b[:, ia] gives the twins of the
            columns of a that were found in b (compared after sorting if sort
            is True).

    Examples:
        >>> a = np.array([[1, 3, 3, 1, 7], [3, 3, 2, 3, 0]])
        >>> b = np.array([[3, 1, 3, 5, 3], [3, 3, 2, 1, 2]])
        >>> ismember_rows(a, b)
        (array([ True,  True,  True,  True, False], dtype=bool), [1, 0, 2, 1])

        >>> a = np.array([[1, 3, 3, 1, 7], [3, 3, 2, 3, 0]])
        >>> b = np.array([[3, 1, 2, 5, 1], [3, 3, 3, 1, 2]])
        >>> ismember_rows(a, b, sort=False)
        (array([ True,  True, False,  True, False], dtype=bool), [1, 0, 1])

    """
    # IMPLEMENTATION NOTE: A serious attempt was made (June 2022) to speed up
    # this calculation by using functionality from scipy.spatial. This did not
    # work: The Scipy functions scaled poorly with the size of the arrays a
    # and b, and led to memory overflow for large arrays.

    # Sort if required, but not if the input is 1d
    if sort and a.ndim > 1:
        sa = np.sort(a, axis=0)
        sb = np.sort(b, axis=0)
    else:
        sa = a
        sb = b

    b = np.atleast_1d(b)
    a = np.atleast_1d(a)
    num_a = a.shape[-1]

    # Stack the arrays
    c = np.hstack((sa, sb))
    # Uniquify c. We don't care about the unique array itself, but rather
    # the indices that map the unique array back to the original c
    if a.ndim > 1:
        _, ind = np.unique(c, axis=1, return_inverse=True)
    else:
        _, ind = np.unique(c, return_inverse=True)

    # Indices in a and b referring to the unique array.
    # Elements in ind_a that are also in ind_b will correspond to columns
    # in a that are also found in b
    ind_a = ind[:num_a]
    ind_b = ind[num_a:]

    # Find common members
    ismem_a = np.isin(ind_a, ind_b)

    # Found this trick on
    # https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array
    # See answer by Joe Kington
    sort_ind = np.argsort(ind_b)
    ypos = np.searchsorted(ind_b[sort_ind], ind_a[ismem_a])
    ia = sort_ind[ypos]

    # Done
    return ismem_a, ia

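# Worked illustration of the argsort/searchsorted lookup used above (a minimal
# sketch with made-up values): for each element of ind_a known to be a member
# of ind_b, find its position in ind_b.
#
#     >>> ind_b = np.array([7, 2, 5])
#     >>> sort_ind = np.argsort(ind_b)
#     >>> ypos = np.searchsorted(ind_b[sort_ind], np.array([5, 7]))
#     >>> sort_ind[ypos]  # positions of 5 and 7 in ind_b
#     array([2, 0])
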
def unique_columns_tol(
    mat: np.ndarray,
    tol: float = 1e-8,
) -> Tuple[
    np.ndarray,
    np.ndarray[Any, np.dtype[np.int64]],
    np.ndarray[Any, np.dtype[np.int64]],
]:
    """
    For an array, remove columns that are closer than a given tolerance.

    To uniquify a point set, consider using the function uniquify_point_set
    instead.

    Resembles Matlab's uniquetol function, as applied to columns. To rather
    work on rows, use a transpose.

    Parameters:
        mat (np.ndarray, nd x n_pts): Columns to be uniquified.
        tol (double, optional): Tolerance for when columns are considered
            equal. Should be seen in connection with the distance between the
            points in the point set (due to rounding errors). Defaults to
            1e-8.

    Returns:
        np.ndarray: Unique columns.
        new_2_old: Index of which points are preserved.
        old_2_new: Index of the representation of old points in the reduced
            list.

    Example:
        >>> p_un, n2o, o2n = unique_columns_tol(np.array([[1., 0., 1.], [1., 0., 1.]]))
        >>> p_un
        array([[1., 0.],
               [1., 0.]])
        >>> n2o
        array([0, 1])
        >>> o2n
        array([0, 1, 0])

    """
    # Treat 1d array as 2d
    mat = np.atleast_2d(mat)

    # Some special cases
    if mat.shape[1] == 0:
        # An empty array gets an empty return
        return mat, np.array([], dtype=int), np.array([], dtype=int)
    elif mat.shape[1] == 1:
        # An array with a single column needs no processing
        return mat, np.array([0], dtype=int), np.array([0], dtype=int)

    # If the matrix contains integers, and the tolerance is less than 1/2, we
    # can use numpy's unique function
    if issubclass(mat.dtype.type, np.int_) and tol < 0.5:
        un_ar, new_2_old, old_2_new = np.unique(
            mat, return_index=True, return_inverse=True, axis=1
        )
        return un_ar, new_2_old, old_2_new

    @numba.jit("Tuple((b1[:],i8[:],i8[:]))(f8[:, :],f8)", nopython=True, cache=True)
    def _numba_distance(mat, tol):
        """Helper function for numba acceleration of unique_columns_tol.

        IMPLEMENTATION NOTE: Calling this function many times (it is unclear
        what this really means, but likely >= hundreds of thousands of times)
        may lead to increased memory consumption and significant reductions
        in performance. This could be related to this GH issue

            https://github.com/numba/numba/issues/1361

        However, it is not clear that this really is the error. No solution
        is known at the time of writing; the only viable options seem to be
        algorithmic modifications that reduce the number of calls to this
        function.
        """
        num_cols = mat.shape[0]
        keep = np.zeros(num_cols, dtype=numba.types.bool_)
        keep[0] = True
        keep_counter = 1

        # Map from old points to the unique subspace. Defaults to map to
        # itself.
        old_2_new = np.arange(num_cols)

        # Loop over all points, check if each is already represented in the
        # kept list
        for i in range(1, num_cols):
            d = np.sum((mat[i] - mat[keep]) ** 2, axis=1)
            condition = d < tol**2

            if np.any(condition):
                # We will not keep this point
                old_2_new[i] = np.argmin(d)
            else:
                # We have found a new point
                keep[i] = True
                old_2_new[i] = keep_counter
                keep_counter += 1

        # Finally find which elements we kept
        new_2_old = np.nonzero(keep)[0]

        return keep, new_2_old, old_2_new

    mat_t = np.atleast_2d(mat.T).astype(float)

    # IMPLEMENTATION NOTE: It could pay off to make a pure Python
    # implementation to be used for small arrays; however, attempts at making
    # this work in practice have failed.
    keep, new_2_old, old_2_new = _numba_distance(mat_t, tol)

    return mat[:, keep], new_2_old, old_2_new

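# A minimal usage sketch of the tolerance behavior (coordinates made up for
# illustration): the third column lies within tol of the second, so it is
# merged into it, and old_2_new records the representative of each original
# column.
#
#     >>> mat = np.array([[0.0, 1.0, 1.0 + 1e-10], [0.0, 1.0, 1.0]])
#     >>> u, n2o, o2n = unique_columns_tol(mat, tol=1e-8)
#     >>> u
#     array([[0., 1.],
#            [0., 1.]])
#     >>> n2o
#     array([0, 1])
#     >>> o2n
#     array([0, 1, 1])
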
def uniquify_point_set(
    points: np.ndarray[Any, np.dtype[np.float64]], tol: float = 1e-8
) -> Tuple[
    np.ndarray[Any, np.dtype[np.float64]],
    np.ndarray[Any, np.dtype[np.int64]],
    np.ndarray[Any, np.dtype[np.int64]],
]:
    """Uniquify a point set so that no two points are closer than a distance
    tol from each other.

    This function partially overlaps the function unique_columns_tol, but the
    latter is more general, as it provides fast treatment of integer arrays.

    FIXME: It should be possible to unify the two implementations; however,
    more experience is needed before doing so.

    Parameters:
        points (np.ndarray, nd x n_pts): Columns to be uniquified.
        tol (double, optional): Tolerance for when columns are considered
            equal. Should be seen in connection with the distance between the
            points in the point set (due to rounding errors). Defaults to
            1e-8.

    Returns:
        np.ndarray: Unique columns.
        new_2_old: Index of which points are preserved.
        old_2_new: Index of the representation of old points in the reduced
            list.

    """
    # The implementation uses Scipy's KDTree implementation to efficiently
    # get the distance between points.
    num_p = points.shape[1]

    # Transpose needed to comply with KDTree.
    tree = KDTree(points.T)

    # Get all pairs of points closer than the tolerance.
    pairs = tree.query_pairs(tol, output_type="ndarray")

    if pairs.size == 0:
        # No duplicate points were found; we can return.
        return points, np.arange(num_p), np.arange(num_p)

    # Process information to arrive at a unique point set. This is technical,
    # since we need to deal with cases where more than two points coincide.
    # As an example: if the points p1, p2 and p3 coincide, they will be
    # identified either by the pairs {(i1, i2), (i1, i3)}, by
    # {(i1, i2), (i2, i3)}, or by {(i1, i3), (i2, i3)}. To be clear,
    # more than three points can coincide - such configurations will
    # include more point combinations, but will not introduce additional
    # complications.

    # Sort the index pairs of identical points for simpler identification.
    # NOTE: pairs, as returned by KDTree, is a num_pairs x 2 array, thus
    # sorting the pairs should be along axis 1.
    pair_arr = np.sort(pairs, axis=1)

    # Sort the pairs lexicographically: the lexsort orders first according to
    # pair_arr[:, 0] (the point with the lowest index in each pair), and then
    # according to the second index (pair_arr[:, 1]).
    # Also note the transpose back to a 2 x num_pairs array.
    sorted_arr = pair_arr[np.lexsort((pair_arr[:, 1], pair_arr[:, 0]))].T

    # Find points that appear both in the first and the second row. Referring
    # to the example with three coinciding points, this will identify
    # triplets expressed as pairs {(i1, i2), (i2, i3)}.
    duplicate = np.isin(sorted_arr[0], sorted_arr[1])
    # Array with chained pairs removed; in the example, (i2, i3) is dropped,
    # since i2 is already identified as a duplicate of i1.
    reduced_arr = sorted_arr[:, np.logical_not(duplicate)]

    # Also identify points that are not involved in any pairs; these should
    # be included in the unique set. Append these to the point array.
    not_in_pairs = np.setdiff1d(np.arange(points.shape[1]), pair_arr.ravel())
    reduced_arr = np.hstack((reduced_arr, np.tile(not_in_pairs, (2, 1))))

    # The array can still contain several pairs with the same first index, of
    # the type {(i1, i2), (i1, i3)}, again referring to the example with
    # three identical points.
    # These can be identified by a unique on the first row.
    ia = np.unique(reduced_arr[0])

    # Make a mapping from all points to the reduced set.
    ib = np.arange(num_p)
    _, inv_map = np.unique(reduced_arr[0], return_inverse=True)
    ib[reduced_arr[0]] = inv_map
    ib[reduced_arr[1]] = ib[reduced_arr[0]]

    # Uniquify points.
    upoints = points[:, ia]

    # Done.
    return upoints, ia, ib

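# A minimal usage sketch (coordinates made up for illustration): the first
# and third points coincide to within the tolerance, so the third is mapped
# onto the first.
#
#     >>> pts = np.array([[0.0, 1.0, 0.0], [0.0, 0.0, 1e-12]])
#     >>> up, ia, ib = uniquify_point_set(pts, tol=1e-8)
#     >>> up
#     array([[0., 1.],
#            [0., 0.]])
#     >>> ia
#     array([0, 1])
#     >>> ib
#     array([0, 1, 0])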