Source code for topo_metrics.symbols

from __future__ import annotations

from typing import NamedTuple, Sequence

import numpy as np
import numpy.typing as npt

from topo_metrics.utils import uniform_repr


class VertexSymbol(NamedTuple):
    """
    Vertex Symbol (VS) of a node, stored as lists of ring sizes.

    Attributes
    ----------
    vector
        The Vertex Symbol vector.
    vector_all_rings
        The Vertex Symbol vector considering all rings.
    """

    # One inner list of ring sizes per entry of the symbol.
    vector: list[list[int]]
    # Same layout as `vector`, but built from all rings.
    vector_all_rings: list[list[int]]

    def __repr__(self) -> str:
        """Return a multi-line summary showing both string forms."""
        fields = {
            "VS": self.to_str(),
            "VS(all_rings)": self.to_str(all_rings=True),
        }
        return uniform_repr("VertexSymbol", **fields, indent_size=4)

    def to_str(self, all_rings: bool = False) -> str:
        """
        Render the Vertex Symbol as a compact string.

        Every inner list becomes one dot-separated entry. Within an
        entry the distinct ring sizes are listed in ascending order,
        separated by commas; a size that occurs more than once is
        followed by its multiplicity in parentheses. An entry holding
        more than one distinct size is wrapped in parentheses.

        Parameters
        ----------
        all_rings
            If True, format `vector_all_rings`; otherwise format
            `vector`.

        Returns
        -------
        The formatted symbol, enclosed in square brackets.
        """
        source = self.vector_all_rings if all_rings else self.vector

        pieces: list[str] = []
        for position in source:
            distinct_sizes = sorted(set(position))

            tokens = []
            for ring_size in distinct_sizes:
                multiplicity = position.count(ring_size)
                if multiplicity > 1:
                    tokens.append(f"{ring_size}({multiplicity})")
                else:
                    tokens.append(f"{ring_size}")
            joined = ",".join(tokens)

            # Parentheses are only needed to group several distinct sizes.
            if len(distinct_sizes) > 1:
                joined = f"({joined})"
            pieces.append(joined)

        return "[" + ".".join(pieces) + "]"
class CARVS(NamedTuple):
    """
    Cumulative All-Rings Vertex Symbol (CARVS) vector.

    Attributes
    ----------
    vector
        The CARVS vector.
    spread
        The standard deviation of the CARVS vectors in the network.
    is_single_node
        True if the CARVS vector is for a single-node network, False
        otherwise.
    """

    # Entry `i` is the (possibly averaged) count for ring size `i + 1`;
    # this is the indexing `__str__` uses when it enumerates from 1.
    vector: npt.NDArray[np.floating]
    spread: float
    is_single_node: bool

    @classmethod
    def from_list(cls, carvs_list: Sequence[CARVS]) -> CARVS:
        """
        Construct a CARVS object by averaging a list of CARVS objects.

        Vectors of different lengths are zero-padded on the right to the
        longest length (via `pad_carvs`) before they are averaged, so
        mismatched lengths are accepted.

        Parameters
        ----------
        carvs_list
            One or more CARVS objects to be averaged.

        Returns
        -------
        A new CARVS object whose vector is the element-wise mean of the
        padded input vectors, whose spread is the mean of the input
        spreads, and whose `is_single_node` is True only if it is True
        for every entry in `carvs_list`.

        Raises
        ------
        ValueError
            If `carvs_list` is empty.
        """
        if not carvs_list:
            raise ValueError("Cannot create a CARVS from an empty list.")

        # 1. pad the vectors to the same length.
        padded_carvs = pad_carvs(carvs_list)

        # 2. average the vectors.
        avg_vector = np.array([c.vector for c in padded_carvs]).mean(axis=0)

        # 3. average the spreads.
        avg_spread = float(np.mean([c.spread for c in carvs_list]))

        # 4. single-node only if every input is single-node.
        all_single_node = all(c.is_single_node for c in carvs_list)

        return cls(
            vector=avg_vector,
            spread=avg_spread,
            is_single_node=all_single_node,
        )

    def __str__(self) -> str:
        """
        Generate a formatted string representation of the object.

        Ring sizes whose count is below 1 are omitted. A count of 1 is
        written as the bare ring size, any other count follows the size
        in parentheses (as an integer when it is a whole number, with
        one decimal otherwise). Entries are joined with dots inside
        curly braces, and a non-zero spread is appended as ``σ=...``.

        All comparisons against whole numbers share one tolerance, so
        floating-point noise from averaging (e.g. 0.9999999 or
        1.0000001) is treated as exactly 1 rather than being dropped or
        rendered as ``size(1)``.
        """
        tolerance = 1e-5

        elements = []
        for size, count in enumerate(self.vector, 1):
            # Skip ring sizes that do not reach a count of (approximately) 1.
            if count <= 1.0 - tolerance:
                continue

            if abs(count - 1.0) < tolerance:
                elements.append(f"{size}")
                continue

            nearest = round(count)
            formatted_count = (
                f"{int(nearest)}"
                if abs(count - nearest) < tolerance
                else f"{count:.1f}"
            )
            elements.append(f"{size}({formatted_count})")

        symbol = "{" + ".".join(elements) + "}"

        if not np.isclose(self.spread, 0.0):
            symbol += f" σ={self.spread:.1f}"

        return symbol

    def __repr__(self) -> str:
        """Generate a string representation of the object."""
        return f"CARVS( {self!s} )"
############################### HELPERS ###############################
def pad_carvs(carvs_list: Sequence[CARVS]) -> Sequence[CARVS]:
    """
    Bring the vectors of several CARVS objects to a common length.

    Each vector is copied into a new float array as long as the longest
    input vector; the extra trailing entries are zero. `spread` and
    `is_single_node` are carried over unchanged.

    Parameters
    ----------
    carvs_list
        A list of CARVS objects.

    Returns
    -------
    A list of new CARVS objects, in input order, whose vectors all have
    the same length.
    """
    target_length = max(len(entry.vector) for entry in carvs_list)

    result = []
    for entry in carvs_list:
        # Start from zeros so everything past the original data is padding.
        widened = np.zeros(target_length, dtype=float)
        widened[: len(entry.vector)] = entry.vector
        result.append(
            CARVS(
                vector=widened,
                spread=entry.spread,
                is_single_node=entry.is_single_node,
            )
        )

    return result
def pad_carvs_per_atom(
    all_carvs: list[npt.NDArray[np.int_]],
) -> list[npt.NDArray[np.int_]]:
    """
    Pad per-atom CARVS arrays so they all share the same column count.

    Each array is extended along its second axis with trailing zeros
    until it is as wide as the widest array in `all_carvs`; the first
    axis is left untouched.

    Parameters
    ----------
    all_carvs
        List of 2-D CARVS arrays, one per structure.

    Returns
    -------
    List of padded arrays, in the same order as the input.
    """
    widest = max(block.shape[1] for block in all_carvs)

    padded_carvs = []
    for block in all_carvs:
        missing_columns = widest - block.shape[1]
        padded_carvs.append(
            np.pad(block, ((0, 0), (0, missing_columns)), mode="constant")
        )

    return padded_carvs
############################### ANALYSIS ###############################
def get_all_topological_distances(carvs: list[CARVS]) -> np.ndarray:
    """
    Compute the topological distances between all pairs of CARVS objects.

    The vectors are zero-padded to a common length, each is divided by
    the sum of its entries, and the pairwise Euclidean distance between
    the normalised vectors is scaled by ``1 / sqrt(2)``.

    Parameters
    ----------
    carvs
        A list of CARVS objects.

    Returns
    -------
    A square matrix of shape (n_points, n_points) containing the scaled
    Euclidean distances between all pairs of normalised vectors. The
    diagonal is exactly zero.

    Raises
    ------
    TypeError
        If `carvs` is not a list, or if any element is not a CARVS
        object.

    Notes
    -----
    A vector whose entries sum to zero is divided by zero during
    normalisation, so its row and column contain non-finite values.
    """
    if not isinstance(carvs, list):
        raise TypeError("'carvs' must be a list of CARVS objects")

    if not all(isinstance(carv, CARVS) for carv in carvs):
        raise TypeError("All elements of 'carvs' must be CARVS objects")

    # 1. gather the (padded) vectors of all carvs objects.
    carvs_vectors = np.array([padded.vector for padded in pad_carvs(carvs)])

    # 2. divide by sum of values.
    carvs_vectors /= carvs_vectors.sum(axis=1)[:, np.newaxis]

    # 3. compute distance between all pairs of vectors as:
    #
    #   d(\alpha, \beta) = \frac{1}{\sqrt{2}} | r(\alpha) - r(\beta) |
    #
    # using |a - b|^2 = |a|^2 + |b|^2 - 2 a.b. The squared norms are read
    # off the Gram matrix diagonal, so the identity cancels exactly for
    # a == b and every self-distance is exactly zero.
    gram = np.einsum("ik,jk->ij", carvs_vectors, carvs_vectors)
    squared_norms = np.diagonal(gram)
    squared_distances = (
        squared_norms[:, None] + squared_norms[None, :] - 2 * gram
    )

    # Clip tiny negative values caused by rounding before the square root.
    distances = np.sqrt(np.maximum(squared_distances, 0))

    return distances / np.sqrt(2)