"""Source code for corrdim.dimension."""

from __future__ import annotations

from typing import Optional, Tuple

import numpy as np
import sklearn.linear_model

from .types import CurveResult, DimensionResult
from .utils import clamp


def auto_linear_region_bounds(
    sequence_length: int,
    epsilons: np.ndarray,
    corrints: np.ndarray,
) -> Tuple[float, float]:
    """Choose correlation-integral bounds that leave at least two curve points.

    Candidate ``(low, high)`` windows are tried from narrowest to widest. The
    first three share the lower bound ``20 / (n * (n - 1))`` and use upper
    bounds ``1/n``, ``10/n`` and ``100/n`` (``n = sequence_length``); the last
    candidate is the full range ``(0.0, 1.0)``. The first window for which
    ``clamp`` returns two or more points wins.

    Args:
        sequence_length: Length ``n`` used to scale the candidate windows.
            Must be at least 2, otherwise the bounds are undefined.
        epsilons: Curve abscissae, forwarded to ``clamp``.
        corrints: Correlation-integral values, forwarded to ``clamp``.

    Returns:
        The ``(low, high)`` pair of the first candidate that keeps >= 2 points.

    Raises:
        ValueError: If ``sequence_length < 2`` or if no candidate window keeps
            at least two points.
    """
    n = sequence_length
    if n < 2:
        # n * (n - 1) would be zero (or n itself zero): report it as the same
        # exception type callers already handle instead of ZeroDivisionError.
        raise ValueError("sequence_length must be at least 2 to select a correlation-integral range.")

    # Same arithmetic order as before so the returned floats are bit-identical.
    lower = 20.0 / n / (n - 1)
    candidates = (
        (lower, 1.0 / n),
        (lower, 1.0 / n * 10),
        (lower, 1.0 / n * 100),
        (0.0, 1.0),
    )
    for low, high in candidates:
        # NOTE(review): clamp presumably keeps the points whose second array
        # (here corrints) lies within [low, high] -- confirm in .utils.
        eps_filtered, _corr_filtered = clamp(epsilons, corrints, low=low, high=high)
        if len(eps_filtered) >= 2:
            return low, high
    raise ValueError("Not enough points in the selected correlation-integral range.")
def _positive_finite(values: np.ndarray) -> np.ndarray:
    """Return a boolean mask of the entries that are finite and strictly positive."""
    return np.isfinite(values) & (values > 0)


def estimate_dimension_from_curve(
    curve: CurveResult,
    correlation_integral_range: Optional[Tuple[float, float]] = None,
    epsilon_range: Optional[Tuple[float, float]] = None,
) -> DimensionResult:
    """Estimate the correlation dimension as a log-log slope of one curve.

    The points of ``curve`` are restricted to a fitting region, then
    ``log10(corrint)`` is regressed on ``log10(epsilon)`` with ordinary least
    squares; the slope is reported as ``corrdim`` and the regression score as
    ``fit_r2``.

    Region selection:
        * Neither range given: bounds come from ``auto_linear_region_bounds``;
          if that raises ``ValueError`` the bounds ``(0.0, 1.0)`` are used.
        * ``correlation_integral_range`` given: used as the ``(low, high)``
          bounds passed to ``clamp``.
        * Only ``epsilon_range`` given: no correlation-integral bounds are
          applied and ``linear_region_bounds`` is ``(None, None)``.
        * ``epsilon_range``, when given, is applied on top of the above.

    If fewer than two usable points remain after selection, the whole curve
    is used instead; ``linear_region_bounds`` still reports the bounds that
    were requested/selected.

    Args:
        curve: Curve providing ``epsilons``, ``corrints`` and
            ``sequence_length``.
        correlation_integral_range: Optional ``(low, high)`` bounds on the
            correlation integral.
        epsilon_range: Optional ``(low, high)`` bounds on epsilon.

    Returns:
        A ``DimensionResult`` carrying the original curve arrays, the fitted
        slope, the fit score, the points actually fitted and the bounds.

    Raises:
        ValueError: If fewer than two points with positive, finite
            coordinates are available even on the full curve.
    """
    epsilons = curve.epsilons.copy()
    corrints = curve.corrints.copy()

    if correlation_integral_range is None and epsilon_range is None:
        try:
            low, high = auto_linear_region_bounds(curve.sequence_length, epsilons, corrints)
        except ValueError:
            low, high = 0.0, 1.0
        eps_linear, corr_linear = clamp(epsilons, corrints, low=low, high=high)
    elif correlation_integral_range is not None:
        low, high = correlation_integral_range
        eps_linear, corr_linear = clamp(epsilons, corrints, low=low, high=high)
    else:
        low, high = None, None
        eps_linear, corr_linear = epsilons, corrints

    if epsilon_range is not None:
        # Arguments are swapped on purpose so the bounds apply to epsilon.
        # NOTE(review): relies on clamp filtering on its second array -- confirm in .utils.
        corr_linear, eps_linear = clamp(corr_linear, eps_linear, low=epsilon_range[0], high=epsilon_range[1])

    # Both coordinates go through log10 below, so both must be finite and > 0;
    # checking only the correlation integral lets a zero/negative epsilon put
    # -inf/NaN into the regression input.
    valid = _positive_finite(corr_linear) & _positive_finite(eps_linear)
    if valid.sum() < 2:
        # Fallback to the full curve if selected range is too narrow (common on short/noisy sequences).
        eps_linear = epsilons
        corr_linear = corrints
        valid = _positive_finite(corr_linear) & _positive_finite(eps_linear)
    if valid.sum() < 2:
        raise ValueError("Not enough points with positive finite correlation integral after range selection.")

    eps_fit = eps_linear[valid]
    corr_fit = corr_linear[valid]

    # LinearRegression expects a 2-D design matrix: one column of log-epsilons.
    log_eps = np.log10(eps_fit).reshape(-1, 1)
    log_corr = np.log10(corr_fit)
    fit = sklearn.linear_model.LinearRegression().fit(log_eps, log_corr)
    fit_r2 = fit.score(log_eps, log_corr)

    return DimensionResult(
        sequence_length=curve.sequence_length,
        epsilons=curve.epsilons,
        corrints=curve.corrints,
        corrdim=float(fit.coef_[0]),
        fit_r2=float(fit_r2),
        epsilons_linear_region=eps_fit,
        corrints_linear_region=corr_fit,
        linear_region_bounds=(low, high),
    )
def estimate_dimension_from_curves(
    curves: list[CurveResult],
    correlation_integral_range: Optional[Tuple[float, float]] = None,
    epsilon_range: Optional[Tuple[float, float]] = None,
) -> list[DimensionResult]:
    """Run ``estimate_dimension_from_curve`` on every curve, preserving order.

    Args:
        curves: Curves to process.
        correlation_integral_range: Forwarded unchanged to each per-curve call.
        epsilon_range: Forwarded unchanged to each per-curve call.

    Returns:
        One result per input curve, in input order.
    """
    results = []
    for single_curve in curves:
        estimate = estimate_dimension_from_curve(
            single_curve,
            correlation_integral_range=correlation_integral_range,
            epsilon_range=epsilon_range,
        )
        results.append(estimate)
    return results