Source code for sphractal.boxCnt

from os import mkdir
from os.path import isdir
from warnings import warn

import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import numpy as np
from statsmodels.api import OLS, add_constant

from sphractal.constants import PLT_PARAMS
from sphractal.utils import findNN, findSurf, readInp
from sphractal.surfVoxel import voxelBoxCnts
from sphractal.surfExact import exactBoxCnts
# from sphractal.utils import estDuration, annotate


# @annotate('findSlope', color='green')
[docs]def findSlope(scales, counts, npName='', outDir='outputs', trimLen=True, minSample=6, confLvl=95, visReg=True, figType='paper', saveFig=False, showPlot=False, verbose=False): """ Compute the slope (box counting dimension) from the box-counting data collected. Parameters ---------- scales : list of floats Box lengths. counts : list of floats Number of boxes that cover the exact spherical surface of interest. npName : str, optional Identifier of the measured object, which forms part of the output file name, ideally unique. outDir : str, optional Path to the directory to store the output files. trimLen : bool, optional Whether to remove the box counts obtained using boxes of extreme sizes. minSample : int, optional Minimum number of box count data points to be retained for slope estimation from the linear regression fitting. confLvl : Union[int, float] Confidence level of confidence interval in percentage. visReg : bool, optional Whether to generate figures from the linear regression fitting process. figType : {'paper', 'poster', 'talk'} Type of figures to be generated. saveFig : bool, optional Whether to save the final figure generated, only works when 'visReg' is True. showPlot : bool, optional Whether to show the figures generated, only works when 'visReg' is True. verbose : bool, optional Whether to display the details. Returns ------- r2score : float Coefficient of determination from determination of the dimension of point clouds surface. boxCntDim : float Box-counting dimension of the point clouds representation of the surface. slopeCI : 1D ndarray of floats Confidence interval of the box-counting dimension of the point clouds surface. minMaxLens : tuple of floats Minimum and maximum box lengths used to determine slope. """ assert isinstance(minSample, int), 'minSample should be integer!' if visReg: plt.rc('font', family='sans-serif') plt.rc('xtick', labelsize='x-small') plt.rc('ytick', labelsize='x-small') params = PLT_PARAMS[figType] figSize, dpi, fontSize, labelSize = params['figSize'], params['dpi'], params['fontSize'], params['labelSize'] legendSize, lineWidth, markerSize = params['legendSize'], params['lineWidth'], params['markerSize'] else: figSize = dpi = fontSize = labelSize = legendSize = lineWidth = markerSize = None # Remove invalid entries in the box counts data collected while np.nan in counts: nanIdx = counts.index(np.nan) del counts[nanIdx] del scales[nanIdx] if abs(confLvl) > 100: warn(f"Confidence level out of range, confidence intervals are unreliable! 'confLvl' should be within [0, 100) " f"instead of {confLvl}") alphaCI = 1 - confLvl/100 firstPointIdx, lastPointIdx, removeSmallBoxes = 0, len(scales), True r2score, boxCntDim, slopeCI, minMaxLens = 0.0, 0.0, np.array((np.inf, np.inf)), (scales[0], scales[-1]) r2scorePrev, boxCntDimPrev, slopeCIPrev, minMaxLensPrev = 0.0, 0.0, np.array((np.inf, np.inf)), (scales[0], scales[-1]) while len(scales[firstPointIdx:lastPointIdx]) >= minSample: x, y = scales[firstPointIdx:lastPointIdx], counts[firstPointIdx:lastPointIdx] regModel = OLS(endog=y, exog=add_constant(x)).fit() r2score, boxCntDim, slopeCI = regModel.rsquared, regModel.params[1], regModel.conf_int(alpha=alphaCI)[1] minMaxLens = (x[0], x[-1]) yPred = regModel.predict() # Returns ndarray, allowing subtraction later if visReg: plt.close() fig = plt.figure(figsize=figSize, dpi=dpi) ax = fig.add_subplot(1, 1, 1) handleScatter = ax.scatter(x, y, marker='o', s=markerSize, c='r', alpha=1, edgecolors='k', linewidths=1.2, zorder=3) handleBestFit = ax.plot(x, yPred, linestyle='-', linewidth=1., color='k', label='OLS') ax.grid(linestyle='dotted') # Compute confidence bands predOLS = regModel.get_prediction() lowCIs, upCIs = predOLS.summary_frame()['mean_ci_lower'], predOLS.summary_frame()['mean_ci_upper'] handleConfBand = ax.plot(x, upCIs, linestyle='--', linewidth=lineWidth, color='b') ax.plot(x, lowCIs, linestyle='--', linewidth=lineWidth, color='b') ax.fill_between(x, upCIs, lowCIs, alpha=0.2) ax.set_xlabel(r'log$(1/\epsilon)$', fontsize=labelSize) ax.set_ylabel(r'log$(N)$', fontsize=labelSize) ax.yaxis.set_major_formatter(FormatStrFormatter('% 1.1f')) # ax.set_title('', fontsize=fontSize) ax.legend(handles=(handleScatter, handleBestFit[0], handleConfBand[0]), labels=('Actual box counts', fr"Best fit line ($R^2$: {r2score:.3f})", f"{confLvl}% confidence bands"), title=fr"$D_{{box}}$: {boxCntDim:.3f} [{slopeCI[0]:.3f}, {slopeCI[1]:.3f}]", title_fontsize=legendSize, fontsize=legendSize) plt.tight_layout() # Removal of next point (beware of weird behaviour in middle range) # lstSqErrs = np.subtract(y, yPred) ** 2 # if len(y) % 2 == 0: # lowBoundErrSum, upBoundErrSum = lstSqErrs[:len(y) // 2].sum(), lstSqErrs[len(y) // 2:].sum() # else: # lowBoundErrSum, upBoundErrSum = lstSqErrs[:len(y) // 2].sum(), lstSqErrs[len(y) // 2 + 1:].sum() # if lowBoundErrSum > upBoundErrSum: firstPointIdx += 1 # else: lastPointIdx -= 1 if saveFig: boxCntDimsDir = f"{outDir}/boxCntDims" if not isdir(boxCntDimsDir): if not isdir(outDir): mkdir(outDir) mkdir(boxCntDimsDir) plt.savefig(f"{boxCntDimsDir}/{npName}_boxCntDim.png", bbox_inches='tight') if showPlot: plt.show() if trimLen: if removeSmallBoxes: if round(r2score, 3) < round(r2scorePrev, 3): removeSmallBoxes = False lastPointIdx -= 1 else: if round(r2score, 3) < round(r2scorePrev, 3): if verbose: print(f" D_Box: {boxCntDimPrev:.4f} [{slopeCIPrev[0]:.4f}, {slopeCIPrev[1]:.4f}], R2: {r2scorePrev:.4f}, boxLens: ({minMaxLensPrev[0]:.4f}, {minMaxLensPrev[1]:.4f})\n") return r2scorePrev, boxCntDimPrev, slopeCIPrev, minMaxLensPrev firstPointIdx += 1 else: break r2scorePrev, boxCntDimPrev, slopeCIPrev, minMaxLensPrev = r2score, boxCntDim, slopeCI, minMaxLens if verbose: print(f" D_Box: {boxCntDim:.4f} [{slopeCI[0]:.4f}, {slopeCI[1]:.4f}], R2: {r2score:.4f}, boxLens: ({minMaxLens[0]:.4f}, {minMaxLens[1]:.4f})\n") return r2score, boxCntDim, slopeCI, minMaxLens
# @annotate('runCase', color='cyan') # @estDuration
[docs]def runBoxCnt(inpFilePath, radType='atomic', radMult=1.2, calcBL=False, findSurfAlg='alphaShape', alphaMult=2.0, bulkCN=12, outDir='outputs', trimLen=True, minSample=6, confLvl=95, rmInSurf=True, vis=True, figType='paper', saveFig=False, showPlot=False, verbose=False, voxelSurf=True, numPoints=10000, gridNum=1024, exePath='$FASTBC', genPCD=False, exactSurf=True, minLenMult=0.25, maxLenMult=1, numCPUs=8, numBoxLen=10, bufferDist=5.0, writeBox=True): """ Run box-counting algorithm on the surface of a given atomistic object consisting of a set of spheres represented as either a voxelised point cloud or mathematically precise object. Parameters ---------- inpFilePath : str Path to xyz file containing Cartesian coordinates of a set of atoms. radType : {'atomic', 'metallic'}, optional Type of radii to use for the atoms. radMult : Union[int, float], optional Multiplier to the radii of atoms to identify their neighbouring atoms. calcBL : bool, optional Whether to compute the average distance from its neighbours for each atom. findSurfAlg : {'alphaShape', 'convexHull', 'numNeigh'}, optional Algorithm to identify the surface atoms. alphaMult : Union[int, float], optional Multiplier to the minimum radius to decide 'alpha' value for the alpha shape algorithm, only used if 'findSurfAlg' is 'alphaShape'. Recommendation: 2.0 * 100% ATOMIC_RAD == 5/3 * 120% ATOMIC_RAD ~= 100% METALLIC_RAD * 2.5 bulkCN : int, optional Minimum number of neighbouring atoms for non-surface atoms. outDir : str, optional Path to directory to store the output files. trimLen : bool, optional Whether to remove the box counts obtained using boxes of extreme sizes. minSample : int, optional Minimum number of data points to retain for slope estimation from the linear regression fitting. confLvl : Union[int, float], optional Confidence level of confidence intervals (%). rmInSurf : bool, optional Whether to remove the inner surface points. vis : bool, optional Whether to generate files for visualisation. figType : {'paper', 'poster', 'talk', 'notebook'} Research purpose of the figures generated. saveFig : bool, optional Whether to save the figures generated from linear regression fitting, only used if 'vis' is True. showPlot : bool, optional Whether to show the figures generated from linear regression fitting, only used if 'vis' is True. verbose : bool, optional Whether to display the details. voxelSurf : bool, optional Whether to represent the surface as voxelised point clouds. numPoints : int, optional Number of surface points to generate around each atom. gridNum : int, optional Resolution of the 3D binary image. exePath : str, optional Path to the compiled executable of the C++ code for box-counting on 3D binary image written by Ruiz de Miras et al. genPCD : bool, optional Whether to generate point cloud data (pcd) file. exactSurf : bool, optional Whether to represent the surface in a mathematically exact manner. minLenMult : float, optional Multiplier to the minimum radius to determine the minimum box length. maxLenMult : float, optional Multiplier to the maximum radius to determine the maximum box length. numCPUs : int, optional Number of cores to be used for parallelisation. numBoxLen : int, optional Number of box length samples for the collection of the box count data, spaced evenly on logarithmic scale. bufferDist : Union[int,float] Buffer distance from the borders of the largest box (Angstrom). writeBox : bool, optional Whether to generate output file containing coordinates of examined boxes. Returns ------- r2VX : float Coefficient of determination from linear regression fitting to the box-counting data obtained from the voxelised point cloud surface representation. bcDimVX : float Box-counting dimension of the voxelised point cloud representation of the surface. confIntVX : 1D ndarray of floats Confidence interval of the slope estimated for the voxelised point cloud surface representation. minMaxLensVX : tuple of floats Minimum and maximum box lengths for the final slope determination for the voxelised point cloud surface representation. r2EX : float Coefficient of determination from linear regression fitting to the box-counting data obtained from the mathematically exact surface representation. bcDimEX : float Box-counting dimension of the mathematically exact representation of the surface. confIntEX : 1D ndarray of floats Confidence interval of the slope estimated for the mathematically exact surface representation. minMaxLensEX : tuple of floats Minimum and maximum box lengths for final slope determination for the mathematically exact surface representation. Examples -------- >>> r2VX, bcDimVX, confIntVX, minMaxLensVX, r2EX, bcDimEX, confIntEX, minMaxLensEX = runBoxCnt('example.xyz') """ atomsEle, atomsRad, atomsXYZ, maxRange, minXYZ, maxXYZ = readInp(inpFilePath, radType) atomsNeighIdxs, atomsAvgBondLen = findNN(atomsRad, atomsXYZ, minXYZ, maxXYZ, atomsRad.max(), radMult, calcBL) atomsSurfIdxs = findSurf(atomsXYZ, atomsNeighIdxs, findSurfAlg, alphaMult * atomsRad.min(), bulkCN) testCase = inpFilePath.split('/')[-1][:-4] if verbose: print(f"\n{testCase}") r2VX, bcDimVX, confIntVX, minMaxLensVX = np.nan, np.nan, (np.nan, np.nan), (np.nan, np.nan) r2EX, bcDimEX, confIntEX, minMaxLensEX = np.nan, np.nan, (np.nan, np.nan), (np.nan, np.nan) if not isdir(outDir): mkdir(outDir) if voxelSurf: scalesVX, countsVX = voxelBoxCnts(atomsEle, atomsRad, atomsSurfIdxs, atomsXYZ, atomsNeighIdxs, testCase, outDir, numCPUs, exePath, radType, numPoints, gridNum, rmInSurf, vis, verbose, genPCD) r2VX, bcDimVX, confIntVX, minMaxLensVX = findSlope(scalesVX, countsVX, f"{testCase}_VX", outDir, trimLen, minSample, confLvl, vis, figType, saveFig, showPlot, verbose) if exactSurf: minAtomRad = atomsRad.min() scalesEX, countsEX = exactBoxCnts(atomsEle, atomsRad, atomsSurfIdxs, atomsXYZ, atomsNeighIdxs, maxRange, (minAtomRad * minLenMult, minAtomRad * maxLenMult), minXYZ, testCase, outDir, numCPUs, numBoxLen, bufferDist, rmInSurf, writeBox, verbose) r2EX, bcDimEX, confIntEX, minMaxLensEX = findSlope(scalesEX, countsEX, f"{testCase}_EX", outDir, trimLen, minSample, confLvl, vis, figType, saveFig, showPlot, verbose) return r2VX, bcDimVX, confIntVX, minMaxLensVX, r2EX, bcDimEX, confIntEX, minMaxLensEX