from os import mkdir
from os.path import isdir
from warnings import warn
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import numpy as np
from statsmodels.api import OLS, add_constant
from sphractal.constants import PLT_PARAMS
from sphractal.utils import findNN, findSurf, readInp
from sphractal.surfVoxel import voxelBoxCnts
from sphractal.surfExact import exactBoxCnts
# from sphractal.utils import estDuration, annotate
# @annotate('findSlope', color='green')
[docs]def findSlope(scales, counts, npName='', outDir='outputs', trimLen=True,
minSample=6, confLvl=95,
visReg=True, figType='paper', saveFig=False, showPlot=False, verbose=False):
"""
Compute the slope (box counting dimension) from the box-counting data collected.
Parameters
----------
scales : list of floats
Box lengths.
counts : list of floats
Number of boxes that cover the exact spherical surface of interest.
npName : str, optional
Identifier of the measured object, which forms part of the output file name, ideally unique.
outDir : str, optional
Path to the directory to store the output files.
trimLen : bool, optional
Whether to remove the box counts obtained using boxes of extreme sizes.
minSample : int, optional
Minimum number of box count data points to be retained for slope estimation from the linear regression fitting.
confLvl : Union[int, float]
Confidence level of confidence interval in percentage.
visReg : bool, optional
Whether to generate figures from the linear regression fitting process.
figType : {'paper', 'poster', 'talk'}
Type of figures to be generated.
saveFig : bool, optional
Whether to save the final figure generated, only works when 'visReg' is True.
showPlot : bool, optional
Whether to show the figures generated, only works when 'visReg' is True.
verbose : bool, optional
Whether to display the details.
Returns
-------
r2score : float
Coefficient of determination from determination of the dimension of point clouds surface.
boxCntDim : float
Box-counting dimension of the point clouds representation of the surface.
slopeCI : 1D ndarray of floats
Confidence interval of the box-counting dimension of the point clouds surface.
minMaxLens : tuple of floats
Minimum and maximum box lengths used to determine slope.
"""
assert isinstance(minSample, int), 'minSample should be integer!'
if visReg:
plt.rc('font', family='sans-serif')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
params = PLT_PARAMS[figType]
figSize, dpi, fontSize, labelSize = params['figSize'], params['dpi'], params['fontSize'], params['labelSize']
legendSize, lineWidth, markerSize = params['legendSize'], params['lineWidth'], params['markerSize']
else:
figSize = dpi = fontSize = labelSize = legendSize = lineWidth = markerSize = None
# Remove invalid entries in the box counts data collected
while np.nan in counts:
nanIdx = counts.index(np.nan)
del counts[nanIdx]
del scales[nanIdx]
if abs(confLvl) > 100:
warn(f"Confidence level out of range, confidence intervals are unreliable! 'confLvl' should be within [0, 100) "
f"instead of {confLvl}")
alphaCI = 1 - confLvl/100
firstPointIdx, lastPointIdx, removeSmallBoxes = 0, len(scales), True
r2score, boxCntDim, slopeCI, minMaxLens = 0.0, 0.0, np.array((np.inf, np.inf)), (scales[0], scales[-1])
r2scorePrev, boxCntDimPrev, slopeCIPrev, minMaxLensPrev = 0.0, 0.0, np.array((np.inf, np.inf)), (scales[0], scales[-1])
while len(scales[firstPointIdx:lastPointIdx]) >= minSample:
x, y = scales[firstPointIdx:lastPointIdx], counts[firstPointIdx:lastPointIdx]
regModel = OLS(endog=y, exog=add_constant(x)).fit()
r2score, boxCntDim, slopeCI = regModel.rsquared, regModel.params[1], regModel.conf_int(alpha=alphaCI)[1]
minMaxLens = (x[0], x[-1])
yPred = regModel.predict() # Returns ndarray, allowing subtraction later
if visReg:
plt.close()
fig = plt.figure(figsize=figSize, dpi=dpi)
ax = fig.add_subplot(1, 1, 1)
handleScatter = ax.scatter(x, y, marker='o', s=markerSize, c='r', alpha=1, edgecolors='k', linewidths=1.2,
zorder=3)
handleBestFit = ax.plot(x, yPred, linestyle='-', linewidth=1., color='k', label='OLS')
ax.grid(linestyle='dotted')
# Compute confidence bands
predOLS = regModel.get_prediction()
lowCIs, upCIs = predOLS.summary_frame()['mean_ci_lower'], predOLS.summary_frame()['mean_ci_upper']
handleConfBand = ax.plot(x, upCIs, linestyle='--', linewidth=lineWidth, color='b')
ax.plot(x, lowCIs, linestyle='--', linewidth=lineWidth, color='b')
ax.fill_between(x, upCIs, lowCIs, alpha=0.2)
ax.set_xlabel(r'log$(1/\epsilon)$', fontsize=labelSize)
ax.set_ylabel(r'log$(N)$', fontsize=labelSize)
ax.yaxis.set_major_formatter(FormatStrFormatter('% 1.1f'))
# ax.set_title('', fontsize=fontSize)
ax.legend(handles=(handleScatter, handleBestFit[0], handleConfBand[0]),
labels=('Actual box counts', fr"Best fit line ($R^2$: {r2score:.3f})",
f"{confLvl}% confidence bands"),
title=fr"$D_{{box}}$: {boxCntDim:.3f} [{slopeCI[0]:.3f}, {slopeCI[1]:.3f}]",
title_fontsize=legendSize,
fontsize=legendSize)
plt.tight_layout()
# Removal of next point (beware of weird behaviour in middle range)
# lstSqErrs = np.subtract(y, yPred) ** 2
# if len(y) % 2 == 0:
# lowBoundErrSum, upBoundErrSum = lstSqErrs[:len(y) // 2].sum(), lstSqErrs[len(y) // 2:].sum()
# else:
# lowBoundErrSum, upBoundErrSum = lstSqErrs[:len(y) // 2].sum(), lstSqErrs[len(y) // 2 + 1:].sum()
# if lowBoundErrSum > upBoundErrSum: firstPointIdx += 1
# else: lastPointIdx -= 1
if saveFig:
boxCntDimsDir = f"{outDir}/boxCntDims"
if not isdir(boxCntDimsDir):
if not isdir(outDir):
mkdir(outDir)
mkdir(boxCntDimsDir)
plt.savefig(f"{boxCntDimsDir}/{npName}_boxCntDim.png", bbox_inches='tight')
if showPlot:
plt.show()
if trimLen:
if removeSmallBoxes:
if round(r2score, 3) < round(r2scorePrev, 3):
removeSmallBoxes = False
lastPointIdx -= 1
else:
if round(r2score, 3) < round(r2scorePrev, 3):
if verbose:
print(f" D_Box: {boxCntDimPrev:.4f} [{slopeCIPrev[0]:.4f}, {slopeCIPrev[1]:.4f}], R2: {r2scorePrev:.4f}, boxLens: ({minMaxLensPrev[0]:.4f}, {minMaxLensPrev[1]:.4f})\n")
return r2scorePrev, boxCntDimPrev, slopeCIPrev, minMaxLensPrev
firstPointIdx += 1
else:
break
r2scorePrev, boxCntDimPrev, slopeCIPrev, minMaxLensPrev = r2score, boxCntDim, slopeCI, minMaxLens
if verbose:
print(f" D_Box: {boxCntDim:.4f} [{slopeCI[0]:.4f}, {slopeCI[1]:.4f}], R2: {r2score:.4f}, boxLens: ({minMaxLens[0]:.4f}, {minMaxLens[1]:.4f})\n")
return r2score, boxCntDim, slopeCI, minMaxLens
# @annotate('runCase', color='cyan')
# @estDuration
[docs]def runBoxCnt(inpFilePath,
radType='atomic', radMult=1.2, calcBL=False, findSurfAlg='alphaShape', alphaMult=2.0, bulkCN=12,
outDir='outputs', trimLen=True, minSample=6, confLvl=95,
rmInSurf=True, vis=True, figType='paper', saveFig=False, showPlot=False, verbose=False,
voxelSurf=True, numPoints=10000, gridNum=1024, exePath='$FASTBC', genPCD=False,
exactSurf=True, minLenMult=0.25, maxLenMult=1, numCPUs=8, numBoxLen=10, bufferDist=5.0, writeBox=True):
"""
Run box-counting algorithm on the surface of a given atomistic object consisting of a set of spheres represented as either a voxelised point cloud or mathematically precise object.
Parameters
----------
inpFilePath : str
Path to xyz file containing Cartesian coordinates of a set of atoms.
radType : {'atomic', 'metallic'}, optional
Type of radii to use for the atoms.
radMult : Union[int, float], optional
Multiplier to the radii of atoms to identify their neighbouring atoms.
calcBL : bool, optional
Whether to compute the average distance from its neighbours for each atom.
findSurfAlg : {'alphaShape', 'convexHull', 'numNeigh'}, optional
Algorithm to identify the surface atoms.
alphaMult : Union[int, float], optional
Multiplier to the minimum radius to decide 'alpha' value for the alpha shape algorithm, only used if
'findSurfAlg' is 'alphaShape'. Recommendation:
2.0 * 100% ATOMIC_RAD == 5/3 * 120% ATOMIC_RAD ~= 100% METALLIC_RAD * 2.5
bulkCN : int, optional
Minimum number of neighbouring atoms for non-surface atoms.
outDir : str, optional
Path to directory to store the output files.
trimLen : bool, optional
Whether to remove the box counts obtained using boxes of extreme sizes.
minSample : int, optional
Minimum number of data points to retain for slope estimation from the linear regression fitting.
confLvl : Union[int, float], optional
Confidence level of confidence intervals (%).
rmInSurf : bool, optional
Whether to remove the inner surface points.
vis : bool, optional
Whether to generate files for visualisation.
figType : {'paper', 'poster', 'talk', 'notebook'}
Research purpose of the figures generated.
saveFig : bool, optional
Whether to save the figures generated from linear regression fitting, only used if 'vis' is True.
showPlot : bool, optional
Whether to show the figures generated from linear regression fitting, only used if 'vis' is True.
verbose : bool, optional
Whether to display the details.
voxelSurf : bool, optional
Whether to represent the surface as voxelised point clouds.
numPoints : int, optional
Number of surface points to generate around each atom.
gridNum : int, optional
Resolution of the 3D binary image.
exePath : str, optional
Path to the compiled executable of the C++ code for box-counting on 3D binary image written by Ruiz de Miras et al.
genPCD : bool, optional
Whether to generate point cloud data (pcd) file.
exactSurf : bool, optional
Whether to represent the surface in a mathematically exact manner.
minLenMult : float, optional
Multiplier to the minimum radius to determine the minimum box length.
maxLenMult : float, optional
Multiplier to the maximum radius to determine the maximum box length.
numCPUs : int, optional
Number of cores to be used for parallelisation.
numBoxLen : int, optional
Number of box length samples for the collection of the box count data, spaced evenly on logarithmic scale.
bufferDist : Union[int,float]
Buffer distance from the borders of the largest box (Angstrom).
writeBox : bool, optional
Whether to generate output file containing coordinates of examined boxes.
Returns
-------
r2VX : float
Coefficient of determination from linear regression fitting to the box-counting data obtained from the voxelised point cloud surface representation.
bcDimVX : float
Box-counting dimension of the voxelised point cloud representation of the surface.
confIntVX : 1D ndarray of floats
Confidence interval of the slope estimated for the voxelised point cloud surface representation.
minMaxLensVX : tuple of floats
Minimum and maximum box lengths for the final slope determination for the voxelised point cloud surface representation.
r2EX : float
Coefficient of determination from linear regression fitting to the box-counting data obtained from the mathematically exact surface representation.
bcDimEX : float
Box-counting dimension of the mathematically exact representation of the surface.
confIntEX : 1D ndarray of floats
Confidence interval of the slope estimated for the mathematically exact surface representation.
minMaxLensEX : tuple of floats
Minimum and maximum box lengths for final slope determination for the mathematically exact surface representation.
Examples
--------
>>> r2VX, bcDimVX, confIntVX, minMaxLensVX, r2EX, bcDimEX, confIntEX, minMaxLensEX = runBoxCnt('example.xyz')
"""
atomsEle, atomsRad, atomsXYZ, maxRange, minXYZ, maxXYZ = readInp(inpFilePath, radType)
atomsNeighIdxs, atomsAvgBondLen = findNN(atomsRad, atomsXYZ, minXYZ, maxXYZ, atomsRad.max(), radMult, calcBL)
atomsSurfIdxs = findSurf(atomsXYZ, atomsNeighIdxs, findSurfAlg, alphaMult * atomsRad.min(), bulkCN)
testCase = inpFilePath.split('/')[-1][:-4]
if verbose:
print(f"\n{testCase}")
r2VX, bcDimVX, confIntVX, minMaxLensVX = np.nan, np.nan, (np.nan, np.nan), (np.nan, np.nan)
r2EX, bcDimEX, confIntEX, minMaxLensEX = np.nan, np.nan, (np.nan, np.nan), (np.nan, np.nan)
if not isdir(outDir):
mkdir(outDir)
if voxelSurf:
scalesVX, countsVX = voxelBoxCnts(atomsEle, atomsRad, atomsSurfIdxs, atomsXYZ, atomsNeighIdxs,
testCase, outDir, numCPUs, exePath,
radType, numPoints, gridNum,
rmInSurf, vis, verbose, genPCD)
r2VX, bcDimVX, confIntVX, minMaxLensVX = findSlope(scalesVX, countsVX, f"{testCase}_VX", outDir, trimLen,
minSample, confLvl, vis, figType, saveFig, showPlot, verbose)
if exactSurf:
minAtomRad = atomsRad.min()
scalesEX, countsEX = exactBoxCnts(atomsEle, atomsRad, atomsSurfIdxs, atomsXYZ, atomsNeighIdxs,
maxRange, (minAtomRad * minLenMult, minAtomRad * maxLenMult),
minXYZ, testCase, outDir, numCPUs, numBoxLen, bufferDist,
rmInSurf, writeBox, verbose)
r2EX, bcDimEX, confIntEX, minMaxLensEX = findSlope(scalesEX, countsEX, f"{testCase}_EX", outDir, trimLen,
minSample, confLvl, vis, figType, saveFig, showPlot, verbose)
return r2VX, bcDimVX, confIntVX, minMaxLensVX, r2EX, bcDimEX, confIntEX, minMaxLensEX