Skip to content

Commit

Permalink
Merge pull request #306 from N720720/improvement-of-memory-usage
Browse files Browse the repository at this point in the history
Improvement of memory usage
  • Loading branch information
N720720 committed Jan 21, 2022
2 parents 1932edd + 996388f commit cc4b4c9
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 63 deletions.
7 changes: 5 additions & 2 deletions .gitignore
Expand Up @@ -615,6 +615,9 @@ healthchecksdb
MigrationBackup/

# End of https://www.gitignore.io/api/osx,python,pycharm,windows,visualstudio,visualstudiocode
lindemann/main.py
lindemann_per_atom.txt

.vscode/launch.json
lindemann_per_frame.txt
lindemann_per_frame.pdf
lindemann_per_atom.txt

10 changes: 9 additions & 1 deletion lindemann/index/mem_use.py
Expand Up @@ -3,6 +3,14 @@


def in_gb(frames: npt.NDArray[np.float64]) -> str:
    """Report the memory needed for one atoms-by-atoms float64 matrix in GB.

    Args:
        frames (npt.NDArray[np.float64]): trajectory array whose first axis
            indexes frames and whose second axis indexes atoms.

    Returns:
        str: Message of the form ``"This will use <size> GB"``, with the size
            rounded to four decimals.
    """
    natoms = len(frames[0])
    # Work out the footprint arithmetically instead of allocating the matrix
    # just to read ``nbytes``; the figure matches a default (float64)
    # ``np.zeros((natoms, natoms))``.
    nbytes = natoms * natoms * np.dtype(np.float64).itemsize
    return f"This will use {np.round(nbytes / 1024**3, 4)} GB"  # type: ignore[no-untyped-call]
42 changes: 20 additions & 22 deletions lindemann/index/per_atoms.py
Expand Up @@ -3,32 +3,31 @@
import numba as nb
import numpy as np
import numpy.typing as npt
from numba import float32


@nb.njit(fastmath=True, error_model="numpy") # type: ignore # , cache=True) #(parallel=True)
def lindemann_per_frames(frames: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
def calculate(frames: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:

"""Calculate the contribution of each atom to the lindemann index over the frames
Args:
frames: numpy array of shape (frames, atoms, coordinates)
Returns:
npt.NDArray[np.float32]: Returns 2D array of shape (frames, atoms) with the progression of the Lindemann index of each atom per frame
"""
Calculate the lindemann index for each atom AND FRAME

Return a ndarray of shape (len_frames, natoms, natoms)
Warning: this can produce extremely large ndarrays in memory,
depending on the size of the cluster and the number of frames.
"""
first = True
# natoms = natoms
dt = frames.dtype
natoms = len(frames[0])
nframes = len(frames)
len_frames = len(frames)
array_mean = np.zeros((natoms, natoms), dtype=dt)
array_var = np.zeros((natoms, natoms), dtype=dt)
# array_distance = np.zeros((natoms, natoms))
iframe = dt.type(1)
lindex_array = np.zeros((len_frames, natoms, natoms), dtype=dt)
lindex_array = np.zeros((len_frames, natoms), dtype=dt)
for q, coords in enumerate(frames):
# print("processing frame {}/{}".format(iframe, nframes))
# print(q)
n, p = coords.shape
array_distance = np.zeros((n, n), dtype=dt)
for i in range(n):
Expand All @@ -52,7 +51,7 @@ def lindemann_per_frames(frames: npt.NDArray[np.float64]) -> npt.NDArray[np.floa
array_mean[i, j] = mean + delta / iframe
# update variance
array_var[i, j] = var + delta * (xn - array_mean[i, j])
iframe += 1
iframe += 1 # type: ignore[assignment]
if iframe > nframes + 1:
break

Expand All @@ -61,15 +60,14 @@ def lindemann_per_frames(frames: npt.NDArray[np.float64]) -> npt.NDArray[np.floa
array_mean[j, i] = array_mean[i, j]
array_var[j, i] = array_var[i, j]

lindemann_indices = np.divide(np.sqrt(np.divide(array_var, nframes)), array_mean)
# lindemann_indices = np.nanmean(np.sqrt(array_var/nframes)/array_mean, axis=1)
if first:
lindemann_indices = np.zeros((natoms), dtype=dt)
first = False
else:
np.fill_diagonal(array_mean, 1)
lindemann_indices = np.zeros((natoms), dtype=dt)
lindemann_indices = np.divide(np.sqrt(np.divide(array_var, nframes)), array_mean)
lindemann_indices = np.asarray([np.mean(lin[lin != 0]) for lin in lindemann_indices])

lindex_array[q] = lindemann_indices
return lindex_array


def calculate(indices: npt.NDArray[np.float64]) -> List[npt.NDArray[np.float64]]:
    """Apply the NaN-aware mean along axis 1 to each result of the jitted kernel.

    The reduction is done here, outside the jitted function, because numba
    has not implemented ``np.nanmean`` with the ``axis`` parameter.
    """
    reduced = []
    for frame_result in lindemann_per_frames(indices):  # type: ignore[no-untyped-call]
        reduced.append(np.nanmean(frame_result, axis=1))
    return reduced
43 changes: 21 additions & 22 deletions lindemann/index/per_frames.py
Expand Up @@ -6,28 +6,26 @@


@nb.njit(fastmath=True, error_model="numpy") # type: ignore # , cache=True) #(parallel=True)
def lindemann_per_frames_for_each_atom(frames: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
def calculate(frames: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:

"""calculate the progression of the lindemann index over the frames.
Args:
frames: numpy array of shape (frames, atoms, coordinates)
Returns:
npt.NDArray[np.float32]: Returns 1D array of shape (frames) with the progression of the Lindemann index per frame
"""
Calculate the lindemann index for each atom AND FRAME

Return a ndarray of shape (len_frames, natoms, natoms)
Warning: this can produce extremely large ndarrays in memory,
depending on the size of the cluster and the number of frames.
"""
# natoms = natoms
first = True
dt = frames.dtype
natoms = len(frames[0])
nframes = len(frames)
len_frames = len(frames)
array_mean = np.zeros((natoms, natoms), dtype=dt)
array_var = np.zeros((natoms, natoms), dtype=dt)
# array_distance = np.zeros((natoms, natoms))
iframe = dt.type(1)
lindex_array = np.zeros((len_frames, natoms, natoms), dtype=dt)
lindex_array = np.zeros((len_frames), dtype=dt)
for q, coords in enumerate(frames):
# print(q)
n, p = coords.shape
array_distance = np.zeros((n, n), dtype=dt)
for i in range(n):
Expand All @@ -51,7 +49,7 @@ def lindemann_per_frames_for_each_atom(frames: npt.NDArray[np.float64]) -> npt.N
array_mean[i, j] = mean + delta / iframe
# update variance
array_var[i, j] = var + delta * (xn - array_mean[i, j])
iframe += 1
iframe += 1 # type: ignore[assignment]
if iframe > nframes + 1:
break

Expand All @@ -60,15 +58,16 @@ def lindemann_per_frames_for_each_atom(frames: npt.NDArray[np.float64]) -> npt.N
array_mean[j, i] = array_mean[i, j]
array_var[j, i] = array_var[i, j]

lindemann_indices = np.divide(np.sqrt(np.divide(array_var, nframes)), array_mean)
# lindemann_indices = np.nanmean(np.sqrt(array_var/nframes)/array_mean, axis=1)
if first:
lindemann_indices = 0
first = False
else:
np.fill_diagonal(array_mean, 1)
lindemann_indices = np.zeros((natoms), dtype=dt) # type: ignore[assignment]
lindemann_indices = np.divide(np.sqrt(np.divide(array_var, nframes)), array_mean) # type: ignore[assignment]
lindemann_indices = np.mean(
np.asarray([np.mean(lin[lin != 0]) for lin in lindemann_indices]) # type: ignore[attr-defined]
)

lindex_array[q] = lindemann_indices
return lindex_array


def calculate(indices: npt.NDArray[np.float64]) -> List[npt.NDArray[np.float64]]:
    """Collapse each result of the jitted kernel to a single mean value.

    For every element returned by ``lindemann_per_frames_for_each_atom`` the
    NaN-aware mean along axis 1 is taken first, then the plain mean of that
    result. This happens outside the jitted function because numba has not
    implemented ``np.nanmean`` with the ``axis`` parameter.
    """
    per_frame = []
    for frame_result in lindemann_per_frames_for_each_atom(indices):  # type: ignore[no-untyped-call]
        row_means = np.nanmean(frame_result, axis=1)
        per_frame.append(np.mean(row_means))
    return per_frame
17 changes: 9 additions & 8 deletions lindemann/index/per_trj.py
Expand Up @@ -9,15 +9,20 @@


@nb.njit(fastmath=True, error_model="numpy") # type: ignore
def lindemann_per_atom(frames: npt.NDArray[np.float64]) -> Any:
def lindemann_per_atom(frames: npt.NDArray[np.float32]) -> Any:

"""Calculate the lindeman index
Args:
frames: numpy array of shape(frames,atoms)
Returns:
float32: returns the Lindemann index
"""

"""Calculates the lindemann index for """
dt = frames.dtype
natoms = len(frames[0])
nframes = len(frames)
array_mean = np.zeros((natoms, natoms), dtype=dt)
array_var = np.zeros((natoms, natoms), dtype=dt)
# array_distance = np.zeros((natoms, natoms),dtype=dt)
iframe = dt.type(1)
for coords in frames:

Expand All @@ -43,7 +48,7 @@ def lindemann_per_atom(frames: npt.NDArray[np.float64]) -> Any:
delta = xn - mean
array_mean[i, j] = mean + delta / iframe
array_var[i, j] = var + delta * (xn - array_mean[i, j])
iframe += 1.0
iframe += 1.0 # type: ignore[assignment]
if iframe > nframes:
break

Expand All @@ -57,9 +62,5 @@ def lindemann_per_atom(frames: npt.NDArray[np.float64]) -> Any:


def calculate(frames: npt.NDArray[np.float64]) -> float:
    """Reduce the output of ``lindemann_per_atom`` to one number.

    The NaN-aware mean along axis 1 and the final overall mean are taken
    here, outside the jitted function, because numba has not implemented
    ``np.nanmean`` with the ``axis`` parameter.
    """
    kernel_result = lindemann_per_atom(frames)  # type: ignore[no-untyped-call]
    row_means = bn.nanmean(kernel_result, axis=1)
    return np.mean(row_means)  # type: ignore[no-any-return]
7 changes: 2 additions & 5 deletions lindemann/main.py
Expand Up @@ -80,9 +80,6 @@ def main(
as the progression of the Lindemann index per frame or per atom and frame of temperature ramps
for phase transition analysis.
"""
# frames = read.frames(trjfile)
# frames = lindemann.trajectory.read.frames(trjfile)
start = time.time()

n_cores = cpu_count()
len_trjfiles = len(trjfile)
Expand Down Expand Up @@ -156,9 +153,9 @@ def main(
raise typer.Exit()

elif timeit and single_process:

# we use float32 here since float64 is not needed for my purposes and it enables us to use nb fastmath. Change to np.float64 if you need more precision.
start = time.time()
linde_for_time = per_trj.calculate(tjr_frames)
linde_for_time = per_trj.calculate(tjr_frames.astype(np.float32))
time_diff = time.time() - start

console.print(
Expand Down
21 changes: 20 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Expand Up @@ -6,7 +6,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "lindemann"
version = "0.4.1"
version = "0.5.0"
description = "lindemann is a python package to calculate the Lindemann index of a lammps trajectory as well as the progression of the Lindemann index per frame of temperature ramps for phase transition analysis."
readme = "README.md"
authors = [
Expand Down Expand Up @@ -43,6 +43,7 @@ numba = ">=0.52"
numpy = ">=1.18, <1.22"
pathlib = "^1.0.1"
icc-rt = "^2020.0.133"
Bottleneck = "^1.3.2"
psutil = "^5.9.0"

[tool.poetry.dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_example/test_cli.py
Expand Up @@ -45,7 +45,7 @@ def test_t_flag():

def test_m_flag():
flag = "-m"
res_str = "memory use: This will use 0.7864 GB"
res_str = "memory use: This will use 0.0016 GB"
trajectory = ["tests/test_example/459_02.lammpstrj"]
single_process_and_multiprocess(trajectory, flag, res_str)

Expand Down

0 comments on commit cc4b4c9

Please sign in to comment.