# Writing benchmarks

With Transonic, writing benchmarks for the different accelerators is very simple. We present an example on this page.

Other examples can be found here:

## Comparison Numba vs Pythran (JIT)

We take this file with only pure-Numpy code from this blog post by Florian LE BOURDAIS.

```python
import numpy as np

def laplace_numpy(image):
    """Laplace operator in NumPy for 2D images.

    The 5-point stencil is evaluated for every interior pixel at once by
    adding four shifted views of ``image`` and subtracting four times the
    centre view. The absolute value of the result is then thresholded.

    Parameters
    ----------
    image : 2D array
        Input image of shape ``(h, w)``.

    Returns
    -------
    Boolean array of shape ``(h - 2, w - 2)`` (for ``h, w >= 2``) that is
    ``True`` where the absolute value of the Laplacian exceeds 0.05.
    """
    # Each slice drops one border row/column on each side, so all five
    # operands share the same (h - 2, w - 2) shape.
    laplacian = (
        image[:-2, 1:-1]
        + image[2:, 1:-1]
        + image[1:-1, :-2]
        + image[1:-1, 2:]
        - 4 * image[1:-1, 1:-1]
    )
    thresh = np.abs(laplacian) > 0.05
    return thresh

def laplace_loops(image):
    """Laplace operator for 2D images.

    Explicit-loop counterpart of the vectorized NumPy version: the 5-point
    stencil is evaluated pixel by pixel over the interior of ``image``.

    Parameters
    ----------
    image : 2D array
        Input image of shape ``(h, w)``.

    Returns
    -------
    ``np.uint8`` array of shape ``(h - 2, w - 2)`` holding 1 where the
    absolute value of the Laplacian exceeds 0.05 and 0 elsewhere.
    """
    h = image.shape[0]
    w = image.shape[1]
    # Every element is written by the loops below, so an uninitialized
    # buffer is sufficient.
    laplacian = np.empty((h - 2, w - 2), np.uint8)
    for i in range(1, h - 1):
        for j in range(1, w - 1):
            # Output indices are shifted by one because the border pixels
            # of the input have no result.
            laplacian[i - 1, j - 1] = (
                np.abs(
                    image[i - 1, j]
                    + image[i + 1, j]
                    + image[i, j - 1]
                    + image[i, j + 1]
                    - 4 * image[i, j]
                )
                > 0.05
            )
    return laplacian
```

Our code for a benchmark in JIT mode:

```python
from transonic import jit
import numba

from pure_numpy import laplace_numpy, laplace_loops

# JIT-wrapped variants of the vectorized NumPy implementation. The names
# are referenced as strings by the benchmark calls, so they must not change.
# The first wrapper passes no `backend`; it is named "pythran", and
# `native`/`xsimd` are presumably forwarded as Pythran options (TODO confirm).
laplace_transonic_pythran = jit(native=True, xsimd=True)(laplace_numpy)
laplace_transonic_python = jit(backend="python")(laplace_numpy)
laplace_transonic_numba = jit(backend="numba")(laplace_numpy)
# Plain Numba, without Transonic, as a reference point.
laplace_numba = numba.njit(laplace_numpy)

# The same four variants for the explicit-loop implementation.
laplace_transonic_pythran_loops = jit(native=True, xsimd=True)(laplace_loops)
laplace_transonic_python_loops = jit(backend="python")(laplace_loops)
laplace_transonic_numba_loops = jit(backend="numba")(laplace_loops)
laplace_numba_loops = numba.njit(laplace_loops)

if __name__ == "__main__":
    from transonic import wait_for_all_extensions

    from skimage.data import astronaut
    from skimage.color import rgb2gray

    # Test image: the scikit-image "astronaut" picture converted to grayscale.
    image = astronaut()
    image = rgb2gray(image)

    # warm the functions
    laplace_transonic_python(image)
    laplace_transonic_pythran(image)
    laplace_transonic_pythran_loops(image)
    laplace_transonic_numba(image)
    laplace_transonic_numba_loops(image)
    laplace_numba(image)
    laplace_numba_loops(image)

    # Presumably blocks until the extensions triggered by the calls above
    # are ready, so compilation is not included in the timings below
    # (TODO confirm against the Transonic documentation).
    wait_for_all_extensions()

    # again warming
    laplace_transonic_numba(image)
    laplace_transonic_numba_loops(image)

    from transonic.util import timeit
    from transonic import __version__
    import pythran

    # Namespace in which the statement strings given to `bench` are
    # evaluated; at module level this holds the wrappers and `image`.
    loc = locals()

    def bench(call, norm=None):
        """Time the statement ``call`` and print its duration relative to ``norm``.

        Parameters
        ----------
        call : str
            Statement to time, e.g. ``"laplace_numba(image)"``. The text
            before the first ``(`` is used as the printed label.
        norm : float, optional
            Reference duration. When omitted, the measured duration is used
            as its own reference, so the printed ratio is 1.00.

        Returns
        -------
        The raw (unnormalized) duration returned by ``timeit``.
        """
        ret = result = timeit(call, globals=loc)
        if norm is None:
            norm = result
        result /= norm
        print(f"{call.split('(')[0]:33s}: {result:.2f}")
        return ret

    print(
        f"transonic {__version__}\n"
        f"pythran {pythran.__version__}\n"
        f"numba {numba.__version__}\n"
    )

    # The first measurement is the reference for all the following ratios.
    norm = bench("laplace_transonic_pythran(image)")
    print(f"norm = {norm:.2e} s")
    bench("laplace_transonic_pythran_loops(image)", norm=norm)
    bench("laplace_numba(image)", norm=norm)
    bench("laplace_transonic_numba(image)", norm=norm)
    bench("laplace_numba_loops(image)", norm=norm)
    bench("laplace_transonic_numba_loops(image)", norm=norm)
    bench("laplace_numpy(image)", norm=norm)
    bench("laplace_transonic_python(image)", norm=norm)
```

gives:

```
transonic 0.4.0
pythran 0.9.3post1
numba 0.45.1

laplace_transonic_pythran        : 1.00
norm = 1.44e-04 s
laplace_transonic_pythran_loops  : 0.94
laplace_numba                    : 8.82
laplace_transonic_numba          : 8.80
laplace_numba_loops              : 0.94
laplace_transonic_numba_loops    : 0.94
laplace_numpy                    : 6.94
laplace_transonic_python         : 7.03
```

The warmup is much longer for Transonic-Pythran but remember that it is a cached JIT so it is an issue only for the first call of the function. When we reimport the module, there is no warmup.

Then we see that Pythran is very good at optimizing high-level NumPy code! In contrast (with my setup and on my computer), Numba has not been able to optimize this function. However, Numba is good at speeding up the code with loops!

Note that the Transonic overhead is negligible even for this very small case (the shape of the image is `(512, 512)`).

Note

We don’t use the `fastmath` option of Numba because the Numba backend does not support it yet!

```python
from transonic import boost, Array
import numba

import numpy as np

# Type alias used to annotate the benchmarked functions: a 2D float64 array
# (the "C" flag presumably requests a C-contiguous layout — TODO confirm).
Image = Array[np.float64, "2d", "C"]

def laplace_numpy(image: Image):
    """Laplace operator in NumPy for 2D images.

    The 5-point stencil is evaluated for every interior pixel at once by
    adding four shifted views of ``image`` and subtracting four times the
    centre view. The absolute value of the result is then thresholded.

    Parameters
    ----------
    image : Image
        Input image of shape ``(h, w)``.

    Returns
    -------
    Boolean array of shape ``(h - 2, w - 2)`` (for ``h, w >= 2``) that is
    ``True`` where the absolute value of the Laplacian exceeds 0.05.
    """
    # Each slice drops one border row/column on each side, so all five
    # operands share the same (h - 2, w - 2) shape.
    laplacian = (
        image[:-2, 1:-1]
        + image[2:, 1:-1]
        + image[1:-1, :-2]
        + image[1:-1, 2:]
        - 4 * image[1:-1, 1:-1]
    )
    thresh = np.abs(laplacian) > 0.05
    return thresh

def laplace_loops(image: Image):
    """Laplace operator for 2D images.

    Explicit-loop counterpart of the vectorized NumPy version: the 5-point
    stencil is evaluated pixel by pixel over the interior of ``image``.

    Parameters
    ----------
    image : Image
        Input image of shape ``(h, w)``.

    Returns
    -------
    ``np.uint8`` array of shape ``(h - 2, w - 2)`` holding 1 where the
    absolute value of the Laplacian exceeds 0.05 and 0 elsewhere.
    """
    h = image.shape[0]
    w = image.shape[1]
    # Every element is written by the loops below, so an uninitialized
    # buffer is sufficient.
    laplacian = np.empty((h - 2, w - 2), np.uint8)
    for i in range(1, h - 1):
        for j in range(1, w - 1):
            # Output indices are shifted by one because the border pixels
            # of the input have no result.
            laplacian[i - 1, j - 1] = (
                np.abs(
                    image[i - 1, j]
                    + image[i + 1, j]
                    + image[i, j - 1]
                    + image[i, j + 1]
                    - 4 * image[i, j]
                )
                > 0.05
            )
    return laplacian

# Variants of the vectorized NumPy implementation, one per Transonic
# backend, plus plain Numba (without Transonic) as a reference point.
# The names are referenced as strings by the benchmark calls, so they must
# not change.
laplace_transonic_pythran = boost(backend="pythran")(laplace_numpy)
laplace_transonic_cython = boost(backend="cython")(laplace_numpy)
laplace_transonic_numba = boost(backend="numba")(laplace_numpy)
laplace_transonic_python = boost(backend="python")(laplace_numpy)
laplace_numba = numba.njit(laplace_numpy)

# Variants of the explicit-loop implementation. No Cython wrapper is
# created here: a separately annotated function is used for that backend.
laplace_loops_transonic_pythran = boost(backend="pythran")(laplace_loops)
laplace_loops_transonic_python = boost(backend="python")(laplace_loops)
laplace_loops_transonic_numba = boost(backend="numba")(laplace_loops)
laplace_loops_numba = numba.njit(laplace_loops)

# For Cython, we need to add more type annotations

@boost(backend="cython", boundscheck=False, wraparound=False)
def laplace_loops_transonic_cython(image: Image):
    """Laplace operator for 2D images.

    Loop-based variant with type annotations on every local variable, for
    the Cython backend. Unlike the other loop implementation it calls the
    builtin ``abs`` on the scalar stencil value rather than ``np.abs``.

    Parameters
    ----------
    image : Image
        Input image of shape ``(h, w)``.

    Returns
    -------
    ``np.uint8`` array of shape ``(h - 2, w - 2)`` holding 1 where the
    absolute value of the Laplacian exceeds 0.05 and 0 elsewhere.
    """
    # NOTE(review): `boundscheck=False` / `wraparound=False` are presumably
    # forwarded as Cython directives — confirm. The loops below only use
    # non-negative, in-range indices.
    i: int
    j: int
    h: int = image.shape[0]
    w: int = image.shape[1]
    laplacian: Array[np.uint8, "2d"] = np.empty((h - 2, w - 2), np.uint8)
    for i in range(1, h - 1):
        for j in range(1, w - 1):
            # Output indices are shifted by one because the border pixels
            # of the input have no result.
            laplacian[i - 1, j - 1] = (
                abs(
                    image[i - 1, j]
                    + image[i + 1, j]
                    + image[i, j - 1]
                    + image[i, j + 1]
                    - 4 * image[i, j]
                )
                > 0.05
            )
    return laplacian

if __name__ == "__main__":

    from skimage.data import astronaut
    from skimage.color import rgb2gray

    # Test image: the scikit-image "astronaut" picture converted to grayscale.
    image = astronaut()
    image = rgb2gray(image)

    # call these functions to warm them
    laplace_transonic_numba(image)
    laplace_loops_transonic_numba(image)
    laplace_numba(image)
    laplace_loops_numba(image)

    from transonic.util import timeit
    from transonic import __version__
    import pythran

    # Namespace in which the statement strings given to `bench` are
    # evaluated; at module level this holds the wrappers and `image`.
    loc = locals()

    def bench(call, norm=None):
        """Time the statement ``call`` and print its duration relative to ``norm``.

        Parameters
        ----------
        call : str
            Statement to time, e.g. ``"laplace_numba(image)"``. The text
            before the first ``(`` is used as the printed label.
        norm : float, optional
            Reference duration. When omitted, the measured duration is used
            as its own reference, so the printed ratio is 1.00.

        Returns
        -------
        The raw (unnormalized) duration returned by ``timeit``.
        """
        ret = result = timeit(call, globals=loc)
        if norm is None:
            norm = result
        result /= norm
        print(f"{call.split('(')[0]:33s}: {result:.2f}")
        return ret

    print(
        f"transonic {__version__}\n"
        f"pythran {pythran.__version__}\n"
        f"numba {numba.__version__}\n"
    )

    # The first measurement is the reference for all the following ratios.
    norm = bench("laplace_transonic_pythran(image)")
    print(f"norm = {norm:.2e} s")
    bench("laplace_loops_transonic_pythran(image)", norm=norm)
    bench("laplace_transonic_cython(image)", norm=norm)
    bench("laplace_loops_transonic_cython(image)", norm=norm)
    bench("laplace_numba(image)", norm=norm)
    bench("laplace_transonic_numba(image)", norm=norm)
    bench("laplace_loops_numba(image)", norm=norm)
    bench("laplace_loops_transonic_numba(image)", norm=norm)
    bench("laplace_numpy(image)", norm=norm)
    bench("laplace_transonic_python(image)", norm=norm)
```

The results are:

```
transonic 0.4.0
pythran 0.9.3post1
numba 0.45.1

laplace_transonic_pythran        : 1.00
norm = 1.42e-04 s
laplace_loops_transonic_pythran  : 0.95
laplace_transonic_cython         : 8.36
laplace_loops_transonic_cython   : 2.61
laplace_numba                    : 8.94
laplace_transonic_numba          : 8.93
laplace_loops_numba              : 0.95
laplace_loops_transonic_numba    : 0.95
laplace_numpy                    : 7.01
laplace_transonic_python         : 7.00
```