New Target

In order to implement a new target for npbackend, we need to implement the functions in interface.py:

"""
===================================
Interface for ``npbackend`` targets
===================================

Implementing a ``npbackend`` target, starts with fleshing out::

    import interface

    class View(interface.View):
        ...

    class Base(interface.Base):
        ...

And then implementing each of the methods described in ``interface.py``,
documented below:
"""

class Base(object):
    """
    Abstract base array handle (an array has only one base)
    Encapsulates memory allocated for an array.

    :param int size: Number of elements in the array
    :param numpy.dtype dtype: Data type of the elements
    """
    def __init__(self, size, dtype):
        self.size = size        # Total number of elements
        self.dtype = dtype      # Data type

class View(object):
    """
    Abstract array view handle.
    Encapsulates meta-data of an array.

    :param int ndim: Number of dimensions / rank of the view
    :param int start: Offset from base (in elements), converted to bytes upon construction.
    :param tuple(int*ndim) shape: Number of elements in each dimension of the array.
    :param tuple(int*ndim) strides: Stride for each dimension (in elements), converted to bytes upon construction.
    :param interface.Base base: Base associated with array.
    """
    def __init__(self, ndim, start, shape, strides, base):
        self.ndim = ndim        # Number of dimensions
        self.shape = shape      # Tuple of dimension sizes
        self.base = base        # The base array this view refers to
        self.dtype = base.dtype
        self.start = start * base.dtype.itemsize # Offset from base (in bytes)
        self.strides = [x * base.dtype.itemsize for x in strides] #Tuple of strides (in bytes)

def runtime_flush():
    """Flush the runtime system"""
    pass

def tally():
    """Tally the runtime system"""
    pass

def get_data_pointer(ary, allocate=False, nullify=False):
    """
    Return a C-pointer to the array data (represented as a Python integer).

    .. note:: One way of implementing this would be to return a ndarray.ctypes.data.

    :param Mixed ary: The array to retrieve a data-pointer for.
    :param bool allocate: When true the target is expected to allocate the data prior to returning.
    :param bool nullify: TODO
    :returns: A pointer to memory associated with the given 'ary'
    :rtype: int
    """
    raise NotImplementedError()

def set_bhc_data_from_ary(self, ary):
    """
    Copy data from 'ary' into the array 'self'

    :param Mixed self: The array to copy data to.
    :param Mixed ary: The array to copy data from.
    :rtype: None
    """
    raise NotImplementedError()

def ufunc(op, *args):
    """
    Perform the ufunc 'op' on the 'args' arrays

    :param bohrium.ufunc.Ufunc op: The ufunc operation to apply to args.
    :param Mixed args: Args to the ufunc operation.
    :rtype: None
    """
    raise NotImplementedError()

def reduce(op, out, ary, axis):
    """
    Reduce 'axis' dimension of 'ary' and write the result to out

    :param op bohrium.ufunc.Ufunc: The ufunc operation to apply to args.
    :param out Mixed: The array to reduce "into".
    :param ary Mixed: The array to reduce.
    :param axis Mixed: The axis to apply the reduction over.
    :rtype: None
    """
    raise NotImplementedError()

def accumulate(op, out, ary, axis):
    """
    Accumulate/scan 'axis' dimension of 'ary' and write the result to 'out'.

    :param bohrium.ufunc.Ufunc op: The element-wise operator to accumulate.
    :param Mixed out: The array to accumulate/scan "into".
    :param Mixed ary: The array to accumulate/scan.
    :param Mixed axis: The axis to apply the accumulation/scan over.
    :rtype: None
    """
    raise NotImplementedError()

def extmethod(name, out, in1, in2):
    """
    Apply the extension method 'name'.

    :param Mixed out: The array to write results to.
    :param Mixed in1: First input array.
    :param Mixed in2: Second input array.
    :rtype: None
    """
    raise NotImplementedError()

def range(size, dtype):
    """
    Create a new array containing the values [0:size[.

    :param int size: Number of elements in the returned array.
    :param np.dtype dtype: Type of elements in the returned range.
    :rtype: Mixed
    """

    raise NotImplementedError()

def random123(size, start_index, key):
    """
    Create a new random array using the random123 algorithm.
    The dtype is uint64 always.

    :param int size: Number of elements in the returned array.
    :param int start_index: TODO
    :param int key: TODO
    """
    raise NotImplementedError()

def gather(out, ary, indexes):
    """
    Gather elements from 'ary' selected by 'indexes'.
    ary.shape == indexes.shape.

    :param Mixed out: The array to write results to.
    :param Mixed ary: Input array.
    :param Mixed indexes: Array of indexes (uint64).
    """
    raise NotImplementedError()

NumPy Example

An example of a target implementation is target_numpy.py that uses NumPy as a backend. Now, implementing a NumPy backend that targets NumPy does not make that much sense but it is a good example.

Note

In some cases using NumPy as a backend will output native NumPy because of memory allocation reuse.

"""
The Computation Backend

"""
from .. import bhc
from .._util import dtype_name
import numpy as np
import mmap
import time
import ctypes
from . import interface
import os

VCACHE = []
VCACHE_SIZE = int(os.environ.get("VCACHE_SIZE", 10))

class Base(interface.Base):
    """base array handle"""

    def __init__(self, size, dtype):
        super(Base, self).__init__(size, dtype)
        self.mmap_valid = True
        size *= dtype.itemsize
        for i, (vc_size, vc_mem) in enumerate(VCACHE):
            if vc_size == size:
                self.mmap = vc_mem
                VCACHE.pop(i)
                return

        self.mmap = mmap.mmap(-1, size, mmap.MAP_PRIVATE)

    def __str__(self):
        if self.mmap_valid:
            s = mmap
        else:
            s = "NULL"
        return "<base memory at %s>"%s

    def __del__(self):
        if self.mmap_valid:
            if len(VCACHE) < VCACHE_SIZE:
                VCACHE.append((self.size*self.dtype.itemsize, self.mmap))
                return
        self.mmap.close()

class View(interface.View):
    """array view handle"""

    def __init__(self, ndim, start, shape, strides, base):
        super(View, self).__init__(ndim, start, shape, strides, base)
        buf = np.frombuffer(self.base.mmap, dtype=self.dtype, offset=self.start)
        self.ndarray = np.lib.stride_tricks.as_strided(buf, shape, self.strides)

def views2numpy(views):
    """Extract the ndarray from the view."""

    ret = []
    for view in views:
        if isinstance(view, View):
            ret.append(view.ndarray)
        else:
            ret.append(view)
    return ret

def get_data_pointer(ary, allocate=False, nullify=False):
    """
    Extract the data-pointer from the given View (ary).

    :param target_numpy.View ary: The View to extract the ndarray form.
    :returns: Pointer to data associated with the 'ary'.
    :rtype: ctypes pointer
    """
    ret = ary.ndarray.ctypes.data
    if nullify:
        ary.base.mmap_valid = False
    return ret

def set_bhc_data_from_ary(self, ary):
    ptr = get_data_pointer(self, allocate=True, nullify=False)
    ctypes.memmove(ptr, ary.ctypes.data, ary.dtype.itemsize * ary.size)

def ufunc(op, *args):
    """Apply the 'op' on args, which is the output followed by one or two inputs"""

    args = views2numpy(args)
    if op.info['name'] == "identity":
        if np.isscalar(args[1]):
            exec("args[0][...] = args[1]")
        else:
            exec("args[0][...] = args[1][...]")
    else:
        func = eval("np.%s" % op.info['name'])
        func(*args[1:], out=args[0])

def reduce(op, out, ary, axis):
    """reduce 'axis' dimension of 'ary' and write the result to out"""

    func = eval("np.%s.reduce" % op.info['name'])
    (ary, out) = views2numpy((ary, out))
    if ary.ndim == 1:
        keepdims = True
    else:
        keepdims = False
    func(ary, axis=axis, out=out, keepdims=keepdims)

def accumulate(op, out, ary, axis):
    """accumulate 'axis' dimension of 'ary' and write the result to out"""

    func = eval("np.%s.accumulate" % op.info['name'])
    (ary, out) = views2numpy((ary, out))
    if ary.ndim == 1:
        keepdims = True
    else:
        keepdims = False
    func(ary, axis=axis, out=out, keepdims=keepdims)

def extmethod(name, out, in1, in2):
    """Apply the extended method 'name' """

    (out, in1, in2) = views2numpy((out, in1, in2))
    if name == "matmul":
        out[:] = np.dot(in1, in2)
    else:
        raise NotImplementedError("The current runtime system does not support "
                                  "the extension method '%s'" % name)

def range(size, dtype):
    """create a new array containing the values [0:size["""
    return np.arange((size,), dtype=dtype)

def random123(size, start_index, key):
    """Create a new random array using the random123 algorithm.
    The dtype is uint64 always."""
    return np.random.random(size)

Now, let’s try to run a small Python/NumPy example:

import numpy as np

a = np.ones(100000000)
for _ in xrange(100):
    a = a + 42

First with native NumPy:

time -p python tt.py
real 26.69
user 10.20
sys 16.50

And then with npbackend using NumPy as backend:

NPBE_TARGET="numpy" time -p python -m bohrium tt.py
real 14.36
user 14.02
sys 0.34

Because of the memory allocation reuse, npbackend actually outperforms native NumPy. However, this is only the case because we allocate and free a significant number of large arrays.