################################################################################
# Copyright (c) 2014-2021, National Research Foundation (SARAO)
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use
# this file except in compliance with the License. You may obtain a copy
# of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
"""Fill device array with a constant value."""
from typing import Any, Callable, Mapping, Optional, Tuple
import numpy as np
try:
from numpy.typing import DTypeLike
except ImportError:
DTypeLike = Any # type: ignore
from . import accel, tune
from .abc import AbstractCommandQueue, AbstractContext
class FillTemplate:
    """Template for operations that write one constant into a device array.

    Every element of the target array is set to the constant, and the pad
    elements are also filled with this value.

    .. note::

        To fill with zeros, use :meth:`.DeviceArray.zero`

    Parameters
    ----------
    context
        Context for which kernels will be compiled
    dtype
        Type of data elements
    ctype
        Type (in C/CUDA, not numpy) of data elements
    tuning
        Kernel tuning parameters; if omitted, will autotune. The possible
        parameters are

        - wgs: number of workitems per workgroup
    """

    def __init__(
        self,
        context: AbstractContext,
        dtype: DTypeLike,
        ctype: str,
        tuning: Optional[Mapping[str, Any]] = None,
    ) -> None:
        self.context = context
        # Normalise whatever dtype-like was passed into a concrete numpy dtype.
        self.dtype: np.dtype = np.dtype(dtype)
        self.ctype = ctype
        # Autotuning is only attempted when the caller gave no parameters.
        chosen = self.autotune(context, dtype, ctype) if tuning is None else tuning
        self.wgs = chosen["wgs"]
        render_args = {"wgs": self.wgs, "ctype": ctype}
        self.program = accel.build(context, "fill.mako", render_args)

    @classmethod
    @tune.autotuner(test={"wgs": 128})
    def autotune(cls, context: AbstractContext, dtype: DTypeLike, ctype: str) -> Mapping[str, Any]:
        """Determine tuning parameters for the given context and element types.

        A one-dimensional scratch array of 1048576 elements is allocated and,
        for each candidate workgroup size (64, 128, 256, 512), a fill
        operation bound to that array is handed to :func:`.tune.make_measure`.
        The value returned is whatever :func:`.tune.autotune` produces from
        those candidates.
        """
        tuning_queue = context.create_tuning_command_queue()
        trial_shape = (1 << 20,)  # 1048576 elements
        scratch = accel.DeviceArray(context, trial_shape, dtype=dtype)

        # The parameter must be called ``wgs``: it matches the keyword passed
        # to tune.autotune below.
        def generate(wgs: int) -> Callable[[int], float]:
            template = cls(context, dtype, ctype, {"wgs": wgs})
            operation = template.instantiate(tuning_queue, trial_shape)
            operation.bind(data=scratch)
            return tune.make_measure(tuning_queue, operation)

        return tune.autotune(generate, wgs=[64, 128, 256, 512])

    def instantiate(
        self,
        command_queue: AbstractCommandQueue,
        shape: Tuple[int, ...],
        allocator: Optional[accel.AbstractAllocator] = None,
    ) -> "Fill":
        """Create a :class:`Fill` operation from this template.

        Parameters
        ----------
        command_queue
            Command queue for the operation
        shape
            Shape for the data slot
        allocator
            Allocator used to allocate unbound slots
        """
        return Fill(self, command_queue, shape, allocator)
class Fill(accel.Operation):
    """Concrete instance of :class:`FillTemplate`.

    The fill value starts out as a default-constructed scalar of the
    template's dtype and can be changed with :meth:`set_value`.

    .. rubric:: Slots

    **data**
        Array to be filled (padding will be filled too)

    Parameters
    ----------
    template
        Operation template
    command_queue
        Command queue for the operation
    shape
        Shape for the data slot
    allocator
        Allocator used to allocate unbound slots
    """

    def __init__(
        self,
        template: FillTemplate,
        command_queue: AbstractCommandQueue,
        shape: Tuple[int, ...],
        allocator: Optional[accel.AbstractAllocator] = None,
    ) -> None:
        super().__init__(command_queue, allocator)
        self.template = template
        self.shape = shape
        self.kernel = template.program.get_kernel("fill")
        element_dtype = template.dtype
        self.slots["data"] = accel.IOSlot(shape, element_dtype)
        # Default-constructed scalar of the element type (zero for numeric dtypes).
        self.value = element_dtype.type()

    def set_value(self, value: Any) -> None:
        """Set the constant that later runs write, converted to the template's scalar type."""
        to_scalar = self.template.dtype.type
        self.value = to_scalar(value)

    def _run(self) -> None:
        """Enqueue the fill kernel over the whole padded extent of the data buffer."""
        target = self.buffer("data")
        workgroup = self.template.wgs
        # Count padded elements, so that padding is overwritten as well.
        count = int(np.prod(target.padded_shape))
        # The launch size comes from accel.roundup(count, workgroup); the true
        # count is passed to the kernel as a 32-bit unsigned integer.
        launch_size = accel.roundup(count, workgroup)
        kernel_args = [target.buffer, np.uint32(count), self.value]
        self.command_queue.enqueue_kernel(
            self.kernel,
            kernel_args,
            global_size=(launch_size,),
            local_size=(workgroup,),
        )

    def parameters(self) -> Mapping[str, Any]:
        """Return the dtype, ctype, data-slot shape and current fill value of this operation."""
        template = self.template
        data_shape = self.slots["data"].shape  # type: ignore
        return {
            "dtype": template.dtype,
            "ctype": template.ctype,
            "shape": data_shape,
            "value": self.value,
        }