# Copyright (C) 2020-2021 Jørgen S. Dokken
#
# This file is part of DOLFINX_MPC
#
# SPDX-License-Identifier: MIT
from __future__ import annotations
from typing import List, Optional, Tuple, Union
from petsc4py import PETSc as _PETSc
import cffi
import dolfinx
import dolfinx.cpp as _cpp
import dolfinx.fem as _fem
import numpy
import numpy.typing as npt
from dolfinx.common import Timer
import numba
from dolfinx_mpc.assemble_matrix import create_sparsity_pattern
from dolfinx_mpc.multipointconstraint import MultiPointConstraint
from .helpers import _bcs, _forms, extract_slave_cells, pack_slave_facet_info
from .numba_setup import initialize_petsc, sink
mode = _PETSc.InsertMode.ADD_VALUES # type: ignore
insert = _PETSc.InsertMode.INSERT_VALUES # type: ignore
ffi, set_values_local = initialize_petsc()
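# ffi provides cffi buffer access inside the numba kernels below; set_values_local is
# the C wrapper around PETSc's MatSetValuesLocal returned by initialize_petsc(), and
# sink(...) is called after each insertion loop so that numba keeps the numpy buffers
# alive until PETSc has read them.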


def assemble_matrix(
form: _forms,
constraint: MultiPointConstraint,
bcs: Optional[List[_bcs]] = None,
diagval: _PETSc.ScalarType = 1.0, # type: ignore
A: Optional[_PETSc.Mat] = None, # type: ignore
):
"""
    Assemble a compiled DOLFINx bilinear form, given a multi-point constraint and
    optional Dirichlet boundary conditions.

    NOTE: Strong Dirichlet conditions cannot be applied to master dofs.

Args:
form: The compiled bilinear form
constraint: The multi point constraint
bcs: List of Dirichlet boundary conditions
diagval: Value to set on the diagonal of the matrix
A: PETSc matrix to assemble into (optional)
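
    Example:
        A minimal usage sketch. The function space ``V``, UFL bilinear form ``a`` and
        boundary condition ``bc`` are assumed to be defined elsewhere; they are not
        provided by this module::

            mpc = MultiPointConstraint(V)
            # ... add constraints to mpc ...
            mpc.finalize()
            A = assemble_matrix(dolfinx.fem.form(a), mpc, bcs=[bc])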
"""
timer_matrix = Timer("~MPC: Assemble matrix (numba)")
V = constraint.function_space
dofmap = V.dofmap
dofs = dofmap.list
# Pack MPC data for numba kernels
coefficients = constraint.coefficients()[0]
masters_adj = constraint.masters
c_to_s_adj = constraint.cell_to_slaves
cell_to_slave = c_to_s_adj.array
c_to_s_off = c_to_s_adj.offsets
is_slave = constraint.is_slave
mpc_data = (
masters_adj.array,
coefficients,
masters_adj.offsets,
cell_to_slave,
c_to_s_off,
is_slave,
)
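    # The constraint data is CSR-like: for a slave dof s (marked in is_slave),
    # masters_adj.array[masters_adj.offsets[s] : masters_adj.offsets[s + 1]] are its
    # master dofs with matching entries in coefficients, while cell_to_slave and
    # c_to_s_off map each cell to the slave dofs it contains.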
slave_cells = extract_slave_cells(c_to_s_off)
# Create 1D bc indicator for matrix assembly
num_dofs_local = (dofmap.index_map.size_local + dofmap.index_map.num_ghosts) * dofmap.index_map_bs
is_bc = numpy.zeros(num_dofs_local, dtype=bool)
bcs = [] if bcs is None else [bc._cpp_object for bc in bcs]
if len(bcs) > 0:
for bc in bcs:
is_bc[bc.dof_indices()[0]] = True
# Get data from mesh
x_dofs = V.mesh.geometry.dofmap
x = V.mesh.geometry.x
# Pack constants and coefficients
form_coeffs = _cpp.fem.pack_coefficients(form._cpp_object)
form_consts = _cpp.fem.pack_constants(form._cpp_object)
# Create sparsity pattern and matrix if not supplied
if A is None:
pattern = create_sparsity_pattern(form, constraint)
pattern.finalize()
A = _cpp.la.petsc.create_matrix(V.mesh.comm, pattern)
A.zeroEntries()
# Assemble the matrix with all entries
_cpp.fem.petsc.assemble_matrix(A, form._cpp_object, form_consts, form_coeffs, bcs, False)
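    # The unconstrained assembly above covers every cell; the numba kernels below only
    # add the difference between the MPC-modified and the original local element
    # matrices on cells containing slave dofs.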
# General assembly data
block_size = dofmap.dof_layout.block_size
num_dofs_per_element = dofmap.dof_layout.num_dofs
tdim = V.mesh.topology.dim
# Assemble over cells
subdomain_ids = form._cpp_object.integral_ids(_fem.IntegralType.cell)
num_cell_integrals = len(subdomain_ids)
e0 = form.function_spaces[0].element
e1 = form.function_spaces[1].element
# Get dof transformations
needs_transformation_data = (
e0.needs_dof_transformations or e1.needs_dof_transformations or form._cpp_object.needs_facet_permutations
)
cell_perms = numpy.array([], dtype=numpy.uint32)
if needs_transformation_data:
V.mesh.topology.create_entity_permutations()
cell_perms = V.mesh.topology.get_cell_permutation_info()
    # NOTE: Here we need to add the apply_dof_transformation and
    # apply_dof_transformation_transpose functions to support more exotic elements
if e0.needs_dof_transformations or e1.needs_dof_transformations:
raise NotImplementedError("Dof transformations not implemented")
if _PETSc.ScalarType == numpy.float32: # type: ignore
nptype = "float32"
elif _PETSc.ScalarType == numpy.float64: # type: ignore
nptype = "float64"
elif _PETSc.ScalarType == numpy.complex64: # type: ignore
nptype = "complex64"
elif _PETSc.ScalarType == numpy.complex128: # type: ignore
nptype = "complex128"
else:
raise RuntimeError(f"Unsupported scalar type {_PETSc.ScalarType}.") # type: ignore
ufcx_form = form.ufcx_form
if num_cell_integrals > 0:
# NOTE: This depends on enum ordering in ufcx.h
cell_form_pos = ufcx_form.form_integral_offsets[0]
V.mesh.topology.create_entity_permutations()
for i, id in enumerate(subdomain_ids):
coeffs_i = form_coeffs[(_fem.IntegralType.cell, id)]
cell_kernel = getattr(ufcx_form.form_integrals[cell_form_pos + i], f"tabulate_tensor_{nptype}")
active_cells = form._cpp_object.domains(_fem.IntegralType.cell, id)
assemble_slave_cells(
A.handle,
cell_kernel,
active_cells[numpy.isin(active_cells, slave_cells)],
(x_dofs, x),
coeffs_i,
form_consts,
cell_perms,
dofs,
block_size,
num_dofs_per_element,
mpc_data,
is_bc,
)
# Assemble over exterior facets
subdomain_ids = form._cpp_object.integral_ids(_fem.IntegralType.exterior_facet)
num_exterior_integrals = len(subdomain_ids)
if num_exterior_integrals > 0:
V.mesh.topology.create_entities(tdim - 1)
V.mesh.topology.create_connectivity(tdim - 1, tdim)
# Get facet permutations if required
facet_perms = numpy.array([], dtype=numpy.uint8)
if form._cpp_object.needs_facet_permutations:
facet_perms = V.mesh.topology.get_facet_permutations()
perm = (cell_perms, form._cpp_object.needs_facet_permutations, facet_perms)
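        # perm bundles the cell permutation info, a flag indicating whether facet
        # permutations are required, and the facet permutation array used by the kernel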
# NOTE: This depends on enum ordering in ufcx.h
ext_facet_pos = ufcx_form.form_integral_offsets[1]
for i, id in enumerate(subdomain_ids):
facet_kernel = getattr(ufcx_form.form_integrals[ext_facet_pos + i], f"tabulate_tensor_{nptype}")
facets = form._cpp_object.domains(_fem.IntegralType.exterior_facet, id)
coeffs_i = form_coeffs[(_fem.IntegralType.exterior_facet, id)]
facet_info = pack_slave_facet_info(facets, slave_cells)
num_facets_per_cell = len(V.mesh.topology.connectivity(tdim, tdim - 1).links(0))
assemble_exterior_slave_facets(
A.handle,
facet_kernel,
(x_dofs, x),
coeffs_i,
form_consts,
perm,
dofs,
block_size,
num_dofs_per_element,
facet_info,
mpc_data,
is_bc,
num_facets_per_cell,
)
# Add mpc entries on diagonal
slaves = constraint.slaves
num_local_slaves = constraint.num_local_slaves
add_diagonal(A.handle, slaves[:num_local_slaves], diagval)
    # Add one on the diagonal for Dirichlet bc and slave dofs
# NOTE: In the future one could use a constant in the dirichletbc
if form.function_spaces[0] is form.function_spaces[1]:
A.assemblyBegin(_PETSc.Mat.AssemblyType.FLUSH) # type: ignore
A.assemblyEnd(_PETSc.Mat.AssemblyType.FLUSH) # type: ignore
_cpp.fem.petsc.insert_diagonal(A, form.function_spaces[0], bcs, diagval)
A.assemble()
timer_matrix.stop()
return A


@numba.njit
def add_diagonal(A: int, dofs: numba.int32[:], diagval: _PETSc.ScalarType = 1): # type: ignore
"""
Insert value on diagonal of matrix for given dofs.
"""
ffi_fb = ffi.from_buffer
dof_list = numpy.zeros(1, dtype=numpy.int32)
dof_value = numpy.full(1, diagval, dtype=_PETSc.ScalarType) # type: ignore
for dof in dofs:
dof_list[0] = dof
ierr_loc = set_values_local(A, 1, ffi_fb(dof_list), 1, ffi_fb(dof_list), ffi_fb(dof_value), mode) # type: ignore
assert ierr_loc == 0
sink(dof_list, dof_value)


@numba.njit
def assemble_slave_cells(
A: int,
kernel: cffi.FFI.CData,
active_cells: numba.int32[:],
mesh: Tuple[numba.int32[:, :], Union[numba.float64, numba.float32]],
coeffs: Union[numba.float32[:, :], numba.float64[:, :], numba.complex64[:, :], numba.complex128[:, :]],
constants: Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]],
permutation_info: numba.uint32[:],
dofmap: numba.int32[:, :],
block_size: int,
num_dofs_per_element: int,
mpc: Tuple[ # type: ignore
numba.int32[:],
Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]],
numba.int32[:],
numba.int32[:],
numba.int32[:],
numba.int32[:],
],
is_bc: numba.bool_[:],
):
"""
Assemble MPC contributions for cell integrals
"""
ffi_fb = ffi.from_buffer
# Get mesh and geometry data
x_dofmap, x = mesh
    # Empty arrays mimicking null pointers (cell integrals take no facet data)
facet_index = numpy.zeros(0, dtype=numpy.intc)
facet_perm = numpy.zeros(0, dtype=numpy.uint8)
# NOTE: All cells are assumed to be of the same type
num_xdofs_per_cell = x_dofmap.shape[1]
geometry = numpy.zeros((num_xdofs_per_cell, 3), dtype=dolfinx.default_real_type)
A_local = numpy.zeros(
(block_size * num_dofs_per_element, block_size * num_dofs_per_element),
dtype=_PETSc.ScalarType, # type: ignore
)
masters, coefficients, offsets, c_to_s, c_to_s_off, is_slave = mpc
# Loop over all cells
local_dofs = numpy.zeros(block_size * num_dofs_per_element, dtype=numpy.int32)
for cell in active_cells:
# Compute vertices of cell from mesh data
geometry[:, :] = x[x_dofmap[cell]]
# Assemble local contributions
A_local.fill(0.0)
kernel(
ffi_fb(A_local), # type: ignore
ffi_fb(coeffs[cell, :]),
ffi_fb(constants),
ffi_fb(geometry), # type: ignore
ffi_fb(facet_index), # type: ignore
ffi_fb(facet_perm), # type: ignore
)
# NOTE: Here we need to apply dof transformations
local_blocks = dofmap[cell]
# Remove all contributions for dofs that are in the Dirichlet bcs
for j in range(num_dofs_per_element):
for k in range(block_size):
if is_bc[local_blocks[j] * block_size + k]:
A_local[j * block_size + k, :] = 0
A_local[:, j * block_size + k] = 0
A_local_copy: numpy.typing.NDArray[_PETSc.ScalarType] = A_local.copy() # type: ignore
# Find local position of slaves
slaves = c_to_s[c_to_s_off[cell] : c_to_s_off[cell + 1]]
mpc_cell = (slaves, masters, coefficients, offsets, is_slave)
modify_mpc_cell(A, num_dofs_per_element, block_size, A_local, local_blocks, mpc_cell)
# Remove already assembled contribution to matrix
A_contribution = A_local - A_local_copy
# Expand local blocks to dofs
for i in range(num_dofs_per_element):
for j in range(block_size):
local_dofs[i * block_size + j] = local_blocks[i] * block_size + j
# Insert local contribution
ierr_loc = set_values_local(
A,
block_size * num_dofs_per_element,
ffi_fb(local_dofs), # type: ignore
block_size * num_dofs_per_element,
ffi_fb(local_dofs), # type: ignore
ffi_fb(A_contribution), # type: ignore
mode,
)
assert ierr_loc == 0
sink(A_contribution, local_dofs)


@numba.njit
def modify_mpc_cell(
A: int,
num_dofs: int,
block_size: int,
Ae: Union[numba.float32[:, :], numba.float64[:, :], numba.complex128[:, :], numba.complex64[:, :]],
local_blocks: numba.int32[:],
mpc_cell: Tuple[ # type: ignore
numba.int32[:],
numba.int32[:],
Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]],
numba.int32[:],
numba.int8[:],
],
):
"""
Given an element matrix Ae, modify the contributions to respect the MPCs, and add contributions to appropriate
places in the global matrix A.
"""
slaves, masters, coefficients, offsets, is_slave = mpc_cell
# Locate which local dofs are slave dofs and compute the local index of the slave
    # Additionally, count the number of masters we will need in the flattened structures
local_index0 = numpy.empty(len(slaves), dtype=numpy.int32)
num_flattened_masters = 0
for i in range(num_dofs):
for j in range(block_size):
slave = local_blocks[i] * block_size + j
if is_slave[slave]:
location = numpy.flatnonzero(slaves == slave)[0]
local_index0[location] = i * block_size + j
num_flattened_masters += offsets[slave + 1] - offsets[slave]
    # Zero a copy of Ae at all entries where both the row and the column belong to a slave
Ae_original = numpy.copy(Ae)
Ae_stripped = numpy.zeros((block_size * num_dofs, block_size * num_dofs), dtype=_PETSc.ScalarType) # type: ignore
for i in range(num_dofs):
for b in range(block_size):
is_slave0 = is_slave[local_blocks[i] * block_size + b]
for j in range(num_dofs):
for c in range(block_size):
is_slave1 = is_slave[local_blocks[j] * block_size + c]
Ae_stripped[i * block_size + b, j * block_size + c] = (not (is_slave0 and is_slave1)) * Ae_original[
i * block_size + b, j * block_size + c
]
flattened_masters = numpy.zeros(num_flattened_masters, dtype=numpy.int32)
flattened_slaves = numpy.zeros(num_flattened_masters, dtype=numpy.int32)
flattened_coeffs = numpy.zeros(num_flattened_masters, dtype=_PETSc.ScalarType) # type: ignore
c = 0
for i, slave in enumerate(slaves):
local_masters = masters[offsets[slave] : offsets[slave + 1]]
local_coeffs = coefficients[offsets[slave] : offsets[slave + 1]]
num_masters = len(local_masters)
for j in range(num_masters):
flattened_slaves[c + j] = local_index0[i]
flattened_masters[c + j] = local_masters[j]
flattened_coeffs[c + j] = local_coeffs[j]
c += num_masters
m0 = numpy.zeros(1, dtype=numpy.int32)
m1 = numpy.zeros(1, dtype=numpy.int32)
Am0m1 = numpy.zeros((1, 1), dtype=_PETSc.ScalarType) # type: ignore
Arow = numpy.zeros(block_size * num_dofs, dtype=_PETSc.ScalarType) # type: ignore
Acol = numpy.zeros(block_size * num_dofs, dtype=_PETSc.ScalarType) # type: ignore
mpc_dofs = numpy.zeros(block_size * num_dofs, dtype=numpy.int32)
ffi_fb = ffi.from_buffer
for i in range(num_flattened_masters):
local_index = flattened_slaves[i]
master = flattened_masters[i]
coeff = flattened_coeffs[i]
Ae[:, local_index] = 0
Ae[local_index, :] = 0
m0[0] = master
Arow[:] = coeff * Ae_stripped[:, local_index]
Acol[:] = coeff * Ae_stripped[local_index, :]
Am0m1[0, 0] = coeff**2 * Ae_original[local_index, local_index]
for j in range(num_dofs):
for k in range(block_size):
mpc_dofs[j * block_size + k] = local_blocks[j] * block_size + k
mpc_dofs[local_index] = master
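        # Add slave column to master column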
ierr_row = set_values_local(
A,
block_size * num_dofs,
ffi_fb(mpc_dofs), # type: ignore
1,
ffi_fb(m0), # type: ignore
ffi_fb(Arow), # type: ignore
mode,
)
assert ierr_row == 0
# Add slave row to master row
ierr_col = set_values_local(
A,
1,
ffi_fb(m0), # type: ignore
block_size * num_dofs,
ffi_fb(mpc_dofs), # type: ignore
ffi_fb(Acol), # type: ignore
mode,
)
assert ierr_col == 0
ierr_master = set_values_local(A, 1, ffi_fb(m0), 1, ffi_fb(m0), ffi_fb(Am0m1), mode) # type: ignore
assert ierr_master == 0
# Add contributions for other masters relating to slaves on the given cell
for j in range(num_flattened_masters):
if i == j:
continue
other_local_index = flattened_slaves[j]
other_master = flattened_masters[j]
other_coeff = flattened_coeffs[j]
m1[0] = other_master
Am0m1[0, 0] = coeff * other_coeff * Ae_original[local_index, other_local_index]
ierr_other_masters = set_values_local(A, 1, ffi_fb(m0), 1, ffi_fb(m1), ffi_fb(Am0m1), mode) # type: ignore
assert ierr_other_masters == 0
sink(Arow, Acol, Am0m1, m0, m1, mpc_dofs)


@numba.njit
def assemble_exterior_slave_facets(
A: int,
kernel: cffi.FFI.CData,
mesh: Tuple[numba.int32[:, :], Union[numba.float64, numba.float32]],
coeffs: Union[numba.float32[:, :], numba.float64[:, :], numba.complex64[:, :], numba.complex128[:, :]],
consts: Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]],
perm: Tuple[numba.uint32[:], bool, numba.uint8[:]],
dofmap: numba.int32[:, :],
block_size: int,
num_dofs_per_element: int,
facet_info: numba.int32[:, 2],
mpc: Tuple[
numba.int32[:],
Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]],
numba.int32[:],
numba.int32[:],
numba.int32[:],
numba.int32[:],
],
is_bc: npt.NDArray[numpy.bool_],
num_facets_per_cell: int,
):
"""Assemble MPC contributions over exterior facet integrals"""
# Unpack mpc data
masters, coefficients, offsets, c_to_s, c_to_s_off, is_slave = mpc
# Mesh data
x_dofmap, x = mesh
# Empty arrays for facet information
facet_index = numpy.zeros(1, dtype=numpy.int32)
facet_perm = numpy.zeros(1, dtype=numpy.uint8)
# NOTE: All cells are assumed to be of the same type
geometry = numpy.zeros((x_dofmap.shape[1], 3), dtype=x.dtype)
# Numpy data used in facet loop
A_local = numpy.zeros(
(num_dofs_per_element * block_size, num_dofs_per_element * block_size),
dtype=_PETSc.ScalarType, # type: ignore
)
local_dofs = numpy.zeros(block_size * num_dofs_per_element, dtype=numpy.int32)
# Permutation info
cell_perms, needs_facet_perm, facet_perms = perm
    # Loop over all active exterior facets
for i in range(facet_info.shape[0]):
# Get cell index (local to process) and facet index (local to cell)
cell_index, local_facet = facet_info[i]
# Get mesh geometry
facet_index[0] = local_facet
geometry[:, :] = x[x_dofmap[cell_index]]
# Assemble local matrix
A_local.fill(0.0)
if needs_facet_perm:
facet_perm[0] = facet_perms[cell_index * num_facets_per_cell + local_facet]
kernel(
ffi.from_buffer(A_local), # type: ignore
ffi.from_buffer(coeffs[cell_index, :]), # type: ignore
ffi.from_buffer(consts), # type: ignore
ffi.from_buffer(geometry), # type: ignore
ffi.from_buffer(facet_index), # type: ignore
ffi.from_buffer(facet_perm), # type: ignore
)
        # NOTE: Here we need to add the apply_dof_transformation and
        # apply_dof_transformation_transpose functions
# Extract local blocks of dofs
local_blocks = dofmap[cell_index]
# Remove all contributions for dofs that are in the Dirichlet bcs
for j in range(num_dofs_per_element):
for k in range(block_size):
if is_bc[local_blocks[j] * block_size + k]:
A_local[j * block_size + k, :] = 0
A_local[:, j * block_size + k] = 0
A_local_copy: numpy.typing.NDArray[_PETSc.ScalarType] = A_local.copy() # type: ignore
slaves = c_to_s[c_to_s_off[cell_index] : c_to_s_off[cell_index + 1]]
mpc_cell = (slaves, masters, coefficients, offsets, is_slave)
modify_mpc_cell(A, num_dofs_per_element, block_size, A_local, local_blocks, mpc_cell)
# Remove already assembled contribution to matrix
A_contribution = A_local - A_local_copy
# Expand local blocks to dofs
        for b in range(num_dofs_per_element):
            for k in range(block_size):
                local_dofs[b * block_size + k] = local_blocks[b] * block_size + k
# Insert local contribution
ierr_loc = set_values_local(
A,
block_size * num_dofs_per_element,
ffi.from_buffer(local_dofs), # type: ignore
block_size * num_dofs_per_element,
ffi.from_buffer(local_dofs), # type: ignore
ffi.from_buffer(A_contribution), # type: ignore
mode,
)
assert ierr_loc == 0
sink(A_contribution, local_dofs)