Source code for dolfinx_mpc.numba.assemble_matrix

# Copyright (C) 2020-2021 Jørgen S. Dokken
#
# This file is part of DOLFINX_MPC
#
# SPDX-License-Identifier:    MIT
from __future__ import annotations

from typing import List, Optional, Tuple, Union

from petsc4py import PETSc as _PETSc

import cffi
import dolfinx
import dolfinx.cpp as _cpp
import dolfinx.fem as _fem
import numpy
import numpy.typing as npt
from dolfinx.common import Timer

import numba
from dolfinx_mpc.assemble_matrix import create_sparsity_pattern
from dolfinx_mpc.multipointconstraint import MultiPointConstraint

from .helpers import _bcs, _forms, extract_slave_cells, pack_slave_facet_info
from .numba_setup import initialize_petsc, sink

mode = _PETSc.InsertMode.ADD_VALUES  # type: ignore
insert = _PETSc.InsertMode.INSERT_VALUES  # type: ignore
ffi, set_values_local = initialize_petsc()


[docs] def assemble_matrix( form: _forms, constraint: MultiPointConstraint, bcs: Optional[List[_bcs]] = None, diagval: _PETSc.ScalarType = 1.0, # type: ignore A: Optional[_PETSc.Mat] = None, # type: ignore ): """ Assembles a compiled DOLFINx form with given a multi point constraint and possible Dirichlet boundary conditions. NOTE: Strong Dirichlet conditions cannot be on master dofs. Args: form: The compiled bilinear form constraint: The multi point constraint bcs: List of Dirichlet boundary conditions diagval: Value to set on the diagonal of the matrix A: PETSc matrix to assemble into (optional) """ timer_matrix = Timer("~MPC: Assemble matrix (numba)") V = constraint.function_space dofmap = V.dofmap dofs = dofmap.list # Pack MPC data for numba kernels coefficients = constraint.coefficients()[0] masters_adj = constraint.masters c_to_s_adj = constraint.cell_to_slaves cell_to_slave = c_to_s_adj.array c_to_s_off = c_to_s_adj.offsets is_slave = constraint.is_slave mpc_data = ( masters_adj.array, coefficients, masters_adj.offsets, cell_to_slave, c_to_s_off, is_slave, ) slave_cells = extract_slave_cells(c_to_s_off) # Create 1D bc indicator for matrix assembly num_dofs_local = (dofmap.index_map.size_local + dofmap.index_map.num_ghosts) * dofmap.index_map_bs is_bc = numpy.zeros(num_dofs_local, dtype=bool) bcs = [] if bcs is None else [bc._cpp_object for bc in bcs] if len(bcs) > 0: for bc in bcs: is_bc[bc.dof_indices()[0]] = True # Get data from mesh x_dofs = V.mesh.geometry.dofmap x = V.mesh.geometry.x # Pack constants and coefficients form_coeffs = _cpp.fem.pack_coefficients(form._cpp_object) form_consts = _cpp.fem.pack_constants(form._cpp_object) # Create sparsity pattern and matrix if not supplied if A is None: pattern = create_sparsity_pattern(form, constraint) pattern.finalize() A = _cpp.la.petsc.create_matrix(V.mesh.comm, pattern) A.zeroEntries() # Assemble the matrix with all entries _cpp.fem.petsc.assemble_matrix(A, form._cpp_object, form_consts, form_coeffs, bcs, False) # General assembly data block_size = dofmap.dof_layout.block_size num_dofs_per_element = dofmap.dof_layout.num_dofs tdim = V.mesh.topology.dim # Assemble over cells subdomain_ids = form._cpp_object.integral_ids(_fem.IntegralType.cell) num_cell_integrals = len(subdomain_ids) e0 = form.function_spaces[0].element e1 = form.function_spaces[1].element # Get dof transformations needs_transformation_data = ( e0.needs_dof_transformations or e1.needs_dof_transformations or form._cpp_object.needs_facet_permutations ) cell_perms = numpy.array([], dtype=numpy.uint32) if needs_transformation_data: V.mesh.topology.create_entity_permutations() cell_perms = V.mesh.topology.get_cell_permutation_info() # NOTE: Here we need to add the apply_dof_transformation and apply_dof_transformation transpose functions # to support more exotic elements if e0.needs_dof_transformations or e1.needs_dof_transformations: raise NotImplementedError("Dof transformations not implemented") if _PETSc.ScalarType == numpy.float32: # type: ignore nptype = "float32" elif _PETSc.ScalarType == numpy.float64: # type: ignore nptype = "float64" elif _PETSc.ScalarType == numpy.complex64: # type: ignore nptype = "complex64" elif _PETSc.ScalarType == numpy.complex128: # type: ignore nptype = "complex128" else: raise RuntimeError(f"Unsupported scalar type {_PETSc.ScalarType}.") # type: ignore ufcx_form = form.ufcx_form if num_cell_integrals > 0: # NOTE: This depends on enum ordering in ufcx.h cell_form_pos = ufcx_form.form_integral_offsets[0] V.mesh.topology.create_entity_permutations() for i, id in enumerate(subdomain_ids): coeffs_i = form_coeffs[(_fem.IntegralType.cell, id)] cell_kernel = getattr(ufcx_form.form_integrals[cell_form_pos + i], f"tabulate_tensor_{nptype}") active_cells = form._cpp_object.domains(_fem.IntegralType.cell, id) assemble_slave_cells( A.handle, cell_kernel, active_cells[numpy.isin(active_cells, slave_cells)], (x_dofs, x), coeffs_i, form_consts, cell_perms, dofs, block_size, num_dofs_per_element, mpc_data, is_bc, ) # Assemble over exterior facets subdomain_ids = form._cpp_object.integral_ids(_fem.IntegralType.exterior_facet) num_exterior_integrals = len(subdomain_ids) if num_exterior_integrals > 0: V.mesh.topology.create_entities(tdim - 1) V.mesh.topology.create_connectivity(tdim - 1, tdim) # Get facet permutations if required facet_perms = numpy.array([], dtype=numpy.uint8) if form._cpp_object.needs_facet_permutations: facet_perms = V.mesh.topology.get_facet_permutations() perm = (cell_perms, form._cpp_object.needs_facet_permutations, facet_perms) # NOTE: This depends on enum ordering in ufcx.h ext_facet_pos = ufcx_form.form_integral_offsets[1] for i, id in enumerate(subdomain_ids): facet_kernel = getattr(ufcx_form.form_integrals[ext_facet_pos + i], f"tabulate_tensor_{nptype}") facets = form._cpp_object.domains(_fem.IntegralType.exterior_facet, id) coeffs_i = form_coeffs[(_fem.IntegralType.exterior_facet, id)] facet_info = pack_slave_facet_info(facets, slave_cells) num_facets_per_cell = len(V.mesh.topology.connectivity(tdim, tdim - 1).links(0)) assemble_exterior_slave_facets( A.handle, facet_kernel, (x_dofs, x), coeffs_i, form_consts, perm, dofs, block_size, num_dofs_per_element, facet_info, mpc_data, is_bc, num_facets_per_cell, ) # Add mpc entries on diagonal slaves = constraint.slaves num_local_slaves = constraint.num_local_slaves add_diagonal(A.handle, slaves[:num_local_slaves], diagval) # Add one on diagonal for diriclet bc and slave dofs # NOTE: In the future one could use a constant in the dirichletbc if form.function_spaces[0] is form.function_spaces[1]: A.assemblyBegin(_PETSc.Mat.AssemblyType.FLUSH) # type: ignore A.assemblyEnd(_PETSc.Mat.AssemblyType.FLUSH) # type: ignore _cpp.fem.petsc.insert_diagonal(A, form.function_spaces[0], bcs, diagval) A.assemble() timer_matrix.stop() return A
@numba.njit def add_diagonal(A: int, dofs: numba.int32[:], diagval: _PETSc.ScalarType = 1): # type: ignore """ Insert value on diagonal of matrix for given dofs. """ ffi_fb = ffi.from_buffer dof_list = numpy.zeros(1, dtype=numpy.int32) dof_value = numpy.full(1, diagval, dtype=_PETSc.ScalarType) # type: ignore for dof in dofs: dof_list[0] = dof ierr_loc = set_values_local(A, 1, ffi_fb(dof_list), 1, ffi_fb(dof_list), ffi_fb(dof_value), mode) # type: ignore assert ierr_loc == 0 sink(dof_list, dof_value) @numba.njit def assemble_slave_cells( A: int, kernel: cffi.FFI.CData, active_cells: numba.int32[:], mesh: Tuple[numba.int32[:, :], Union[numba.float64, numba.float32]], coeffs: Union[numba.float32[:, :], numba.float64[:, :], numba.complex64[:, :], numba.complex128[:, :]], constants: Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]], permutation_info: numba.uint32[:], dofmap: numba.int32[:, :], block_size: int, num_dofs_per_element: int, mpc: Tuple[ # type: ignore numba.int32[:], Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]], numba.int32[:], numba.int32[:], numba.int32[:], numba.int32[:], ], is_bc: numba.bool_[:], ): """ Assemble MPC contributions for cell integrals """ ffi_fb = ffi.from_buffer # Get mesh and geometry data x_dofmap, x = mesh # Empty arrays mimicking Nullpointers facet_index = numpy.zeros(0, dtype=numpy.intc) facet_perm = numpy.zeros(0, dtype=numpy.uint8) # NOTE: All cells are assumed to be of the same type num_xdofs_per_cell = x_dofmap.shape[1] geometry = numpy.zeros((num_xdofs_per_cell, 3), dtype=dolfinx.default_real_type) A_local = numpy.zeros( (block_size * num_dofs_per_element, block_size * num_dofs_per_element), dtype=_PETSc.ScalarType, # type: ignore ) masters, coefficients, offsets, c_to_s, c_to_s_off, is_slave = mpc # Loop over all cells local_dofs = numpy.zeros(block_size * num_dofs_per_element, dtype=numpy.int32) for cell in active_cells: # Compute vertices of cell from mesh data geometry[:, :] = x[x_dofmap[cell]] # Assemble local contributions A_local.fill(0.0) kernel( ffi_fb(A_local), # type: ignore ffi_fb(coeffs[cell, :]), ffi_fb(constants), ffi_fb(geometry), # type: ignore ffi_fb(facet_index), # type: ignore ffi_fb(facet_perm), # type: ignore ) # NOTE: Here we need to apply dof transformations local_blocks = dofmap[cell] # Remove all contributions for dofs that are in the Dirichlet bcs for j in range(num_dofs_per_element): for k in range(block_size): if is_bc[local_blocks[j] * block_size + k]: A_local[j * block_size + k, :] = 0 A_local[:, j * block_size + k] = 0 A_local_copy: numpy.typing.NDArray[_PETSc.ScalarType] = A_local.copy() # type: ignore # Find local position of slaves slaves = c_to_s[c_to_s_off[cell] : c_to_s_off[cell + 1]] mpc_cell = (slaves, masters, coefficients, offsets, is_slave) modify_mpc_cell(A, num_dofs_per_element, block_size, A_local, local_blocks, mpc_cell) # Remove already assembled contribution to matrix A_contribution = A_local - A_local_copy # Expand local blocks to dofs for i in range(num_dofs_per_element): for j in range(block_size): local_dofs[i * block_size + j] = local_blocks[i] * block_size + j # Insert local contribution ierr_loc = set_values_local( A, block_size * num_dofs_per_element, ffi_fb(local_dofs), # type: ignore block_size * num_dofs_per_element, ffi_fb(local_dofs), # type: ignore ffi_fb(A_contribution), # type: ignore mode, ) assert ierr_loc == 0 sink(A_contribution, local_dofs) @numba.njit def modify_mpc_cell( A: int, num_dofs: int, block_size: int, Ae: Union[numba.float32[:, :], numba.float64[:, :], numba.complex128[:, :], numba.complex64[:, :]], local_blocks: numba.int32[:], mpc_cell: Tuple[ # type: ignore numba.int32[:], numba.int32[:], Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]], numba.int32[:], numba.int8[:], ], ): """ Given an element matrix Ae, modify the contributions to respect the MPCs, and add contributions to appropriate places in the global matrix A. """ slaves, masters, coefficients, offsets, is_slave = mpc_cell # Locate which local dofs are slave dofs and compute the local index of the slave # Additionally count the number of masters we will needed in the flattened structures local_index0 = numpy.empty(len(slaves), dtype=numpy.int32) num_flattened_masters = 0 for i in range(num_dofs): for j in range(block_size): slave = local_blocks[i] * block_size + j if is_slave[slave]: location = numpy.flatnonzero(slaves == slave)[0] local_index0[location] = i * block_size + j num_flattened_masters += offsets[slave + 1] - offsets[slave] # Strip a copy of Ae of all columns and rows belonging to a slave Ae_original = numpy.copy(Ae) Ae_stripped = numpy.zeros((block_size * num_dofs, block_size * num_dofs), dtype=_PETSc.ScalarType) # type: ignore for i in range(num_dofs): for b in range(block_size): is_slave0 = is_slave[local_blocks[i] * block_size + b] for j in range(num_dofs): for c in range(block_size): is_slave1 = is_slave[local_blocks[j] * block_size + c] Ae_stripped[i * block_size + b, j * block_size + c] = (not (is_slave0 and is_slave1)) * Ae_original[ i * block_size + b, j * block_size + c ] flattened_masters = numpy.zeros(num_flattened_masters, dtype=numpy.int32) flattened_slaves = numpy.zeros(num_flattened_masters, dtype=numpy.int32) flattened_coeffs = numpy.zeros(num_flattened_masters, dtype=_PETSc.ScalarType) # type: ignore c = 0 for i, slave in enumerate(slaves): local_masters = masters[offsets[slave] : offsets[slave + 1]] local_coeffs = coefficients[offsets[slave] : offsets[slave + 1]] num_masters = len(local_masters) for j in range(num_masters): flattened_slaves[c + j] = local_index0[i] flattened_masters[c + j] = local_masters[j] flattened_coeffs[c + j] = local_coeffs[j] c += num_masters m0 = numpy.zeros(1, dtype=numpy.int32) m1 = numpy.zeros(1, dtype=numpy.int32) Am0m1 = numpy.zeros((1, 1), dtype=_PETSc.ScalarType) # type: ignore Arow = numpy.zeros(block_size * num_dofs, dtype=_PETSc.ScalarType) # type: ignore Acol = numpy.zeros(block_size * num_dofs, dtype=_PETSc.ScalarType) # type: ignore mpc_dofs = numpy.zeros(block_size * num_dofs, dtype=numpy.int32) ffi_fb = ffi.from_buffer for i in range(num_flattened_masters): local_index = flattened_slaves[i] master = flattened_masters[i] coeff = flattened_coeffs[i] Ae[:, local_index] = 0 Ae[local_index, :] = 0 m0[0] = master Arow[:] = coeff * Ae_stripped[:, local_index] Acol[:] = coeff * Ae_stripped[local_index, :] Am0m1[0, 0] = coeff**2 * Ae_original[local_index, local_index] for j in range(num_dofs): for k in range(block_size): mpc_dofs[j * block_size + k] = local_blocks[j] * block_size + k mpc_dofs[local_index] = master ierr_row = set_values_local( A, block_size * num_dofs, ffi_fb(mpc_dofs), # type: ignore 1, ffi_fb(m0), # type: ignore ffi_fb(Arow), # type: ignore mode, ) assert ierr_row == 0 # Add slave row to master row ierr_col = set_values_local( A, 1, ffi_fb(m0), # type: ignore block_size * num_dofs, ffi_fb(mpc_dofs), # type: ignore ffi_fb(Acol), # type: ignore mode, ) assert ierr_col == 0 ierr_master = set_values_local(A, 1, ffi_fb(m0), 1, ffi_fb(m0), ffi_fb(Am0m1), mode) # type: ignore assert ierr_master == 0 # Add contributions for other masters relating to slaves on the given cell for j in range(num_flattened_masters): if i == j: continue other_local_index = flattened_slaves[j] other_master = flattened_masters[j] other_coeff = flattened_coeffs[j] m1[0] = other_master Am0m1[0, 0] = coeff * other_coeff * Ae_original[local_index, other_local_index] ierr_other_masters = set_values_local(A, 1, ffi_fb(m0), 1, ffi_fb(m1), ffi_fb(Am0m1), mode) # type: ignore assert ierr_other_masters == 0 sink(Arow, Acol, Am0m1, m0, m1, mpc_dofs) @numba.njit def assemble_exterior_slave_facets( A: int, kernel: cffi.FFI.CData, mesh: Tuple[numba.int32[:, :], Union[numba.float64, numba.float32]], coeffs: Union[numba.float32[:, :], numba.float64[:, :], numba.complex64[:, :], numba.complex128[:, :]], consts: Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]], perm: Tuple[numba.uint32[:], bool, numba.uint8[:]], dofmap: numba.int32[:, :], block_size: int, num_dofs_per_element: int, facet_info: numba.int32[:, 2], mpc: Tuple[ numba.int32[:], Union[numba.float32[:], numba.float64[:], numba.complex64[:], numba.complex128[:]], numba.int32[:], numba.int32[:], numba.int32[:], numba.int32[:], ], is_bc: npt.NDArray[numpy.bool_], num_facets_per_cell: int, ): """Assemble MPC contributions over exterior facet integrals""" # Unpack mpc data masters, coefficients, offsets, c_to_s, c_to_s_off, is_slave = mpc # Mesh data x_dofmap, x = mesh # Empty arrays for facet information facet_index = numpy.zeros(1, dtype=numpy.int32) facet_perm = numpy.zeros(1, dtype=numpy.uint8) # NOTE: All cells are assumed to be of the same type geometry = numpy.zeros((x_dofmap.shape[1], 3), dtype=x.dtype) # Numpy data used in facet loop A_local = numpy.zeros( (num_dofs_per_element * block_size, num_dofs_per_element * block_size), dtype=_PETSc.ScalarType, # type: ignore ) local_dofs = numpy.zeros(block_size * num_dofs_per_element, dtype=numpy.int32) # Permutation info cell_perms, needs_facet_perm, facet_perms = perm # Loop over all external facets that are active for i in range(facet_info.shape[0]): # Get cell index (local to process) and facet index (local to cell) cell_index, local_facet = facet_info[i] # Get mesh geometry facet_index[0] = local_facet geometry[:, :] = x[x_dofmap[cell_index]] # Assemble local matrix A_local.fill(0.0) if needs_facet_perm: facet_perm[0] = facet_perms[cell_index * num_facets_per_cell + local_facet] kernel( ffi.from_buffer(A_local), # type: ignore ffi.from_buffer(coeffs[cell_index, :]), # type: ignore ffi.from_buffer(consts), # type: ignore ffi.from_buffer(geometry), # type: ignore ffi.from_buffer(facet_index), # type: ignore ffi.from_buffer(facet_perm), # type: ignore ) # NOTE: Here we need to add the apply_dof_transformation and apply_dof_transformation transpose functions # Extract local blocks of dofs local_blocks = dofmap[cell_index] # Remove all contributions for dofs that are in the Dirichlet bcs for j in range(num_dofs_per_element): for k in range(block_size): if is_bc[local_blocks[j] * block_size + k]: A_local[j * block_size + k, :] = 0 A_local[:, j * block_size + k] = 0 A_local_copy: numpy.typing.NDArray[_PETSc.ScalarType] = A_local.copy() # type: ignore slaves = c_to_s[c_to_s_off[cell_index] : c_to_s_off[cell_index + 1]] mpc_cell = (slaves, masters, coefficients, offsets, is_slave) modify_mpc_cell(A, num_dofs_per_element, block_size, A_local, local_blocks, mpc_cell) # Remove already assembled contribution to matrix A_contribution = A_local - A_local_copy # Expand local blocks to dofs for i in range(num_dofs_per_element): for j in range(block_size): local_dofs[i * block_size + j] = local_blocks[i] * block_size + j # Insert local contribution ierr_loc = set_values_local( A, block_size * num_dofs_per_element, ffi.from_buffer(local_dofs), # type: ignore block_size * num_dofs_per_element, ffi.from_buffer(local_dofs), # type: ignore ffi.from_buffer(A_contribution), # type: ignore mode, ) assert ierr_loc == 0 sink(A_contribution, local_dofs)