Commit 81326948 authored by Jonathan Lambrechts's avatar Jonathan Lambrechts
Browse files

perepare merge

parent 7b2d80f6
......@@ -3,7 +3,7 @@ ALL: libmbfluid.so libscontact2.so libscontact3.so
CFLAGS=-Wno-unused-function -O3 -march=native -mtune=native -fPIC -g -std=gnu99
LDFLAGS=-shared -lm
FLUID_C=src/fluid_problem.c src/linear_system_banded_avx.c src/mesh.c src/mesh_find.c src/quadtree.c
FLUID_C=src/fluid_problem.c src/linear_system.c src/mesh.c src/mesh_find.c src/quadtree.c
FLUID_H=src/tools.h src/fluid_problem.h src/linear_system.h src/mesh_find.h src/mesh.h src/quadtree.h src/vector.h
SCONTACT_C=src/quadtree.c src/scontact.c
......
......@@ -17,7 +17,6 @@
static double QP[][2] = {{1./6,1./6},{1./6,2./3},{2./3,1./6}};
static double QW[] = {1./6,1./6,1./6};
int fluid_problem_export(const FluidProblem *problem, const char *output_dir, double t, int iter)
{
char file_name[1024];
......@@ -245,6 +244,20 @@ static void fluid_problem_assemble_system(FluidProblem *problem, const double *s
}
free(local_vector);
free(local_matrix);
for (int ibnd = 0; ibnd < problem->n_strong_boundaries; ++ibnd) {
const StrongBoundary *bnd = problem->strong_boundaries + ibnd;
int iphys;
for (iphys = 0; iphys < mesh->n_phys; ++iphys) {
if (strcmp(bnd->tag, mesh->phys_name[iphys]) == 0)
break;
}
if (iphys == mesh->n_phys)
printf("Boundary tag \"%s\" not found.", bnd->tag);
for (int i = 0; i < mesh->phys_n_nodes[iphys]; ++i){
linear_system_fix_row(lsys, mesh->phys_nodes[iphys][i], bnd->field, 0.);
}
}
}
static void apply_boundary_conditions(const Mesh *mesh, double *solution, int nBnd, StrongBoundary *bnds)
......@@ -398,7 +411,7 @@ FluidProblem *fluid_problem_new(const char *mesh_file_name, double mu, double rh
problem->node_volume = malloc(0);
fluid_problem_compute_node_volume(problem);
// end to remove
problem->linear_system = linear_system_new(mesh, n_fields, problem->n_strong_boundaries, problem->strong_boundaries);
problem->linear_system = linear_system_new(mesh->n_triangles, 3, n_fields, mesh->triangles);
problem->n_particles = 0;
problem->particle_uvw = malloc(0);
problem->particle_element_id = malloc(0);
......@@ -560,7 +573,7 @@ void fluid_problem_adapt_mesh(FluidProblem *problem, double gradmin, double grad
fluid_problem_compute_porosity(problem);
computeEpsilon(problem, autoEpsilon);
linear_system_free(problem->linear_system);
problem->linear_system = linear_system_new(problem->mesh, n_fields, problem->n_strong_boundaries, problem->strong_boundaries);
problem->linear_system = linear_system_new(new_mesh->n_triangles, 3, n_fields, new_mesh->triangles);
}
void fluid_problem_set_particles(FluidProblem *problem, int n, double *mass, double *volume, double *position, double *velocity)
......
......@@ -3,6 +3,13 @@
#include "mesh.h"
#include "linear_system.h"
#include <stdbool.h>
typedef struct {
char *tag;
int field;
double v;
}StrongBoundary;
typedef struct {
double *epsilon;
......
#include <math.h>
#include "mesh.h"
#include <stdlib.h>
#include <stdio.h>
#include "linear_system.h"
#include <stdlib.h>
#include <string.h>
......@@ -16,24 +18,41 @@ void connectivity_insert(int *connectivity, int i, int j)
}
printf("ERROR : node %i has more than %i neighbours\n", i, CONMAX);
}
struct LinearSystemStruct{
double *M;
int *row_start;
int *row_end;
int *row_lu_end;
double **rows;
double *b;
double *x;
int *node_map;
int *elements;
int n_nodes_by_element;
int n_elements;
int n_nodes;
int n_fields;
int n;
};
void reverse_cuthill_mckee(Mesh *mesh, int *ordering)
void reverse_cuthill_mckee(LinearSystem *system, int *ordering)
{
int *node_connectivity = malloc(sizeof(int)*mesh->n_nodes*CONMAX);
for (int i = 0; i < mesh->n_nodes*CONMAX; ++i) {
int *node_connectivity = malloc(sizeof(int)*system->n_nodes*CONMAX);
for (int i = 0; i < system->n_nodes*CONMAX; ++i) {
node_connectivity[i] = -1;
}
for (int i = 0; i < mesh->n_triangles; ++i) {
int *tri = mesh->triangles + i*3;
connectivity_insert(node_connectivity, tri[0], tri[1]);
connectivity_insert(node_connectivity, tri[1], tri[0]);
connectivity_insert(node_connectivity, tri[0], tri[2]);
connectivity_insert(node_connectivity, tri[2], tri[0]);
connectivity_insert(node_connectivity, tri[1], tri[2]);
connectivity_insert(node_connectivity, tri[2], tri[1]);
for (int i = 0; i < system->n_elements; ++i) {
int *el = system->elements + i*system->n_nodes_by_element;
for (int k = 0; k < system->n_nodes_by_element; ++k){
for (int l = 0; l < system->n_nodes_by_element; ++l){
if (k == l) continue;
connectivity_insert(node_connectivity, el[k], el[l]);
connectivity_insert(node_connectivity, el[l], el[k]);
}
}
}
int *node_degree = malloc(sizeof(int)*mesh->n_nodes);
for (int i = 0; i < mesh->n_nodes; ++i) {
int *node_degree = malloc(sizeof(int)*system->n_nodes);
for (int i = 0; i < system->n_nodes; ++i) {
node_degree[i] = 0;
for (int j = 0; j < CONMAX; ++j) {
if (node_connectivity[CONMAX*i+j] == -1)
......@@ -41,9 +60,9 @@ void reverse_cuthill_mckee(Mesh *mesh, int *ordering)
node_degree[i] += 1;
}
}
int *queue = malloc(sizeof(int)*mesh->n_nodes);
int *queue = malloc(sizeof(int)*system->n_nodes);
queue[0] = 0;
for (int i = 0; i < mesh->n_nodes; ++i){
for (int i = 0; i < system->n_nodes; ++i){
ordering[i] = -1;
if (node_degree[queue[0]] > node_degree[i] && node_degree[i] !=0)
queue[0] = i;
......@@ -77,7 +96,7 @@ void reverse_cuthill_mckee(Mesh *mesh, int *ordering)
stage_start = stage_end;
stage_end = queue_end;
}
for (int i = 0; i < mesh->n_nodes; ++i)
for (int i = 0; i < system->n_nodes; ++i)
if(ordering[i] >= 0)
ordering[i] = id-1-ordering[i];
printf("id = %i\n", id);
......@@ -94,60 +113,35 @@ void reverse_cuthill_mckee(Mesh *mesh, int *ordering)
#define PADDING 1
#endif
typedef double aligned_double __attribute__((aligned (PADDING*8)));
#include <immintrin.h>
// y[from:to] += a*x[from:to]
// y and x must be 256-bit aligned
// memory should be allocated in consequence
static void avx_row_axpy(double a, double *__restrict__ x, double *__restrict__ y, int from, int to)
static void row_axpy(double a, aligned_double *__restrict__ x, aligned_double *__restrict__ y, int from, int to)
{
#if defined(__AVX2__)
int pto = (to+3)/4*4;
int pfrom = (from+3)/4*4;
for (int i = from; i < pfrom; ++i)
int i = from;
int pfrom = (from+3)&(~3);
for (; i < pfrom; ++i)
y[i] += a*x[i];
__m256d ma = _mm256_set1_pd(a);
for (int i = pfrom; i < pto; i+=4) {
__m256d mx = _mm256_load_pd(x+i);
__m256d my = _mm256_load_pd(y+i);
_mm256_store_pd(y+i,_mm256_fmadd_pd(ma,mx,my));
for (; i+15 < to; i+=16) {
aligned_double * __restrict__ X = x + i;
aligned_double * __restrict__ Y = y + i;
for (int j = 0; j < 16; ++j){
Y[j] += a * X[j];
}
}
#elif defined(__AVX__)
int pto = (to+1)/2*2;
int pfrom = (from+1)/2*2;
for (int i = from; i < pfrom; ++i)
y[i] += a*x[i];
__m128d ma = _mm_set1_pd(a);
for (int i = pfrom; i < pto; i+=2) {
__m128d mx = _mm_load_pd(x+i);
__m128d my = _mm_load_pd(y+i);
#if defined(__FMA__)
_mm_store_pd(y+i, _mm_fmadd_pd(ma,mx,my));
#else
__m128d mz = _mm_mul_pd(ma, mx);
_mm_store_pd(y+i, _mm_add_pd(my,mz));
#endif
for (; i+3 < to; i+=4) {
aligned_double * __restrict__ X = x + i;
aligned_double * __restrict__ Y = y + i;
for (int j = 0; j < 4; ++j)
Y[j] += a * X[j];
}
#else
for (int i = from; i < to; ++i)
for (; i < to; i++)
y[i] += a*x[i];
#endif
}
struct LinearSystemStruct{
double *M;
int *row_start;
int *row_end;
int *row_lu_end;
double **rows;
double *b;
double *x;
int *node_map;
int n_fields;
int n;
Mesh *mesh;
int *isfixed;
};
static int imin(int a, int b) {
return a < b ? a : b;
}
......@@ -156,6 +150,42 @@ static int imax(int a, int b) {
return a > b ? a : b;
}
static double row_reduction(aligned_double *__restrict__ x, aligned_double *__restrict__ y, int from, int to)
{
int i = from;
double r = 0;
int pfrom = (from+3)&(~3);
for (; i < imin(pfrom, to); ++i)
r += x[i] * y[i];
double R[4];
for (; i+3 < to; i+=4) {
aligned_double * __restrict__ X = x + i;
aligned_double * __restrict__ Y = y + i;
for (int j = 0; j < 4; ++j)
R[j] = X[j]*Y[j];
r += R[0]+R[1]+R[2]+R[3];
}
for (; i < to; ++i)
r += x[i] * y[i];
return r;
}
static void row_zero(aligned_double *__restrict__ x, int from, int to)
{
int i = from;
int pfrom = (from+3)&(~3);
for (; i < imin(pfrom, to); ++i)
x[i] = 0;
for (; i+3 < to; i+=4) {
aligned_double * __restrict__ X = x + i;
for (int j = 0; j < 4; ++j)
X[j] = 0;
}
for (; i < to; ++i)
x[i] = 0;
}
static int LUPDecompose(LinearSystem *system){
int i,j;
int N = system->n;
......@@ -165,7 +195,7 @@ static int LUPDecompose(LinearSystem *system){
if(i < system->row_start[j] || A[j][i] == 0.)
continue;
A[j][i]/=A[i][i];
avx_row_axpy(-A[j][i],A[i],A[j],i+1,system->row_end[i]);
row_axpy(-A[j][i],A[i],A[j],i+1,system->row_end[i]);
}
}
return(1);
......@@ -177,35 +207,31 @@ static void LUPSolve(LinearSystem *system){
int N = system->n;
double **A = system->rows;
for(int i=0;i<N;i++){
//x[i]=b[P[i]];
x[i]=b[i];
for(int k=system->row_start[i];k<i;k++)
x[i]-=A[i][k]*x[k];
x[i] = b[i] - row_reduction(A[i], x, system->row_start[i], i);
}
for(int i=N-1;i>=0;i--){
for(int k=i+1;k<system->row_end[i] && k < N;k++){
x[i]-=A[i][k]*x[k];
}
x[i]=x[i]/A[i][i];
}
}
static void linear_system_zero_row(LinearSystem *system, int i)
{
for (int j = system->row_start[i]; j < system->row_end[i]; ++j) {
system->rows[i][j] = 0.;
x[i] -= row_reduction(A[i], x, i+1, imin(system->row_end[i], N));
x[i] = x[i]/A[i][i];
}
}
LinearSystem *linear_system_new(Mesh *mesh, int n_fields, int n_boundaries, const StrongBoundary *boundaries)
LinearSystem *linear_system_new(int n_elements, int n_nodes_by_element, int n_fields, int *elements)
{
LinearSystem *system = malloc(sizeof(LinearSystem));
system->n_fields = n_fields;
system->mesh = mesh;
system->node_map = malloc(sizeof(int)*mesh->n_nodes);
reverse_cuthill_mckee(mesh, system->node_map);
system->n_nodes_by_element = n_nodes_by_element;
system->n_elements = n_elements;
system->elements = elements;
int n_nodes = 0;
for (int i = 0; i < n_elements*n_nodes_by_element; ++i)
if(elements[i]+1 > n_nodes)
n_nodes = elements[i]+1;
system->n_nodes = n_nodes;
system->n = n_nodes *n_fields;
system->node_map = malloc(sizeof(int)*n_nodes);
reverse_cuthill_mckee(system, system->node_map);
int n_used_nodes = 0;
for (int i = 0; i < mesh->n_nodes; ++i)
for (int i = 0; i < n_nodes; ++i)
if (system->node_map[i] >= 0)
n_used_nodes ++;
......@@ -216,12 +242,13 @@ LinearSystem *linear_system_new(Mesh *mesh, int n_fields, int n_boundaries, cons
node_row_end[i] = 0;
node_row_start[i] = n_used_nodes;
}
for (int i = 0; i < mesh->n_triangles; ++i) {
int *tri = mesh->triangles + i*3;
for (int j = 0; j < 3; ++j) {
int n0 = system->node_map[tri[j]];
for (int k = 0; k < 3; ++k) {
int n1 = system->node_map[tri[k]];
for (int i = 0; i < n_elements; ++i) {
int *el = elements + i*n_nodes_by_element;
for (int j = 0; j < n_nodes_by_element; ++j) {
int n0 = system->node_map[el[j]];
for (int k = 0; k < n_nodes_by_element; ++k) {
int n1 = system->node_map[el[k]];
if (n1 < 0 || n0 < 0) continue;
if (node_row_start[n0] > n1)
node_row_start[n0] = n1;
if (node_row_end[n0] < n1)
......@@ -236,8 +263,8 @@ LinearSystem *linear_system_new(Mesh *mesh, int n_fields, int n_boundaries, cons
for (int i = 0; i < n_used_nodes; ++i) {
for (int j = 0; j < n_fields; ++j) {
int k = i*n_fields +j;
system->row_start[k] = (node_row_start[i]*n_fields)/PADDING*PADDING;
system->row_end[k] = (node_row_end[i]*n_fields+n_fields+(PADDING-1))/PADDING*PADDING;
system->row_start[k] = node_row_start[i]*n_fields;
system->row_end[k] = node_row_end[i]*n_fields+n_fields;
system->row_lu_end[k] = k;
}
}
......@@ -246,54 +273,43 @@ LinearSystem *linear_system_new(Mesh *mesh, int n_fields, int n_boundaries, cons
if (system->row_lu_end[j] < i+1) system->row_lu_end[j] = i+1;
if (system->row_end[i] < system->row_end[j]) system->row_end[i] = system->row_end[j];
}
total_size += system->row_end[i]-system->row_start[i];
}
for (int i = 0; i < system->n; ++i) {
int start = total_size - system->row_start[i];
int padded_start = (start+PADDING-1)&~(PADDING-1);
total_size += system->row_end[i]-system->row_start[i]+(padded_start-start);
}
free(node_row_start);
free(node_row_end);
system->M = _mm_malloc(sizeof(double)*total_size, 32);
system->M = _mm_malloc(sizeof(double)*total_size, PADDING*8);
system->rows = malloc(sizeof(double*)*system->n);
for (int i = 0; i < total_size; ++i)
system->M[i] = 0;
system->rows[0] = system->M;
for (int i = 1; i < system->n; ++i){
system->rows[i] = system->rows[i-1]+system->row_end[i-1]-system->row_start[i];
total_size = 0;
for (int i = 0; i < system->n; ++i){
int start = total_size - system->row_start[i];
int padded_start = (start+PADDING-1)&~(PADDING-1);
total_size += system->row_end[i]-system->row_start[i]+(padded_start-start);
system->rows[i] = system->M + padded_start;
}
system->b = malloc(sizeof(double)*system->n);
system->x = malloc(sizeof(double)*system->n);
system->isfixed = malloc(sizeof(int)*system->n);
for (int i = 0; i < system->n; ++i) {
system->isfixed[i] = 0;
}
for (int ibnd = 0; ibnd < n_boundaries; ++ibnd) {
const StrongBoundary *bnd = boundaries + ibnd;
int iphys;
for (iphys = 0; iphys < mesh->n_phys; ++iphys) {
if (strcmp(bnd->tag, mesh->phys_name[iphys]) == 0)
break;
}
if (iphys == mesh->n_phys){
printf("Boundary tag \"%s\" not found.\n", bnd->tag);
continue;
}
for (int i = 0; i < mesh->phys_n_nodes[iphys]; ++i){
if (system->node_map[mesh->phys_nodes[iphys][i]] >= 0)
system->isfixed[system->node_map[mesh->phys_nodes[iphys][i]]*system->n_fields+bnd->field] = 1;
}
}
system->x = _mm_malloc(sizeof(double)*system->n, PADDING*8);
return system;
}
void linear_system_add_to_matrix(LinearSystem *system, int el0, int el1, const double *local_matrix){
int *tri0 = &system->mesh->triangles[el0*3];
int *tri1 = &system->mesh->triangles[el1*3];
int nn = system->n_nodes_by_element;
int *e0 = system->elements + el0*nn;
int *e1 = system->elements + el1*nn;
int nf = system->n_fields;
for (int i = 0; i < 3; ++i) {
for (int inf = 0; inf < 3; ++inf) {
int ii = system->node_map[tri0[i]]*nf + inf;
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < nn; ++i) {
for (int inf = 0; inf < nf; ++inf) {
int ii = system->node_map[e0[i]]*nf + inf;
for (int j = 0; j < nn; ++j) {
for (int jnf = 0; jnf < nf; ++jnf) {
int jj = system->node_map[tri1[j]]*nf + jnf;
system->rows[ii][jj] += local_matrix[(inf*3+i)*nf*3+jnf*3+j];
int jj = system->node_map[e1[j]]*nf + jnf;
system->rows[ii][jj] += local_matrix[(inf*nn+i)*nf*nn+jnf*nn+j];
}
}
}
......@@ -302,12 +318,13 @@ void linear_system_add_to_matrix(LinearSystem *system, int el0, int el1, const d
void linear_system_add_to_rhs(LinearSystem *system, int el0, const double *local_vector)
{
const int *tri = system->mesh->triangles + el0*3;
int n_fields = system->n_fields;
int nn = system->n_nodes_by_element;
int *e0 = system->elements + el0*nn;
for (int i = 0; i < n_fields; ++i) {
for (int j = 0; j < 3; ++j) {
int m = system->node_map[tri[j]]*n_fields+i;
system->b[m] += local_vector[i*3+j];
for (int j = 0; j < nn; ++j) {
int m = system->node_map[e0[j]]*n_fields+i;
system->b[m] += local_vector[i*nn+j];
}
}
}
......@@ -316,27 +333,26 @@ void linear_system_zero_matrix_and_rhs(LinearSystem *system)
{
for (int i = 0; i < system->n; ++i){
system->b[i] = 0.;
linear_system_zero_row(system, i);
row_zero(system->rows[i], system->row_start[i], system->row_end[i]);
}
}
void linear_system_fix_row(LinearSystem *system, int node, int field, double v)
{
int row = system->node_map[node]*system->n_fields + field;
if (row < 0) return;
row_zero(system->rows[row], system->row_start[row], system->row_end[row]);
system->rows[row][row] = 1.;
system->b[row] = v;
}
void linear_system_solve(LinearSystem *system, double *solution){
double **rows = system->rows;
for (int i = 0; i < system->n; ++i){
if (system->isfixed[i]) {
linear_system_zero_row(system, i);
rows[i][i] = 1.;
system->b[i] = 0;
}
}
LUPDecompose(system);
LUPSolve(system);
for (int i = 0; i < system->mesh->n_nodes; ++i){
for (int i = 0; i < system->n_nodes; ++i){
int ii = system->node_map[i];
if (ii < 0)
continue;
for (int j = 0; j < system->n_fields; ++j){
solution[i*system->n_fields+j] = system->x[ii*system->n_fields+j];
solution[i*system->n_fields+j] = ii < 0 ? 0. : system->x[ii*system->n_fields+j];
}
}
}
......@@ -350,7 +366,6 @@ void linear_system_free(LinearSystem *system)
free(system->row_start);
free(system->row_end);
free(system->row_lu_end);
free(system->isfixed);
free(system->node_map);
free(system);
}
......@@ -359,7 +374,6 @@ double linear_system_get_rhs_norm(LinearSystem *system)
{
double norm = 0;
for (int i = 0; i < system->n;++i)
if (!system->isfixed[i])
norm += system->b[i]*system->b[i];
return sqrt(norm);
}
......
#ifndef LINEAR_SYSTEM_H
#define LINEAR_SYSTEM_H
#include "mesh.h"
typedef struct LinearSystemStruct LinearSystem;
typedef struct {
char *tag;
int field;
double v;
}StrongBoundary;
LinearSystem *linear_system_new(Mesh *mesh, int n_fields, int n_boundaries, const StrongBoundary *boundaries);
LinearSystem *linear_system_new(int n_element, int n_node_by_elements, int n_fields, int *element);
void linear_system_add_to_matrix(LinearSystem *lsys, int el0, int el1, const double *local_matrix);
void linear_system_add_to_rhs(LinearSystem *lsys, int el0, const double *local_vector);
void linear_system_zero_matrix_and_rhs(LinearSystem *lsys);
void linear_system_solve(LinearSystem *lsys, double *solution);
void linear_system_fix_row(LinearSystem *lsys, int node, int field, double v);
void linear_system_free(LinearSystem *lsys);
double linear_system_get_rhs_norm(LinearSystem *lsys);
......
#include <math.h>
#include "mesh.h"
#include "linear_system.h"
#include <stdlib.h>
#include <string.h>
#define CONMAX 12
void connectivity_insert(int *connectivity, int i, int j)
{
for (int k = 0; k < CONMAX; ++k) {
int *p = connectivity + CONMAX*i + k;
if (*p == -1)
*p = j;
if (*p == j)
return;
}
printf("ERROR : node %i has more than %i neighbours\n", i, CONMAX);
}
int reverse_cuthill_mckee(Mesh *mesh, int *ordering)
{
int *node_connectivity = malloc(sizeof(int)*mesh->n_nodes*CONMAX);
for (int i = 0; i < mesh->n_nodes*CONMAX; ++i) {
node_connectivity[i] = -1;
}
for (int i = 0; i < mesh->n_triangles; ++i) {
int *tri = mesh->triangles + i*3;
connectivity_insert(node_connectivity, tri[0], tri[1]);
connectivity_insert(node_connectivity, tri[1], tri[0]);
connectivity_insert(node_connectivity, tri[0], tri[2]);
connectivity_insert(node_connectivity, tri[2], tri[0]);
connectivity_insert(node_connectivity, tri[1], tri[2]);
connectivity_insert(node_connectivity, tri[2], tri[1]);