Убрал параллельность и векторизацию из 14го Задания, начал писать немного иную версию

This commit is contained in:
AZEN-SGG 2025-04-07 06:49:55 +03:00
parent b66f83f2ee
commit fb42c48f7d
15 changed files with 206 additions and 214 deletions

View file

@ -1,21 +1,26 @@
CFLAGS = -mfpmath=sse -fstack-protector-all -W -Wall -Wextra -Wunused \
-Wempty-body -Wlogical-op -Wold-style-declaration -Wmissing-parameter-type \
-Wignored-qualifiers -Winit-self -Wshadow -Wtype-limits \
-Wpointer-arith -Wformat-security -Wmissing-format-attribute -Wformat=1 \
-Wdeclaration-after-statement -Wbad-function-cast -Wnested-externs \
-Wmissing-prototypes -Wmissing-declarations -Wold-style-definition \
-Wcast-align -Werror -pedantic -pedantic-errors -Wfloat-equal \
-Wwrite-strings -Wno-long-long -std=gnu99 -Wstrict-prototypes \
-Wmissing-field-initializers -Wpointer-sign -fopenmp -O3
CFLAGS = -mfpmath=sse -fstack-protector-all -std=gnu99 -O3
WFLAGS = -W -Wall -Wextra -Wunused -Wempty-body -Wlogical-op \
-Wold-style-declaration -Wmissing-parameter-type \
-Wignored-qualifiers -Winit-self -Wshadow -Wtype-limits \
-Wpointer-arith -Wformat-security -Wmissing-format-attribute \
-Wformat=1 -Wdeclaration-after-statement -Wbad-function-cast \
-Wnested-externs -Wmissing-prototypes -Wmissing-declarations \
-Wold-style-definition -Wcast-align -Werror -pedantic \
-pedantic-errors -Wfloat-equal -Wwrite-strings -Wno-long-long \
-Wstrict-prototypes -Wmissing-field-initializers -Wpointer-sign
TARGET = a14.out
OBJ = main.o solve.o array_io.o init_f.o matrix.o
%.o: %.c
gcc $(CFLAGS) -c $< -o $@
gcc $(CFLAGS) $(WFLAGS) -c $< -o $@
$(TARGET): $(OBJ)
gcc $^ -o $@ -lm -fopenmp
gcc $^ -o $@ -lm
gdb: CFLAGS = -mfpmath=sse -fstack-protector-all -std=gnu99 -g
gdb: clean $(TARGET)
clean:
rm *.o *.out
rm -f *.o *.out

2
2025.04.04/14Ex/a.txt Normal file
View file

@ -0,0 +1,2 @@
1 2
3 4

View file

@ -1,22 +1,29 @@
#include <stdio.h>
#include <string.h>
#include <omp.h>
#include "array_io.h"
#include "io_status.h"
io_status read_matrix(double *a, int n, const char *name)
io_status read_matrix(double *a, int n, const char *name, bool transpose)
{
int i, j;
FILE *fp;
if (!(fp = fopen(name, "r"))) return ERROR_OPEN;
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
if (fscanf(fp, "%lf", a + i * n + j) != 1)
{fclose(fp); return ERROR_READ;}
{
if (transpose)
{
if (fscanf(fp, "%lf", a + j * n + i) != 1)
{fclose(fp); return ERROR_READ;}
} else
if (fscanf(fp, "%lf", a + i * n + j) != 1)
{fclose(fp); return ERROR_READ;}
}
fclose(fp);
return SUCCESS;
}
void print_matrix(const double *a, int n, int p)
void print_matrix(const double *a, int n, int p, bool transpose)
{
int np = (n > p ? p : n);
int i, j;
@ -24,12 +31,17 @@ void print_matrix(const double *a, int n, int p)
for (i = 0; i < np; i++)
{
for (j = 0; j < np; j++)
printf(" %10.3e", a[i * n + j]);
{
if (transpose)
printf(" %10.3e", a[j * n + i]);
else
printf(" %10.3e", a[i * n + j]);
}
printf("\n");
}
}
void init_matrix(double *a, int n, int k)
void init_matrix(double *a, int n, int k, bool transpose)
{
double (*q)(int, int, int, int);
double (*f[])(int, int, int, int) = {f1, f2, f3, f4};
@ -37,14 +49,43 @@ void init_matrix(double *a, int n, int k)
q = f[k-1];
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
a[i * n + j] = q(n, n, i+1, j+1);
{
if (transpose)
a[i * n + j] = q(n, n, j+1, i+1);
else
a[i * n + j] = q(n, n, j+1, i+1);
}
}
void init_identity_matrix(double *a, int n)
{
a = memset(a, 0, n * n * sizeof(double));
#pragma omp simd
for (int i = 0; i < n; ++i)
a[i*n + i] = 1.0;
}
int read_or_init_matrix(double *a, char *name, int k, int n, bool transpose)
{
if (name)
{ /* из файла */
io_status ret;
ret = read_matrix(a, n, name, transpose);
do {
switch (ret)
{
case SUCCESS:
continue;
case ERROR_OPEN:
printf("Cannot open %s\n", name);
break;
case ERROR_READ:
printf("Cannot read %s\n", name);
}
return 3;
} while (0);
} else init_matrix(a, n, k, transpose);
return 0;
}

View file

@ -1,12 +1,14 @@
#ifndef ARRAY_IO_H
#define ARRAY_IO_H
#include <stdbool.h>
#include "io_status.h"
#include "init_f.h"
io_status read_matrix(double *a, int n, const char *name);
void print_matrix(const double *a, int n, int p);
void init_matrix(double *a, int n, int k);
io_status read_matrix(double *a, int n, const char *name, bool transpose);
void print_matrix(const double *a, int n, int p, bool transpose);
void init_matrix(double *a, int n, int k, bool transpose);
void init_identity_matrix(double *a, int n);
int read_or_init_matrix(double *a, char *name, int k, int n, bool transpose);
#endif

View file

@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <time.h>
#include <omp.h>
#include "array_io.h"
#include "io_status.h"
#include "matrix.h"
@ -48,41 +48,28 @@ int main(int argc, char *argv[])
return 2;
}
if (name)
{ /* из файла */
io_status ret;
ret = read_matrix(a, n, name);
do {
switch (ret)
{
case SUCCESS:
continue;
case ERROR_OPEN:
printf("Cannot open %s\n", name);
break;
case ERROR_READ:
printf("Cannot read %s\n", name);
}
free(a);
free(x);
free(c);
return 3;
} while (0);
} else init_matrix(a, n, k);
res = read_or_init_matrix(a, name, k, n, true);
if (res)
{
free(a);
free(x);
free(c);
return res;
}
init_identity_matrix(x, n);
#pragma omp simd
for (int i = 0; i < n; ++i)
c[i] = i;
printf("Initial matrix:\n");
print_matrix(a, n, p);
print_matrix(a, n, p, true);
t = omp_get_wtime();
t = clock();
res = t14_solve(n, a, x, c);
t = omp_get_wtime() - t;
t = (clock() - t) / CLOCKS_PER_SEC;
if (res == SINGULAR)
{
free(a);
@ -93,33 +80,21 @@ int main(int argc, char *argv[])
return 4;
}
if (name)
{ /* из файла */
io_status ret;
ret = read_matrix(a, n, name);
do {
switch (ret)
{
case SUCCESS:
continue;
case ERROR_OPEN:
printf("Cannot open %s\n", name);
break;
case ERROR_READ:
printf("Cannot read %s\n", name);
}
free(a);
free(x);
free(c);
return 3;
} while (0);
} else init_matrix(a, n, k);
res = read_or_init_matrix(a, name, k, n, false);
if (res)
{
free(a);
free(x);
free(c);
return res;
}
r1 = get_r1(n, a, x);
r2 = get_r2(n, a, x);
printf("Inverse matrix:\n");
print_matrix(x, n, p);
print_matrix(x, n, p, false);
printf("%s : Task = %d Res1 = %e Res2 = %e Elapsed = %.2f K = %d N = %d\n", argv[0], task, r1, r2, t, k, n);
free(a);

View file

@ -3,12 +3,10 @@
void init_vec_b(const double * restrict a, double * restrict b, int n)
{
#pragma omp parallel for
for (int i = 0; i < n; ++i)
{
double sum = 0;
#pragma omp simd reduction(+:sum)
for (int k = 0; k < n; k+=2)
sum += a[i * n + k];
@ -18,11 +16,10 @@ void init_vec_b(const double * restrict a, double * restrict b, int n)
void matvec_mul(int n, const double * restrict A, const double * restrict x, double * restrict x_k)
{
#pragma omp parallel for
for (int i = 0; i < n; i++)
{
double sum = 0;
#pragma omp simd reduction(+:sum)
for (int j = 0; j < n; j++)
sum += A[i * n + j] * x[j];
x_k[i] = sum;
@ -35,7 +32,6 @@ double get_r1(const int n, const double * restrict A, const double * restrict X)
if (n > 4000) return 0;
#pragma omp parallel for reduction(max:maximum)
for (int j = 0; j < n; ++j)
{
double sum = 0;
@ -45,7 +41,6 @@ double get_r1(const int n, const double * restrict A, const double * restrict X)
int in = i*n;
double sum_ij = - (i == j);
#pragma omp simd reduction(+:sum_ij)
for (int k = 0; k < n; ++k)
sum_ij += A[in + k] * X[k*n + j];
@ -65,7 +60,6 @@ double get_r2(const int n, const double * restrict A, const double * restrict X)
if (n > 4000) return 0;
#pragma omp parallel for reduction(max:maximum)
for (int j = 0; j < n; ++j)
{
double sum = 0;
@ -75,7 +69,6 @@ double get_r2(const int n, const double * restrict A, const double * restrict X)
int in = i*n;
double sum_ij = - (i == j);
#pragma omp simd reduction(+:sum_ij)
for (int k = 0; k < n; ++k)
sum_ij += X[in + k] * A[k*n + j];
@ -89,3 +82,18 @@ double get_r2(const int n, const double * restrict A, const double * restrict X)
return maximum;
}
double get_matrix_norm(const int n, const double * restrict A)
{
double norm = 0;
for (int i = 0; i < n; ++i)
{
double sum = 0;
for (int j = 0; j < n; ++j)
sum += fabs(A[i*n + j]);
if (sum > norm)
norm = sum;
}
return norm;
}

View file

@ -5,5 +5,6 @@ void init_vec_b(const double * restrict a, double * restrict b, int n);
void matvec_mul(int n, const double * restrict A, const double * restrict x, double * restrict x_k);
double get_r1(const int n, const double * restrict A, const double * restrict X);
double get_r2(const int n, const double * restrict A, const double * restrict X);
double get_matrix_norm(const int n, const double * restrict A);
#endif

View file

@ -3,47 +3,34 @@
#include <float.h>
#include <math.h>
#include "array_io.h"
#include "matrix.h"
#include <stdio.h>
// c - changes in rows
int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
{
double norm = get_matrix_norm(n, A);
// Проходимся по главным минорам
for (int k = 0; k < n; ++k) {
double maximum = -1.;
int max_i = 0, max_j = 0;
// Ищем максимальный элемент минора
#pragma omp parallel
{
double local_max = -1.;
int loc_i = 0, loc_j = 0;
#pragma omp for collapse(2) nowait
for (int i = k; i < n; ++i)
for (int j = k; j < n; ++j)
{
double aij = fabs(A[i * n + j]);
if (aij > local_max) {
local_max = aij;
loc_i = i;
loc_j = j;
}
for (int i = k; i < n; ++i)
for (int j = k; j < n; ++j)
{
double aij = fabs(A[i * n + j]);
if (aij > maximum) {
maximum = aij;
max_i = i;
max_j = j;
}
#pragma omp critical
{
if (local_max > maximum) {
maximum = local_max;
max_i = loc_i;
max_j = loc_j;
}
}
}
}
// Если максимальный по модулю элемент равен нулю, значит матрица вырождена
if (fabs(maximum) < DBL_EPSILON)
if (fabs(maximum) < DBL_EPSILON * norm)
return SINGULAR;
// Меняем строки местами, если максимум находится не в k строке
@ -52,7 +39,6 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
int kn = k*n;
int in = max_i*n;
#pragma omp simd
for (int i = 0; i < k; ++i)
{
int kni = kn+i, ini = in+i;
@ -61,7 +47,6 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
X[ini] = swap;
}
#pragma omp parallel for simd
for (int i = k; i < n; ++i)
{
int kni = kn+i, ini = in+i;
@ -82,7 +67,6 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
c[max_j] = c[k];
c[k] = swap_temp;
#pragma omp simd
for (int in = 0; in < n*n; in+=n)
{
double swap = A[in + k];
@ -105,7 +89,6 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
{
int pnt_nxt = 0;
#pragma omp parallel for
for (int j = 0; j < n; ++j)
{
int loc_cur = pnt_cur;
@ -156,7 +139,6 @@ void gauss_inverse(const int n, const int k, double * restrict A, double * restr
if (fabs(xij) > DBL_EPSILON) X[ij] = xij*inv_akk;
}
#pragma omp parallel for
for (int i = k+1; i < n; ++i)
{
const int in = i*n;
@ -164,11 +146,9 @@ void gauss_inverse(const int n, const int k, double * restrict A, double * restr
A[in + k] = 0;
X[in + k] -= X[kk] * aik;
#pragma omp simd
for (int j = 0; j < k; ++j)
X[in + j] -= X[kn + j] * aik;
#pragma omp simd
for (int j = k+1; j < n; ++j)
{
A[in + j] -= A[kn + j] * aik;
@ -185,14 +165,12 @@ void gauss_back_substitution(const int n, double * restrict A, double * restrict
{
const int kn = k * n;
#pragma omp parallel for
for (int i = 0; i < k; ++i)
{
const int in = i*n;
const double aik = A[in + k];
A[in + k] = 0;
#pragma omp simd
for (int j = 0; j < n; ++j)
X[in + j] -= X[kn + j] * aik;
}

View file

@ -1,4 +1,4 @@
CFLAGS = -mfpmath=sse -fstack-protector-all -W -Wall -Wextra -Wunused \
WFLAGS = -fstack-protector-all -W -Wall -Wextra -Wunused \
-Wempty-body -Wlogical-op -Wold-style-declaration -Wmissing-parameter-type \
-Wignored-qualifiers -Winit-self -Wshadow -Wtype-limits \
-Wpointer-arith -Wformat-security -Wmissing-format-attribute -Wformat=1 \
@ -6,16 +6,18 @@ CFLAGS = -mfpmath=sse -fstack-protector-all -W -Wall -Wextra -Wunused \
-Wmissing-prototypes -Wmissing-declarations -Wold-style-definition \
-Wcast-align -Werror -pedantic -pedantic-errors -Wfloat-equal \
-Wwrite-strings -Wno-long-long -std=gnu99 -Wstrict-prototypes \
-Wmissing-field-initializers -Wpointer-sign -fopenmp -O3
-Wmissing-field-initializers -Wpointer-sign
CFLAGS = -mfpmath=sse -std=gnu99 -O3
TARGET = a.out
OBJ = main.o solve.o array_io.o init_f.o matrix.o
%.o: %.c
gcc $(CFLAGS) -c $< -o $@
gcc $(CFLAGS) $(WFLAGS) -c $< -o $@
$(TARGET): $(OBJ)
gcc $^ -o $@ -lm -fopenmp
gcc $^ -o $@ -lm
clean:
rm *.o *.out
rm -f *.o *.out

View file

@ -1,6 +1,5 @@
#include <stdio.h>
#include <string.h>
#include <omp.h>
#include "array_io.h"
io_status read_matrix(double *a, int n, const char *name)
@ -44,7 +43,30 @@ void init_identity_matrix(double *a, int n)
{
a = memset(a, 0, n * n * sizeof(double));
#pragma omp simd
for (int i = 0; i < n; ++i)
a[i*n + i] = 1.0;
}
int read_or_init_matrix(double *a, char *name, int k, int n)
{
if (name)
{ /* из файла */
io_status ret;
ret = read_matrix(a, n, name);
do {
switch (ret)
{
case SUCCESS:
continue;
case ERROR_OPEN:
printf("Cannot open %s\n", name);
break;
case ERROR_READ:
printf("Cannot read %s\n", name);
}
return 3;
} while (0);
} else init_matrix(a, n, k);
return 0;
}

View file

@ -8,5 +8,6 @@ io_status read_matrix(double *a, int n, const char *name);
void print_matrix(const double *a, int n, int p);
void init_matrix(double *a, int n, int k);
void init_identity_matrix(double *a, int n);
int read_or_init_matrix(double *a, char *name, int k, int n);
#endif

View file

@ -1,7 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#include "array_io.h"
#include "io_status.h"
#include "matrix.h"
@ -48,40 +47,27 @@ int main(int argc, char *argv[])
return 2;
}
if (name)
{ /* из файла */
io_status ret;
ret = read_matrix(a, n, name);
do {
switch (ret)
{
case SUCCESS:
continue;
case ERROR_OPEN:
printf("Cannot open %s\n", name);
break;
case ERROR_READ:
printf("Cannot read %s\n", name);
}
free(a);
free(x);
free(c);
return 3;
} while (0);
} else init_matrix(a, n, k);
res = read_or_init_matrix(a, name, k, n);
if (res)
{
free(a);
free(x);
free(c);
return res;
}
init_identity_matrix(x, n);
#pragma omp simd
for (int i = 0; i < n; ++i)
c[i] = i;
printf("Initial matrix:\n");
print_matrix(a, n, p);
t = omp_get_wtime();
t = clock();
res = t14_solve(n, a, x, c);
t = omp_get_wtime() - t;
t = clock() - t;
if (res == SINGULAR)
{
@ -93,27 +79,15 @@ int main(int argc, char *argv[])
return 4;
}
if (name)
{ /* из файла */
io_status ret;
ret = read_matrix(a, n, name);
do {
switch (ret)
{
case SUCCESS:
continue;
case ERROR_OPEN:
printf("Cannot open %s\n", name);
break;
case ERROR_READ:
printf("Cannot read %s\n", name);
}
free(a);
free(x);
free(c);
return 3;
} while (0);
} else init_matrix(a, n, k);
res = read_or_init_matrix(a, name, k, n);
if (res)
{
free(a);
free(x);
free(c);
return res;
}
r1 = get_r1(n, a, x);
r2 = get_r2(n, a, x);

View file

@ -3,12 +3,10 @@
void init_vec_b(const double * restrict a, double * restrict b, int n)
{
#pragma omp parallel for
for (int i = 0; i < n; ++i)
{
double sum = 0;
#pragma omp simd reduction(+:sum)
for (int k = 0; k < n; k+=2)
sum += a[i * n + k];
@ -18,11 +16,9 @@ void init_vec_b(const double * restrict a, double * restrict b, int n)
void matvec_mul(int n, const double * restrict A, const double * restrict x, double * restrict x_k)
{
#pragma omp parallel for
for (int i = 0; i < n; i++)
{
double sum = 0;
#pragma omp simd reduction(+:sum)
for (int j = 0; j < n; j++)
sum += A[i * n + j] * x[j];
x_k[i] = sum;
@ -35,7 +31,6 @@ double get_r1(const int n, const double * restrict A, const double * restrict X)
if (n > 4000) return 0;
#pragma omp parallel for reduction(max:maximum)
for (int j = 0; j < n; ++j)
{
double sum = 0;
@ -45,7 +40,6 @@ double get_r1(const int n, const double * restrict A, const double * restrict X)
int in = i*n;
double sum_ij = - (i == j);
#pragma omp simd reduction(+:sum_ij)
for (int k = 0; k < n; ++k)
sum_ij += A[in + k] * X[k*n + j];
@ -65,7 +59,6 @@ double get_r2(const int n, const double * restrict A, const double * restrict X)
if (n > 4000) return 0;
#pragma omp parallel for reduction(max:maximum)
for (int j = 0; j < n; ++j)
{
double sum = 0;
@ -75,7 +68,6 @@ double get_r2(const int n, const double * restrict A, const double * restrict X)
int in = i*n;
double sum_ij = - (i == j);
#pragma omp simd reduction(+:sum_ij)
for (int k = 0; k < n; ++k)
sum_ij += X[in + k] * A[k*n + j];
@ -89,3 +81,18 @@ double get_r2(const int n, const double * restrict A, const double * restrict X)
return maximum;
}
double get_matrix_norm(const int n, const double * restrict A)
{
double norm = 0;
for (int i = 0; i < n; ++i)
{
double sum = 0;
for (int j = 0; j < n; ++j)
sum += fabs(A[i*n + j]);
if (sum > norm)
norm = sum;
}
return norm;
}

View file

@ -5,5 +5,6 @@ void init_vec_b(const double * restrict a, double * restrict b, int n);
void matvec_mul(int n, const double * restrict A, const double * restrict x, double * restrict x_k);
double get_r1(const int n, const double * restrict A, const double * restrict X);
double get_r2(const int n, const double * restrict A, const double * restrict X);
double get_matrix_norm(const int n, const double * restrict A);
#endif

View file

@ -1,9 +1,8 @@
#include "solve.h"
#include "io_status.h"
#include "array_io.h"
#include <float.h>
#include <math.h>
#include "array_io.h"
#include <stdio.h>
// c - changes in rows
int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
@ -14,33 +13,16 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
int max_i = 0, max_j = 0;
// Ищем максимальный элемент минора
#pragma omp parallel
{
double local_max = -1.;
int loc_i = 0, loc_j = 0;
#pragma omp for collapse(2) nowait
for (int i = k; i < n; ++i)
for (int j = k; j < n; ++j)
{
double aij = fabs(A[i * n + j]);
if (aij > local_max) {
local_max = aij;
loc_i = i;
loc_j = j;
}
}
#pragma omp critical
{
if (local_max > maximum) {
maximum = local_max;
max_i = loc_i;
max_j = loc_j;
for (int i = k; i < n; ++i)
for (int j = k; j < n; ++j)
{
double aij = fabs(A[i * n + j]);
if (aij > maximum) {
maximum = aij;
max_i = i;
max_j = j;
}
}
}
// Если максимальный по модулю элемент равен нулю, значит матрица вырождена
if (fabs(maximum) < DBL_EPSILON)
@ -52,7 +34,6 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
int kn = k*n;
int in = max_i*n;
#pragma omp simd
for (int i = 0; i < k; ++i)
{
int kni = kn+i, ini = in+i;
@ -61,7 +42,6 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
X[ini] = swap;
}
#pragma omp parallel for simd
for (int i = k; i < n; ++i)
{
int kni = kn+i, ini = in+i;
@ -82,7 +62,6 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
c[max_j] = c[k];
c[k] = swap_temp;
#pragma omp simd
for (int in = 0; in < n*n; in+=n)
{
double swap = A[in + k];
@ -105,7 +84,6 @@ int t14_solve(int n, double * restrict A, double * restrict X, int * restrict c)
{
int pnt_nxt = 0;
#pragma omp parallel for
for (int j = 0; j < n; ++j)
{
int loc_cur = pnt_cur;
@ -156,7 +134,6 @@ void gauss_inverse(const int n, const int k, double * restrict A, double * restr
if (fabs(xij) > DBL_EPSILON) X[ij] = xij*inv_akk;
}
#pragma omp parallel for
for (int i = k+1; i < n; ++i)
{
const int in = i*n;
@ -164,11 +141,9 @@ void gauss_inverse(const int n, const int k, double * restrict A, double * restr
A[in + k] = 0;
X[in + k] -= X[kk] * aik;
#pragma omp simd
for (int j = 0; j < k; ++j)
X[in + j] -= X[kn + j] * aik;
#pragma omp simd
for (int j = k+1; j < n; ++j)
{
A[in + j] -= A[kn + j] * aik;
@ -185,14 +160,12 @@ void gauss_back_substitution(const int n, double * restrict A, double * restrict
{
const int kn = k * n;
#pragma omp parallel for
for (int i = 0; i < k; ++i)
{
const int in = i*n;
const double aik = A[in + k];
A[in + k] = 0;
#pragma omp simd
for (int j = 0; j < n; ++j)
X[in + j] -= X[kn + j] * aik;
}