Tried to speed up programs using SIMD

This commit is contained in:
AZEN-SGG 2025-03-19 18:57:03 +03:00
parent 509787808a
commit 725e44fda5
3 changed files with 71 additions and 55 deletions

View file

@ -5,16 +5,20 @@
int t8_solve(double *a, int m, int n) int t8_solve(double *a, int m, int n)
{ {
const int BS = 30;
int max_i = 0, max_j = 0; int max_i = 0, max_j = 0;
double maximum = 0; double maximum = 0;
for (int j = 0; j < n; j++) for (int jj = 0; jj < n; jj += BS)
for (int j = jj; j < jj + BS && j < n; j++)
{ {
double sum_j = 0; double sum_j = 0;
for (int k = 0; k < m; k++) for (int ii = 0; ii < m; ii += BS)
for (int k = ii; k < ii + BS && k < m; k++)
sum_j += fabs(a[k*n + j]); sum_j += fabs(a[k*n + j]);
for (int i = 0; i < m; i++) for (int ii = 0; ii < m; ii += BS)
for (int i = ii; i < ii + BS && i < m; i++)
{ {
double sum_i = 0; double sum_i = 0;
double aij = fabs(a[i*n + j]); double aij = fabs(a[i*n + j]);
@ -23,7 +27,8 @@ int t8_solve(double *a, int m, int n)
if (j == 0) if (j == 0)
{ {
double num = a[i*n]; double num = a[i*n];
for (int k = 1; k < n; k++) for (int kk = 1; kk < n; kk += BS)
for (int k = kk; k < kk + BS && k < n; k++)
sum_i += fabs(a[i*n + k]); sum_i += fabs(a[i*n + k]);
a[i*n] = copysign(fabs(num) + sum_i, num); a[i*n] = copysign(fabs(num) + sum_i, num);
@ -39,12 +44,14 @@ int t8_solve(double *a, int m, int n)
} }
if (max_j != 0) if (max_j != 0)
for (int i = 0; i < m; i++) for (int ii = 0; ii < m; ii += BS)
for (int i = ii; i < ii + BS && i < m; i++)
{ {
double orig = a[i*n]; double orig = a[i*n];
double num = fabs(orig); double num = fabs(orig);
for (int j = 1; j < n; j++) for (int jj = 1; jj < n; jj += BS)
for (int j = jj; j < jj + BS && j < n; j++)
num -= fabs(a[i*n + j]); num -= fabs(a[i*n + j]);
a[i*n] = copysign(num, orig); a[i*n] = copysign(num, orig);

View file

@ -5,20 +5,25 @@
int t9_solve(double *a, int m, int n) int t9_solve(double *a, int m, int n)
{ {
const int BS = 32;
int max_i = 0, max_j = 0; int max_i = 0, max_j = 0;
double maximum = 0; double maximum = 0;
for (int j = 0; j < n; j++) for (int jj = 0; jj < n; jj += BS)
for (int i = 0; i < m; i++) for (int ii = 0; ii < m; ii += BS)
for (int j = jj; j < jj + BS && j < n; j++)
for (int i = ii; i < ii + BS && i < m; i++)
{ {
double sum_i = 0; double sum_i = 0;
double sum_j = 0; double sum_j = 0;
double aij = a[i*n + j]; double aij = a[i*n + j];
for (int k = 0; k < n; k++) if (k != j) for (int kk = 0; kk < n; kk += BS)
for (int k = kk; k < kk + BS && k < n; k++) if (k != j)
sum_i += fabs(a[i*n + k] - aij); sum_i += fabs(a[i*n + k] - aij);
for (int k = 0; k < m; k++) if (i != k) for (int kk = 0; kk < m; kk += BS)
for (int k = kk; k < kk + BS && k < m; k++) if (i != k)
sum_j += fabs(a[k*n + j] - aij); sum_j += fabs(a[k*n + j] - aij);
if (((sum_j + sum_i) - maximum) > eps) if (((sum_j + sum_i) - maximum) > eps)

View file

@ -5,16 +5,19 @@
int t10_solve(double *a, int m, int n) int t10_solve(double *a, int m, int n)
{ {
const int BS = 30;
int min_i = 0, min_j = 0; int min_i = 0, min_j = 0;
double minimum = DBL_MAX; double minimum = DBL_MAX;
double inn = 1./n, inm = 1./m; double inn = 1./n, inm = 1./m;
for (int i = 0; i < m; i++) for (int ii = 0; ii < m; ii += BS)
for (int i = ii; i < ii + BS && i < m; i++)
{ {
double cur = 0; double cur = 0;
double sum = 0; double sum = 0;
double sq_sum = 0; double sq_sum = 0;
for (int j = 0; j < n; j++) for (int jj = 0; jj < n; jj += BS)
for (int j = jj; j < jj + BS && j < n; j++)
{ {
double temp = a[i*n + j]; double temp = a[i*n + j];
sum += temp; sum += temp;
@ -29,9 +32,10 @@ int t10_solve(double *a, int m, int n)
} }
minimum = DBL_MAX; minimum = DBL_MAX;
for (int j = 0; j < n; j++)
for (int jj = 0; jj < n; jj += BS)
for (int j = jj; j < jj + BS && j < n; j++)
{ {
const int BS = 32;
double cur = 0; double cur = 0;
double sum = 0; double sum = 0;
double sq_sum = 0; double sq_sum = 0;