WIP, but almost done
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,3 +2,4 @@
|
||||
.*
|
||||
!/.gitignore
|
||||
main
|
||||
*.pdf
|
||||
|
||||
294
main.c
294
main.c
@@ -1,8 +1,6 @@
|
||||
#include <errno.h>
|
||||
#include <math.h>
|
||||
#include <png.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -13,7 +11,17 @@ float noise(float x, float y);
|
||||
float linear_interpolation(float a, float b, float t);
|
||||
float quintic_curve(float t);
|
||||
float dot_product(float a[], float b[]);
|
||||
float *pseudo_random_gradient_vector_get(float x, float y);
|
||||
void pseudo_random_gradient_vector_get(float vec[2], float x, float y);
|
||||
void *calculate(void *args);
|
||||
void single_threaded(unsigned char *buffer, int height, int width);
|
||||
void multi_threaded(unsigned char *buffer, int height, int width,
|
||||
int thread_no);
|
||||
void panic(const char *message);
|
||||
void benchmark_calculations(unsigned char *buffer, int height, int width,
|
||||
int thread_no);
|
||||
void write_image_with_timing(png_image *img, const unsigned char *buffer);
|
||||
void print_timediff(const char *message, struct timespec start,
|
||||
struct timespec end);
|
||||
|
||||
static unsigned char random_bytes[1024];
|
||||
|
||||
@@ -34,7 +42,117 @@ struct multi_threaded_arg {
|
||||
int width;
|
||||
};
|
||||
|
||||
void *calculate(void *args) { // TODO: reorder functions by call order
|
||||
int main(int argc, char *argv[]) {
|
||||
int n = getrandom(random_bytes, 1024, 0);
|
||||
if (n != 1024) {
|
||||
fprintf(stderr, "err: getrandom didn't work out as planned\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "usage: width height [threads] \n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int width = atoi(argv[1]), height = atoi(argv[2]);
|
||||
int threads = 1;
|
||||
if (argc == 4) {
|
||||
threads = atoi(argv[3]);
|
||||
|
||||
if (height < threads) {
|
||||
fprintf(stderr,
|
||||
"err: height < threads. expected: height >= threads. aborting\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("rows per thread: %u\n", height / threads);
|
||||
}
|
||||
|
||||
png_image img;
|
||||
memset(&img, 0, sizeof(img));
|
||||
img.format = PNG_FORMAT_GRAY;
|
||||
img.width = width;
|
||||
img.height = height;
|
||||
img.version = PNG_IMAGE_VERSION;
|
||||
|
||||
unsigned char *buffer;
|
||||
buffer = malloc(PNG_IMAGE_SIZE(img));
|
||||
|
||||
benchmark_calculations(buffer, height, width, threads);
|
||||
|
||||
write_image_with_timing(&img, buffer);
|
||||
|
||||
free(buffer);
|
||||
}
|
||||
|
||||
void benchmark_calculations(unsigned char *buffer, int height, int width,
|
||||
int threads) {
|
||||
struct timespec start, end;
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
printf("performing warmup run #%u, stay tuned!\n", i + 1);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
if (threads < 2) {
|
||||
single_threaded(buffer, height, width);
|
||||
} else {
|
||||
multi_threaded(buffer, height, width, threads);
|
||||
}
|
||||
clock_gettime(CLOCK_MONOTONIC, &end);
|
||||
print_timediff("warmup calculation time: ", start, end);
|
||||
}
|
||||
|
||||
puts("performing 10 runs!");
|
||||
for (int i = 0; i < 10; i++) {
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
if (threads < 2) {
|
||||
single_threaded(buffer, height, width);
|
||||
} else {
|
||||
multi_threaded(buffer, height, width, threads);
|
||||
}
|
||||
clock_gettime(CLOCK_MONOTONIC, &end);
|
||||
print_timediff("calculation time: ", start, end);
|
||||
}
|
||||
}
|
||||
|
||||
void single_threaded(unsigned char *buffer, int height, int width) {
|
||||
struct single_threaded_arg arg = {
|
||||
.type = THRD_SINGLE, .buffer = buffer, .height = height, .width = width};
|
||||
calculate(&arg);
|
||||
}
|
||||
|
||||
void multi_threaded(unsigned char *buffer, int height, int width,
|
||||
int thread_no) {
|
||||
pthread_t threads[thread_no];
|
||||
const unsigned int rows_per_thread = height / thread_no;
|
||||
|
||||
struct multi_threaded_arg *all_args[thread_no];
|
||||
for (int i = 0; i < thread_no; i++) {
|
||||
const int row_start = i * rows_per_thread;
|
||||
int row_end = (i + 1) * rows_per_thread;
|
||||
if (i == thread_no - 1) {
|
||||
row_end = height;
|
||||
}
|
||||
|
||||
struct multi_threaded_arg *arg;
|
||||
arg = malloc(sizeof *arg);
|
||||
|
||||
*arg = (struct multi_threaded_arg){.type = THRD_MULTI,
|
||||
.buffer = buffer,
|
||||
.height_start = row_start,
|
||||
.height_end = row_end,
|
||||
.width = width};
|
||||
pthread_create(&threads[i], NULL, calculate, arg);
|
||||
all_args[i] = arg;
|
||||
}
|
||||
|
||||
for (int i = 0; i < thread_no; ++i) {
|
||||
pthread_join(threads[i], NULL);
|
||||
free(all_args[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void *calculate(void *args) {
|
||||
int x = 0;
|
||||
int height = 0, width = 0;
|
||||
unsigned char *buffer;
|
||||
@@ -56,15 +174,17 @@ void *calculate(void *args) { // TODO: reorder functions by call order
|
||||
}
|
||||
}
|
||||
|
||||
/* minimum space needed for any variation of error msg */
|
||||
char msg[31 + 13 + 11 + 1];
|
||||
if (height == 0 || width == 0) {
|
||||
printf("things have got seriously wrong!");
|
||||
strcat(msg, "things have got seriously wrong");
|
||||
if (height == 0) {
|
||||
printf(" height is 0!");
|
||||
strcat(msg, " height is 0!");
|
||||
} else {
|
||||
printf(" width is 0!");
|
||||
strcat(msg, " width is 0!");
|
||||
}
|
||||
printf(" aborting\n");
|
||||
exit(1);
|
||||
strcat(msg, " aborting\n");
|
||||
panic(msg);
|
||||
}
|
||||
|
||||
for (; x < height; ++x) {
|
||||
@@ -79,101 +199,9 @@ void *calculate(void *args) { // TODO: reorder functions by call order
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void single_threaded(unsigned char *buffer, int height, int width) {
|
||||
struct single_threaded_arg arg = {
|
||||
.type = THRD_SINGLE, .buffer = buffer, .height = height, .width = width};
|
||||
calculate(&arg);
|
||||
}
|
||||
|
||||
void multi_threaded(unsigned char *buffer, int height, int width,
|
||||
int thread_no) {
|
||||
if (thread_no == 1) {
|
||||
single_threaded(buffer, height, width);
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_t threads[thread_no];
|
||||
const unsigned int rows_per_thread = height / thread_no;
|
||||
printf("rows per thread: %u\n", rows_per_thread);
|
||||
struct multi_threaded_arg *args_arr[thread_no];
|
||||
for (int i = 0; i < thread_no; i++) {
|
||||
const int height_start = i * rows_per_thread;
|
||||
int height_end = (i + 1) * rows_per_thread;
|
||||
if (i == thread_no - 1) {
|
||||
height_end = height;
|
||||
}
|
||||
|
||||
struct multi_threaded_arg *arg;
|
||||
arg = malloc(sizeof *arg);
|
||||
|
||||
*arg = (struct multi_threaded_arg){.type = THRD_MULTI,
|
||||
.buffer = buffer,
|
||||
.height_start = height_start,
|
||||
.height_end = height_end,
|
||||
.width = width};
|
||||
pthread_create(&threads[i], NULL, calculate, arg);
|
||||
args_arr[i] = arg;
|
||||
}
|
||||
|
||||
for (int i = 0; i < thread_no; ++i) {
|
||||
pthread_join(threads[i], NULL);
|
||||
free(args_arr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int n = getrandom(random_bytes, 1024, 0);
|
||||
if (n != 1024) {
|
||||
fprintf(stderr, "err: getrandom didn't work out as planned\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "err: must supply 3 params!\n");
|
||||
return 1;
|
||||
}
|
||||
int width = atoi(argv[1]), height = atoi(argv[2]);
|
||||
int threads = 1;
|
||||
if (argc == 4) {
|
||||
threads = atoi(argv[3]);
|
||||
|
||||
if (height < threads) {
|
||||
fprintf(stderr,
|
||||
"err: rows < threads. expected: rows >= threads. aborting\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
png_image img;
|
||||
memset(&img, 0, sizeof(img));
|
||||
img.format = PNG_FORMAT_GRAY;
|
||||
img.width = width;
|
||||
img.height = height;
|
||||
img.version = PNG_IMAGE_VERSION;
|
||||
|
||||
unsigned char *buffer;
|
||||
printf("PNG IMAGE SIZE: %u bytes\n", PNG_IMAGE_SIZE(img));
|
||||
buffer = malloc(PNG_IMAGE_SIZE(img));
|
||||
|
||||
struct timespec start, end;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
if (threads < 2)
|
||||
single_threaded(buffer, height, width);
|
||||
else
|
||||
multi_threaded(buffer, height, width, threads);
|
||||
clock_gettime(CLOCK_MONOTONIC, &end);
|
||||
printf("calculation time: %fs.\n", (double)(end.tv_sec - start.tv_sec) +
|
||||
(end.tv_nsec - start.tv_nsec) / 1e9);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
png_image_write_to_file(&img, "noise.png", 0, buffer, 0, NULL);
|
||||
clock_gettime(CLOCK_MONOTONIC, &end);
|
||||
printf("write to file time (single-threaded): %fs.\n",
|
||||
(double)(end.tv_sec - start.tv_sec) +
|
||||
(end.tv_nsec - start.tv_nsec) / 1e9);
|
||||
|
||||
free(buffer);
|
||||
void panic(const char *message) {
|
||||
fputs(message, stderr);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
float noise(float x, float y) {
|
||||
@@ -183,12 +211,12 @@ float noise(float x, float y) {
|
||||
float point_in_quad_x = x - left;
|
||||
float point_in_quad_y = y - top;
|
||||
|
||||
float *top_left_gradient = pseudo_random_gradient_vector_get(left, top);
|
||||
float *top_right_gradient = pseudo_random_gradient_vector_get(left + 1, top);
|
||||
float *bottom_left_gradient =
|
||||
pseudo_random_gradient_vector_get(left, top + 1);
|
||||
float *bottom_right_gradient =
|
||||
pseudo_random_gradient_vector_get(left + 1, top + 1);
|
||||
float top_left_gradient[2], top_right_gradient[2], bottom_left_gradient[2],
|
||||
bottom_right_gradient[2];
|
||||
pseudo_random_gradient_vector_get(top_left_gradient, left, top);
|
||||
pseudo_random_gradient_vector_get(top_right_gradient, left + 1, top);
|
||||
pseudo_random_gradient_vector_get(bottom_left_gradient, left, top + 1);
|
||||
pseudo_random_gradient_vector_get(bottom_right_gradient, left + 1, top + 1);
|
||||
|
||||
float distance_to_top_left[] = {point_in_quad_x, point_in_quad_y};
|
||||
float distance_to_top_right[] = {point_in_quad_x - 1, point_in_quad_y};
|
||||
@@ -207,11 +235,6 @@ float noise(float x, float y) {
|
||||
float bx = linear_interpolation(bx1, bx2, point_in_quad_x);
|
||||
float tb = linear_interpolation(tx, bx, point_in_quad_y);
|
||||
|
||||
free(top_left_gradient);
|
||||
free(top_right_gradient);
|
||||
free(bottom_left_gradient);
|
||||
free(bottom_right_gradient);
|
||||
|
||||
return tb;
|
||||
}
|
||||
|
||||
@@ -223,10 +246,7 @@ float quintic_curve(float t) { return t * t * t * (t * (t * 6 - 15) + 10); }
|
||||
|
||||
float dot_product(float a[2], float b[2]) { return a[0] * b[0] + a[1] * b[1]; }
|
||||
|
||||
float *pseudo_random_gradient_vector_get(float x, float y) {
|
||||
float *a;
|
||||
a = malloc(sizeof *a * 2);
|
||||
|
||||
void pseudo_random_gradient_vector_get(float vec[2], float x, float y) {
|
||||
int xi = (int)x & 1023;
|
||||
int yi = (int)y & 1023;
|
||||
|
||||
@@ -235,22 +255,38 @@ float *pseudo_random_gradient_vector_get(float x, float y) {
|
||||
|
||||
switch (v) {
|
||||
case 0:
|
||||
a[0] = 1;
|
||||
a[1] = 0;
|
||||
vec[0] = 1;
|
||||
vec[1] = 0;
|
||||
break;
|
||||
case 1:
|
||||
a[0] = -1;
|
||||
a[1] = 0;
|
||||
vec[0] = -1;
|
||||
vec[1] = 0;
|
||||
break;
|
||||
case 2:
|
||||
a[0] = 0;
|
||||
a[1] = 1;
|
||||
vec[0] = 0;
|
||||
vec[1] = 1;
|
||||
break;
|
||||
case 3:
|
||||
a[0] = 0;
|
||||
a[1] = -1;
|
||||
vec[0] = 0;
|
||||
vec[1] = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
void write_image_with_timing(png_image *img, const unsigned char *buffer) {
|
||||
struct timespec start, end;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
png_image_write_to_file(img, "noise.png", 0, buffer, 0, NULL);
|
||||
clock_gettime(CLOCK_MONOTONIC, &end);
|
||||
|
||||
print_timediff("write to file time (single-threaded): ", start, end);
|
||||
}
|
||||
|
||||
void print_timediff(const char *message, struct timespec start,
|
||||
struct timespec end) {
|
||||
double secs = end.tv_sec - start.tv_sec;
|
||||
double nsecs = (end.tv_nsec - start.tv_nsec) / 1e9;
|
||||
|
||||
printf("%s%fs.\n", message, secs + nsecs);
|
||||
}
|
||||
|
||||
10
measurements.csv
Normal file
10
measurements.csv
Normal file
@@ -0,0 +1,10 @@
|
||||
x,theoretical,practical
|
||||
1,6.48301,6.48301
|
||||
2,3.24151,3.25753
|
||||
4,1.62075,1.66985
|
||||
6,1.08050,1.34714
|
||||
8,0.81038,1.06098
|
||||
10,0.64830,1.18834
|
||||
12,0.54025,1.15044
|
||||
14,0.46307,1.12474
|
||||
16,0.40519,1.09344
|
||||
|
40
plot.py
Normal file
40
plot.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
|
||||
df = pd.read_csv("measurements.csv")
|
||||
|
||||
x = df["x"]
|
||||
theoretical = df["theoretical"]
|
||||
practical = df["practical"]
|
||||
|
||||
plt.figure(figsize=(10, 6))
|
||||
plt.plot(
|
||||
x,
|
||||
theoretical,
|
||||
marker="o",
|
||||
linestyle="-",
|
||||
linewidth=2,
|
||||
markersize=8,
|
||||
label="Theoretical",
|
||||
color="blue",
|
||||
)
|
||||
plt.plot(
|
||||
x,
|
||||
practical,
|
||||
marker="s",
|
||||
linestyle="--",
|
||||
linewidth=2,
|
||||
markersize=8,
|
||||
label="Practical",
|
||||
color="red",
|
||||
)
|
||||
|
||||
plt.xlabel("Processors", fontsize=12, fontweight="bold")
|
||||
plt.ylabel("T_p", fontsize=12, fontweight="bold")
|
||||
plt.title("Theoretical vs Practical Values", fontsize=14, fontweight="bold")
|
||||
plt.legend(fontsize=11)
|
||||
plt.grid(True, alpha=0.3)
|
||||
plt.xticks(x)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
297
ТВО_КНТ_22_1_Орлов_ЛБ1.typ
Normal file
297
ТВО_КНТ_22_1_Орлов_ЛБ1.typ
Normal file
@@ -0,0 +1,297 @@
|
||||
#import "@local/nure:0.1.0": *
|
||||
|
||||
#show: pz-lb.with(..yaml("doc.yaml"), worknumber: 1, title: "Розробка багатопоточних паралельних додатків")
|
||||
|
||||
|
||||
== Мета роботи
|
||||
#v(-spacing)
|
||||
Вивчити способи організації паралельних багатопоточних додатків на
|
||||
прикладі розробки програмного засобу обробки зображення, накладання фільтру,
|
||||
тощо.
|
||||
|
||||
== Хід роботи
|
||||
#v(-spacing)
|
||||
/* TODO: work
|
||||
3. Розрахувати прискорення та ефективність для розробленого паралельного алгоритму.
|
||||
// 4. Написати програмний засіб обробки зображення, якій накладає фільтр згідно з отриманим завданням.
|
||||
// 5. Зібрати статистику витрат часу на обробку зображенні в одному та декількох потоках (1, 2, 4, 6, 8, 10, 12, 14, 16).
|
||||
6. Побудувати графік залежності часу обробки зображення та створених потоків.
|
||||
7. Порівняти отримані практичні результати з теоретичними.
|
||||
*/
|
||||
|
||||
Для виконання роботи було обрано тему "Шум Перліна". Шум Перліна -- це алгоритм
|
||||
генерації процедурного шуму, розроблений Кеном Перліномб який
|
||||
створює природно виглядаючі випадкові текстури та рельєфи. На відміну від
|
||||
звичайного випадкового шуму, шум Перліна генерує плавні, безперервні градієнти
|
||||
шляхом інтерполяції між випадковими векторами градієнтів на регулярній сітці,
|
||||
що дає органічний вигляд хмар, ландшафтів, мармуру та інших природних візерунків.
|
||||
|
||||
Нижче наведено формулу розрахунку часу виконання алгоритму у ідеальному випадку ($E_p = 1$).
|
||||
Такий випадок, звісно, дуже малоймовірний у реальному житті, але ми використаємо цю формулу для подальших теоретичних розрахунків.
|
||||
$ T_p = T_1/p $
|
||||
|
||||
Використаємо стандартну формулу розрахунку прискорення, підставивши значення часу виконання на декількох процесорах з минулого рівняння.
|
||||
$ S_p = T_1/T_p = T_1/(T_1/p) = p $
|
||||
|
||||
Для конкретних розрахунків візьмемо час виконання на одному потоці з реальної роботи програми (середнє значення з 10 запусків):
|
||||
$ T_1 = 6.48301 (sec) $
|
||||
|
||||
#let t1 = decimal("6.48301")
|
||||
#figure(
|
||||
table(
|
||||
columns: 3,
|
||||
table.header([$p$], [$S_p$ (разів)], [$T_p$ (теор.)]),
|
||||
..(1, ..range(2, 16 + 1, step: 2))
|
||||
.map(i => {
|
||||
([$#i$], [$#(i)$], [$#(calc.round(t1 / decimal(i), digits: 5))$])
|
||||
})
|
||||
.flatten(),
|
||||
),
|
||||
caption: [Теоретичні результати розрахунків],
|
||||
)
|
||||
|
||||
/* 1 THREAD{{{
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 6.498818s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 6.481534s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 6.480006s.
|
||||
performing 10 runs!
|
||||
6.485858
|
||||
6.481472
|
||||
6.489044
|
||||
6.478120
|
||||
6.478225
|
||||
6.477820
|
||||
6.497720
|
||||
6.480843
|
||||
6.476514
|
||||
6.484445
|
||||
|
||||
|
||||
6.48301
|
||||
write to file time (single-threaded): 5.777791s.
|
||||
*/
|
||||
// }}}
|
||||
|
||||
/* 2 THREADS{{{
|
||||
rows per thread: 4096
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 3.256548s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 3.255972s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 3.244320s.
|
||||
performing 10 runs!
|
||||
3.256898
|
||||
3.254369
|
||||
3.273234
|
||||
3.253884
|
||||
3.253703
|
||||
3.257059
|
||||
3.253436
|
||||
3.254229
|
||||
3.255219
|
||||
3.263258
|
||||
|
||||
|
||||
3.25753
|
||||
write to file time (single-threaded): 5.813933s.
|
||||
*/// }}}
|
||||
|
||||
/* 4 THREADS{{{
|
||||
rows per thread: 2048
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 1.656087s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 1.656606s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 1.651284s.
|
||||
performing 10 runs!
|
||||
1.653661
|
||||
1.680330
|
||||
1.698069
|
||||
1.663428
|
||||
1.721254
|
||||
1.651755
|
||||
1.644072
|
||||
1.671594
|
||||
1.659138
|
||||
1.655216
|
||||
|
||||
|
||||
1.66985
|
||||
write to file time (single-threaded): 5.817255s.
|
||||
*/// }}}
|
||||
|
||||
/* 6 THREADS{{{
|
||||
rows per thread: 1365
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 1.338387s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 1.351296s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 1.348886s.
|
||||
performing 10 runs!
|
||||
1.349408
|
||||
1.344556
|
||||
1.354476
|
||||
1.370135
|
||||
1.340126
|
||||
1.338552
|
||||
1.343501
|
||||
1.347066
|
||||
1.341447
|
||||
1.342093
|
||||
|
||||
|
||||
1.34714
|
||||
write to file time (single-threaded): 5.843012s.
|
||||
*/// }}}
|
||||
|
||||
/* 8 THREADS{{{
|
||||
rows per thread: 1024
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 1.056011s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 1.058755s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 1.062063s.
|
||||
performing 10 runs!
|
||||
1.055696
|
||||
1.054573
|
||||
1.062060
|
||||
1.061573
|
||||
1.060853
|
||||
1.071647
|
||||
1.060886
|
||||
1.060964
|
||||
1.061237
|
||||
1.060295
|
||||
|
||||
|
||||
1.06098
|
||||
write to file time (single-threaded): 5.849602s.
|
||||
*/// }}}
|
||||
|
||||
/* 10 THREADS{{{
|
||||
rows per thread: 819
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 1.184474s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 1.110894s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 1.236176s.
|
||||
performing 10 runs!
|
||||
1.240800
|
||||
1.190502
|
||||
1.149874
|
||||
1.145224
|
||||
1.166882
|
||||
1.191344
|
||||
1.197383
|
||||
1.175277
|
||||
1.177462
|
||||
1.248642
|
||||
|
||||
|
||||
1.18834
|
||||
write to file time (single-threaded): 5.804186s.
|
||||
*/// }}}
|
||||
|
||||
/* 12 THREADS{{{
|
||||
rows per thread: 682
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 1.157902s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 1.151911s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 1.146184s.
|
||||
performing 10 runs!
|
||||
1.145244
|
||||
1.128753
|
||||
1.141891
|
||||
1.172025
|
||||
1.143231
|
||||
1.139481
|
||||
1.152526
|
||||
1.168733
|
||||
1.172086
|
||||
1.140409
|
||||
|
||||
|
||||
1.15044
|
||||
write to file time (single-threaded): 5.844610s.
|
||||
*/// }}}
|
||||
|
||||
/* 14 THREADS{{{
|
||||
rows per thread: 585
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 1.101157s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 1.131439s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 1.124504s.
|
||||
performing 10 runs!
|
||||
1.135715
|
||||
1.116137
|
||||
1.154087
|
||||
1.127029
|
||||
1.116847
|
||||
1.145039
|
||||
1.126409
|
||||
1.108209
|
||||
1.118908
|
||||
1.098983
|
||||
|
||||
|
||||
1.12474
|
||||
write to file time (single-threaded): 5.839399s.
|
||||
*/// }}}
|
||||
|
||||
/* 16 THREADS{{{
|
||||
rows per thread: 512
|
||||
performing warmup run #1, stay tuned!
|
||||
warmup calculation time: 1.066169s.
|
||||
performing warmup run #2, stay tuned!
|
||||
warmup calculation time: 1.087413s.
|
||||
performing warmup run #3, stay tuned!
|
||||
warmup calculation time: 1.095996s.
|
||||
performing 10 runs!
|
||||
1.080556
|
||||
1.079299
|
||||
1.097067
|
||||
1.088530
|
||||
1.078512
|
||||
1.094765
|
||||
1.112028
|
||||
1.090462
|
||||
1.104614
|
||||
1.108524
|
||||
|
||||
|
||||
1.09344
|
||||
write to file time (single-threaded): 5.833199s.
|
||||
*/// }}}
|
||||
|
||||
/*
|
||||
NOTE: cpu info:
|
||||
Architecture: x86_64
|
||||
CPU op-mode(s): 32-bit, 64-bit
|
||||
Address sizes: 43 bits physical, 48 bits virtual
|
||||
Byte Order: Little Endian
|
||||
CPU(s): 8
|
||||
On-line CPU(s) list: 0-7
|
||||
Vendor ID: AuthenticAMD
|
||||
Model name: AMD Ryzen 5 3400G with Radeon Vega Graphics
|
||||
CPU family: 23
|
||||
Model: 24
|
||||
Thread(s) per core: 2
|
||||
Core(s) per socket: 4
|
||||
Socket(s): 1
|
||||
*/
|
||||
|
||||
== Висновки
|
||||
#v(-spacing)
|
||||
// TODO: conclusions
|
||||
Reference in New Issue
Block a user