WIP, but almost done

This commit is contained in:
2025-11-19 15:56:00 +02:00
parent 2d3ebea187
commit 0e84d04e1e
6 changed files with 514 additions and 129 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
.*
!/.gitignore
main
*.pdf

1
doc.yaml Symbolic link
View File

@@ -0,0 +1 @@
../doc-lb.yaml

294
main.c
View File

@@ -1,8 +1,6 @@
#include <errno.h>
#include <math.h>
#include <png.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -13,7 +11,17 @@ float noise(float x, float y);
float linear_interpolation(float a, float b, float t);
float quintic_curve(float t);
float dot_product(float a[], float b[]);
float *pseudo_random_gradient_vector_get(float x, float y);
void pseudo_random_gradient_vector_get(float vec[2], float x, float y);
void *calculate(void *args);
void single_threaded(unsigned char *buffer, int height, int width);
void multi_threaded(unsigned char *buffer, int height, int width,
int thread_no);
void panic(const char *message);
void benchmark_calculations(unsigned char *buffer, int height, int width,
int thread_no);
void write_image_with_timing(png_image *img, const unsigned char *buffer);
void print_timediff(const char *message, struct timespec start,
struct timespec end);
static unsigned char random_bytes[1024];
@@ -34,7 +42,117 @@ struct multi_threaded_arg {
int width;
};
void *calculate(void *args) { // TODO: reorder functions by call order
int main(int argc, char *argv[]) {
int n = getrandom(random_bytes, 1024, 0);
if (n != 1024) {
fprintf(stderr, "err: getrandom didn't work out as planned\n");
return 1;
}
if (argc < 3) {
fprintf(stderr, "usage: width height [threads] \n");
return 1;
}
int width = atoi(argv[1]), height = atoi(argv[2]);
int threads = 1;
if (argc == 4) {
threads = atoi(argv[3]);
if (height < threads) {
fprintf(stderr,
"err: height < threads. expected: height >= threads. aborting\n");
return 1;
}
printf("rows per thread: %u\n", height / threads);
}
png_image img;
memset(&img, 0, sizeof(img));
img.format = PNG_FORMAT_GRAY;
img.width = width;
img.height = height;
img.version = PNG_IMAGE_VERSION;
unsigned char *buffer;
buffer = malloc(PNG_IMAGE_SIZE(img));
benchmark_calculations(buffer, height, width, threads);
write_image_with_timing(&img, buffer);
free(buffer);
}
void benchmark_calculations(unsigned char *buffer, int height, int width,
int threads) {
struct timespec start, end;
for (int i = 0; i < 3; i++) {
printf("performing warmup run #%u, stay tuned!\n", i + 1);
clock_gettime(CLOCK_MONOTONIC, &start);
if (threads < 2) {
single_threaded(buffer, height, width);
} else {
multi_threaded(buffer, height, width, threads);
}
clock_gettime(CLOCK_MONOTONIC, &end);
print_timediff("warmup calculation time: ", start, end);
}
puts("performing 10 runs!");
for (int i = 0; i < 10; i++) {
clock_gettime(CLOCK_MONOTONIC, &start);
if (threads < 2) {
single_threaded(buffer, height, width);
} else {
multi_threaded(buffer, height, width, threads);
}
clock_gettime(CLOCK_MONOTONIC, &end);
print_timediff("calculation time: ", start, end);
}
}
void single_threaded(unsigned char *buffer, int height, int width) {
struct single_threaded_arg arg = {
.type = THRD_SINGLE, .buffer = buffer, .height = height, .width = width};
calculate(&arg);
}
void multi_threaded(unsigned char *buffer, int height, int width,
int thread_no) {
pthread_t threads[thread_no];
const unsigned int rows_per_thread = height / thread_no;
struct multi_threaded_arg *all_args[thread_no];
for (int i = 0; i < thread_no; i++) {
const int row_start = i * rows_per_thread;
int row_end = (i + 1) * rows_per_thread;
if (i == thread_no - 1) {
row_end = height;
}
struct multi_threaded_arg *arg;
arg = malloc(sizeof *arg);
*arg = (struct multi_threaded_arg){.type = THRD_MULTI,
.buffer = buffer,
.height_start = row_start,
.height_end = row_end,
.width = width};
pthread_create(&threads[i], NULL, calculate, arg);
all_args[i] = arg;
}
for (int i = 0; i < thread_no; ++i) {
pthread_join(threads[i], NULL);
free(all_args[i]);
}
}
void *calculate(void *args) {
int x = 0;
int height = 0, width = 0;
unsigned char *buffer;
@@ -56,15 +174,17 @@ void *calculate(void *args) { // TODO: reorder functions by call order
}
}
/* minimum space needed for any variation of error msg */
char msg[31 + 13 + 11 + 1];
if (height == 0 || width == 0) {
printf("things have got seriously wrong!");
strcat(msg, "things have got seriously wrong");
if (height == 0) {
printf(" height is 0!");
strcat(msg, " height is 0!");
} else {
printf(" width is 0!");
strcat(msg, " width is 0!");
}
printf(" aborting\n");
exit(1);
strcat(msg, " aborting\n");
panic(msg);
}
for (; x < height; ++x) {
@@ -79,101 +199,9 @@ void *calculate(void *args) { // TODO: reorder functions by call order
return NULL;
}
void single_threaded(unsigned char *buffer, int height, int width) {
struct single_threaded_arg arg = {
.type = THRD_SINGLE, .buffer = buffer, .height = height, .width = width};
calculate(&arg);
}
void multi_threaded(unsigned char *buffer, int height, int width,
int thread_no) {
if (thread_no == 1) {
single_threaded(buffer, height, width);
return;
}
pthread_t threads[thread_no];
const unsigned int rows_per_thread = height / thread_no;
printf("rows per thread: %u\n", rows_per_thread);
struct multi_threaded_arg *args_arr[thread_no];
for (int i = 0; i < thread_no; i++) {
const int height_start = i * rows_per_thread;
int height_end = (i + 1) * rows_per_thread;
if (i == thread_no - 1) {
height_end = height;
}
struct multi_threaded_arg *arg;
arg = malloc(sizeof *arg);
*arg = (struct multi_threaded_arg){.type = THRD_MULTI,
.buffer = buffer,
.height_start = height_start,
.height_end = height_end,
.width = width};
pthread_create(&threads[i], NULL, calculate, arg);
args_arr[i] = arg;
}
for (int i = 0; i < thread_no; ++i) {
pthread_join(threads[i], NULL);
free(args_arr[i]);
}
}
int main(int argc, char *argv[]) {
int n = getrandom(random_bytes, 1024, 0);
if (n != 1024) {
fprintf(stderr, "err: getrandom didn't work out as planned\n");
return 1;
}
if (argc < 3) {
fprintf(stderr, "err: must supply 3 params!\n");
return 1;
}
int width = atoi(argv[1]), height = atoi(argv[2]);
int threads = 1;
if (argc == 4) {
threads = atoi(argv[3]);
if (height < threads) {
fprintf(stderr,
"err: rows < threads. expected: rows >= threads. aborting\n");
return 1;
}
}
png_image img;
memset(&img, 0, sizeof(img));
img.format = PNG_FORMAT_GRAY;
img.width = width;
img.height = height;
img.version = PNG_IMAGE_VERSION;
unsigned char *buffer;
printf("PNG IMAGE SIZE: %u bytes\n", PNG_IMAGE_SIZE(img));
buffer = malloc(PNG_IMAGE_SIZE(img));
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
if (threads < 2)
single_threaded(buffer, height, width);
else
multi_threaded(buffer, height, width, threads);
clock_gettime(CLOCK_MONOTONIC, &end);
printf("calculation time: %fs.\n", (double)(end.tv_sec - start.tv_sec) +
(end.tv_nsec - start.tv_nsec) / 1e9);
clock_gettime(CLOCK_MONOTONIC, &start);
png_image_write_to_file(&img, "noise.png", 0, buffer, 0, NULL);
clock_gettime(CLOCK_MONOTONIC, &end);
printf("write to file time (single-threaded): %fs.\n",
(double)(end.tv_sec - start.tv_sec) +
(end.tv_nsec - start.tv_nsec) / 1e9);
free(buffer);
void panic(const char *message) {
fputs(message, stderr);
exit(1);
}
float noise(float x, float y) {
@@ -183,12 +211,12 @@ float noise(float x, float y) {
float point_in_quad_x = x - left;
float point_in_quad_y = y - top;
float *top_left_gradient = pseudo_random_gradient_vector_get(left, top);
float *top_right_gradient = pseudo_random_gradient_vector_get(left + 1, top);
float *bottom_left_gradient =
pseudo_random_gradient_vector_get(left, top + 1);
float *bottom_right_gradient =
pseudo_random_gradient_vector_get(left + 1, top + 1);
float top_left_gradient[2], top_right_gradient[2], bottom_left_gradient[2],
bottom_right_gradient[2];
pseudo_random_gradient_vector_get(top_left_gradient, left, top);
pseudo_random_gradient_vector_get(top_right_gradient, left + 1, top);
pseudo_random_gradient_vector_get(bottom_left_gradient, left, top + 1);
pseudo_random_gradient_vector_get(bottom_right_gradient, left + 1, top + 1);
float distance_to_top_left[] = {point_in_quad_x, point_in_quad_y};
float distance_to_top_right[] = {point_in_quad_x - 1, point_in_quad_y};
@@ -207,11 +235,6 @@ float noise(float x, float y) {
float bx = linear_interpolation(bx1, bx2, point_in_quad_x);
float tb = linear_interpolation(tx, bx, point_in_quad_y);
free(top_left_gradient);
free(top_right_gradient);
free(bottom_left_gradient);
free(bottom_right_gradient);
return tb;
}
@@ -223,10 +246,7 @@ float quintic_curve(float t) { return t * t * t * (t * (t * 6 - 15) + 10); }
float dot_product(float a[2], float b[2]) { return a[0] * b[0] + a[1] * b[1]; }
float *pseudo_random_gradient_vector_get(float x, float y) {
float *a;
a = malloc(sizeof *a * 2);
void pseudo_random_gradient_vector_get(float vec[2], float x, float y) {
int xi = (int)x & 1023;
int yi = (int)y & 1023;
@@ -235,22 +255,38 @@ float *pseudo_random_gradient_vector_get(float x, float y) {
switch (v) {
case 0:
a[0] = 1;
a[1] = 0;
vec[0] = 1;
vec[1] = 0;
break;
case 1:
a[0] = -1;
a[1] = 0;
vec[0] = -1;
vec[1] = 0;
break;
case 2:
a[0] = 0;
a[1] = 1;
vec[0] = 0;
vec[1] = 1;
break;
case 3:
a[0] = 0;
a[1] = -1;
vec[0] = 0;
vec[1] = -1;
break;
}
return a;
}
void write_image_with_timing(png_image *img, const unsigned char *buffer) {
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
png_image_write_to_file(img, "noise.png", 0, buffer, 0, NULL);
clock_gettime(CLOCK_MONOTONIC, &end);
print_timediff("write to file time (single-threaded): ", start, end);
}
void print_timediff(const char *message, struct timespec start,
struct timespec end) {
double secs = end.tv_sec - start.tv_sec;
double nsecs = (end.tv_nsec - start.tv_nsec) / 1e9;
printf("%s%fs.\n", message, secs + nsecs);
}

10
measurements.csv Normal file
View File

@@ -0,0 +1,10 @@
x,theoretical,practical
1,6.48301,6.48301
2,3.24151,3.25753
4,1.62075,1.66985
6,1.08050,1.34714
8,0.81038,1.06098
10,0.64830,1.18834
12,0.54025,1.15044
14,0.46307,1.12474
16,0.40519,1.09344
1 x theoretical practical
2 1 6.48301 6.48301
3 2 3.24151 3.25753
4 4 1.62075 1.66985
5 6 1.08050 1.34714
6 8 0.81038 1.06098
7 10 0.64830 1.18834
8 12 0.54025 1.15044
9 14 0.46307 1.12474
10 16 0.40519 1.09344

40
plot.py Normal file
View File

@@ -0,0 +1,40 @@
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("measurements.csv")
x = df["x"]
theoretical = df["theoretical"]
practical = df["practical"]
plt.figure(figsize=(10, 6))
plt.plot(
x,
theoretical,
marker="o",
linestyle="-",
linewidth=2,
markersize=8,
label="Theoretical",
color="blue",
)
plt.plot(
x,
practical,
marker="s",
linestyle="--",
linewidth=2,
markersize=8,
label="Practical",
color="red",
)
plt.xlabel("Processors", fontsize=12, fontweight="bold")
plt.ylabel("T_p", fontsize=12, fontweight="bold")
plt.title("Theoretical vs Practical Values", fontsize=14, fontweight="bold")
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.xticks(x)
plt.tight_layout()
plt.show()

View File

@@ -0,0 +1,297 @@
#import "@local/nure:0.1.0": *
#show: pz-lb.with(..yaml("doc.yaml"), worknumber: 1, title: "Розробка багатопоточних паралельних додатків")
== Мета роботи
#v(-spacing)
Вивчити способи організації паралельних багатопоточних додатків на
прикладі розробки програмного засобу обробки зображення, накладання фільтру,
тощо.
== Хід роботи
#v(-spacing)
/* TODO: work
3. Розрахувати прискорення та ефективність для розробленого паралельного алгоритму.
// 4. Написати програмний засіб обробки зображення, якій накладає фільтр згідно з отриманим завданням.
// 5. Зібрати статистику витрат часу на обробку зображенні в одному та декількох потоках (1, 2, 4, 6, 8, 10, 12, 14, 16).
6. Побудувати графік залежності часу обробки зображення та створених потоків.
7. Порівняти отримані практичні результати з теоретичними.
*/
Для виконання роботи було обрано тему "Шум Перліна". Шум Перліна -- це алгоритм
генерації процедурного шуму, розроблений Кеном Перліномб який
створює природно виглядаючі випадкові текстури та рельєфи. На відміну від
звичайного випадкового шуму, шум Перліна генерує плавні, безперервні градієнти
шляхом інтерполяції між випадковими векторами градієнтів на регулярній сітці,
що дає органічний вигляд хмар, ландшафтів, мармуру та інших природних візерунків.
Нижче наведено формулу розрахунку часу виконання алгоритму у ідеальному випадку ($E_p = 1$).
Такий випадок, звісно, дуже малоймовірний у реальному житті, але ми використаємо цю формулу для подальших теоретичних розрахунків.
$ T_p = T_1/p $
Використаємо стандартну формулу розрахунку прискорення, підставивши значення часу виконання на декількох процесорах з минулого рівняння.
$ S_p = T_1/T_p = T_1/(T_1/p) = p $
Для конкретних розрахунків візьмемо час виконання на одному потоці з реальної роботи програми (середнє значення з 10 запусків):
$ T_1 = 6.48301 (sec) $
#let t1 = decimal("6.48301")
#figure(
table(
columns: 3,
table.header([$p$], [$S_p$ (разів)], [$T_p$ (теор.)]),
..(1, ..range(2, 16 + 1, step: 2))
.map(i => {
([$#i$], [$#(i)$], [$#(calc.round(t1 / decimal(i), digits: 5))$])
})
.flatten(),
),
caption: [Теоретичні результати розрахунків],
)
/* 1 THREAD{{{
performing warmup run #1, stay tuned!
warmup calculation time: 6.498818s.
performing warmup run #2, stay tuned!
warmup calculation time: 6.481534s.
performing warmup run #3, stay tuned!
warmup calculation time: 6.480006s.
performing 10 runs!
6.485858
6.481472
6.489044
6.478120
6.478225
6.477820
6.497720
6.480843
6.476514
6.484445
6.48301
write to file time (single-threaded): 5.777791s.
*/
// }}}
/* 2 THREADS{{{
rows per thread: 4096
performing warmup run #1, stay tuned!
warmup calculation time: 3.256548s.
performing warmup run #2, stay tuned!
warmup calculation time: 3.255972s.
performing warmup run #3, stay tuned!
warmup calculation time: 3.244320s.
performing 10 runs!
3.256898
3.254369
3.273234
3.253884
3.253703
3.257059
3.253436
3.254229
3.255219
3.263258
3.25753
write to file time (single-threaded): 5.813933s.
*/// }}}
/* 4 THREADS{{{
rows per thread: 2048
performing warmup run #1, stay tuned!
warmup calculation time: 1.656087s.
performing warmup run #2, stay tuned!
warmup calculation time: 1.656606s.
performing warmup run #3, stay tuned!
warmup calculation time: 1.651284s.
performing 10 runs!
1.653661
1.680330
1.698069
1.663428
1.721254
1.651755
1.644072
1.671594
1.659138
1.655216
1.66985
write to file time (single-threaded): 5.817255s.
*/// }}}
/* 6 THREADS{{{
rows per thread: 1365
performing warmup run #1, stay tuned!
warmup calculation time: 1.338387s.
performing warmup run #2, stay tuned!
warmup calculation time: 1.351296s.
performing warmup run #3, stay tuned!
warmup calculation time: 1.348886s.
performing 10 runs!
1.349408
1.344556
1.354476
1.370135
1.340126
1.338552
1.343501
1.347066
1.341447
1.342093
1.34714
write to file time (single-threaded): 5.843012s.
*/// }}}
/* 8 THREADS{{{
rows per thread: 1024
performing warmup run #1, stay tuned!
warmup calculation time: 1.056011s.
performing warmup run #2, stay tuned!
warmup calculation time: 1.058755s.
performing warmup run #3, stay tuned!
warmup calculation time: 1.062063s.
performing 10 runs!
1.055696
1.054573
1.062060
1.061573
1.060853
1.071647
1.060886
1.060964
1.061237
1.060295
1.06098
write to file time (single-threaded): 5.849602s.
*/// }}}
/* 10 THREADS{{{
rows per thread: 819
performing warmup run #1, stay tuned!
warmup calculation time: 1.184474s.
performing warmup run #2, stay tuned!
warmup calculation time: 1.110894s.
performing warmup run #3, stay tuned!
warmup calculation time: 1.236176s.
performing 10 runs!
1.240800
1.190502
1.149874
1.145224
1.166882
1.191344
1.197383
1.175277
1.177462
1.248642
1.18834
write to file time (single-threaded): 5.804186s.
*/// }}}
/* 12 THREADS{{{
rows per thread: 682
performing warmup run #1, stay tuned!
warmup calculation time: 1.157902s.
performing warmup run #2, stay tuned!
warmup calculation time: 1.151911s.
performing warmup run #3, stay tuned!
warmup calculation time: 1.146184s.
performing 10 runs!
1.145244
1.128753
1.141891
1.172025
1.143231
1.139481
1.152526
1.168733
1.172086
1.140409
1.15044
write to file time (single-threaded): 5.844610s.
*/// }}}
/* 14 THREADS{{{
rows per thread: 585
performing warmup run #1, stay tuned!
warmup calculation time: 1.101157s.
performing warmup run #2, stay tuned!
warmup calculation time: 1.131439s.
performing warmup run #3, stay tuned!
warmup calculation time: 1.124504s.
performing 10 runs!
1.135715
1.116137
1.154087
1.127029
1.116847
1.145039
1.126409
1.108209
1.118908
1.098983
1.12474
write to file time (single-threaded): 5.839399s.
*/// }}}
/* 16 THREADS{{{
rows per thread: 512
performing warmup run #1, stay tuned!
warmup calculation time: 1.066169s.
performing warmup run #2, stay tuned!
warmup calculation time: 1.087413s.
performing warmup run #3, stay tuned!
warmup calculation time: 1.095996s.
performing 10 runs!
1.080556
1.079299
1.097067
1.088530
1.078512
1.094765
1.112028
1.090462
1.104614
1.108524
1.09344
write to file time (single-threaded): 5.833199s.
*/// }}}
/*
NOTE: cpu info:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Address sizes: 43 bits physical, 48 bits virtual
Byte Order: Little Endian
CPU(s): 8
On-line CPU(s) list: 0-7
Vendor ID: AuthenticAMD
Model name: AMD Ryzen 5 3400G with Radeon Vega Graphics
CPU family: 23
Model: 24
Thread(s) per core: 2
Core(s) per socket: 4
Socket(s): 1
*/
== Висновки
#v(-spacing)
// TODO: conclusions