WIP, but almost done

2025-11-19 15:56:00 +02:00
parent 2d3ebea187
commit 0e84d04e1e
6 changed files with 514 additions and 129 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .*
 !/.gitignore
 main
+*.pdf
--- a/doc.yaml
+++ b/doc.yaml
@@ -0,0 +1 @@
+../doc-lb.yaml
--- a/main.c
+++ b/main.c
@@ -1,8 +1,6 @@
-#include <errno.h>
 #include <math.h>
 #include <png.h>
 #include <pthread.h>
-#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -13,7 +11,17 @@ float noise(float x, float y);
 float linear_interpolation(float a, float b, float t);
 float quintic_curve(float t);
 float dot_product(float a[], float b[]);
-float *pseudo_random_gradient_vector_get(float x, float y);
+void pseudo_random_gradient_vector_get(float vec[2], float x, float y);
+void *calculate(void *args);
+void single_threaded(unsigned char *buffer, int height, int width);
+void multi_threaded(unsigned char *buffer, int height, int width,
+                    int thread_no);
+void panic(const char *message);
+void benchmark_calculations(unsigned char *buffer, int height, int width,
+                            int thread_no);
+void write_image_with_timing(png_image *img, const unsigned char *buffer);
+void print_timediff(const char *message, struct timespec start,
+                    struct timespec end);

 static unsigned char random_bytes[1024];

@@ -34,7 +42,117 @@ struct multi_threaded_arg {
  int width;
 };

-void *calculate(void *args) { // TODO: reorder functions by call order
+int main(int argc, char *argv[]) {
+  int n = getrandom(random_bytes, 1024, 0);
+  if (n != 1024) {
+    fprintf(stderr, "err: getrandom didn't work out as planned\n");
+    return 1;
+  }
+
+  if (argc < 3) {
+    fprintf(stderr, "usage: width height [threads] \n");
+    return 1;
+  }
+
+  int width = atoi(argv[1]), height = atoi(argv[2]);
+  int threads = 1;
+  if (argc == 4) {
+    threads = atoi(argv[3]);
+
+    if (height < threads) {
+      fprintf(stderr,
+              "err: height < threads. expected: height >= threads. aborting\n");
+      return 1;
+    }
+
+    printf("rows per thread: %u\n", height / threads);
+  }
+
+  png_image img;
+  memset(&img, 0, sizeof(img));
+  img.format = PNG_FORMAT_GRAY;
+  img.width = width;
+  img.height = height;
+  img.version = PNG_IMAGE_VERSION;
+
+  unsigned char *buffer;
+  buffer = malloc(PNG_IMAGE_SIZE(img));
+
+  benchmark_calculations(buffer, height, width, threads);
+
+  write_image_with_timing(&img, buffer);
+
+  free(buffer);
+}
+
+void benchmark_calculations(unsigned char *buffer, int height, int width,
+                            int threads) {
+  struct timespec start, end;
+
+  for (int i = 0; i < 3; i++) {
+    printf("performing warmup run #%u, stay tuned!\n", i + 1);
+
+    clock_gettime(CLOCK_MONOTONIC, &start);
+    if (threads < 2) {
+      single_threaded(buffer, height, width);
+    } else {
+      multi_threaded(buffer, height, width, threads);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &end);
+    print_timediff("warmup calculation time: ", start, end);
+  }
+
+  puts("performing 10 runs!");
+  for (int i = 0; i < 10; i++) {
+    clock_gettime(CLOCK_MONOTONIC, &start);
+    if (threads < 2) {
+      single_threaded(buffer, height, width);
+    } else {
+      multi_threaded(buffer, height, width, threads);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &end);
+    print_timediff("calculation time: ", start, end);
+  }
+}
+
+void single_threaded(unsigned char *buffer, int height, int width) {
+  struct single_threaded_arg arg = {
+      .type = THRD_SINGLE, .buffer = buffer, .height = height, .width = width};
+  calculate(&arg);
+}
+
+void multi_threaded(unsigned char *buffer, int height, int width,
+                    int thread_no) {
+  pthread_t threads[thread_no];
+  const unsigned int rows_per_thread = height / thread_no;
+
+  struct multi_threaded_arg *all_args[thread_no];
+  for (int i = 0; i < thread_no; i++) {
+    const int row_start = i * rows_per_thread;
+    int row_end = (i + 1) * rows_per_thread;
+    if (i == thread_no - 1) {
+      row_end = height;
+    }
+
+    struct multi_threaded_arg *arg;
+    arg = malloc(sizeof *arg);
+
+    *arg = (struct multi_threaded_arg){.type = THRD_MULTI,
+                                       .buffer = buffer,
+                                       .height_start = row_start,
+                                       .height_end = row_end,
+                                       .width = width};
+    pthread_create(&threads[i], NULL, calculate, arg);
+    all_args[i] = arg;
+  }
+
+  for (int i = 0; i < thread_no; ++i) {
+    pthread_join(threads[i], NULL);
+    free(all_args[i]);
+  }
+}
+
+void *calculate(void *args) {
  int x = 0;
  int height = 0, width = 0;
  unsigned char *buffer;
@@ -56,15 +174,17 @@ void *calculate(void *args) { // TODO: reorder functions by call order
    }
  }

+  /* minimum space needed for any variation of error msg */
+  char msg[31 + 13 + 11 + 1];
  if (height == 0 || width == 0) {
-    printf("things have got seriously wrong!");
+    strcat(msg, "things have got seriously wrong");
    if (height == 0) {
-      printf(" height is 0!");
+      strcat(msg, " height is 0!");
    } else {
-      printf(" width is 0!");
+      strcat(msg, " width is 0!");
    }
-    printf(" aborting\n");
-    exit(1);
+    strcat(msg, " aborting\n");
+    panic(msg);
  }

  for (; x < height; ++x) {
@@ -79,101 +199,9 @@ void *calculate(void *args) { // TODO: reorder functions by call order
  return NULL;
 }

-void single_threaded(unsigned char *buffer, int height, int width) {
-  struct single_threaded_arg arg = {
-      .type = THRD_SINGLE, .buffer = buffer, .height = height, .width = width};
-  calculate(&arg);
-}
-
-void multi_threaded(unsigned char *buffer, int height, int width,
-                    int thread_no) {
-  if (thread_no == 1) {
-    single_threaded(buffer, height, width);
-    return;
-  }
-
-  pthread_t threads[thread_no];
-  const unsigned int rows_per_thread = height / thread_no;
-  printf("rows per thread: %u\n", rows_per_thread);
-  struct multi_threaded_arg *args_arr[thread_no];
-  for (int i = 0; i < thread_no; i++) {
-    const int height_start = i * rows_per_thread;
-    int height_end = (i + 1) * rows_per_thread;
-    if (i == thread_no - 1) {
-      height_end = height;
-    }
-
-    struct multi_threaded_arg *arg;
-    arg = malloc(sizeof *arg);
-
-    *arg = (struct multi_threaded_arg){.type = THRD_MULTI,
-                                       .buffer = buffer,
-                                       .height_start = height_start,
-                                       .height_end = height_end,
-                                       .width = width};
-    pthread_create(&threads[i], NULL, calculate, arg);
-    args_arr[i] = arg;
-  }
-
-  for (int i = 0; i < thread_no; ++i) {
-    pthread_join(threads[i], NULL);
-    free(args_arr[i]);
-  }
-}
-
-int main(int argc, char *argv[]) {
-  int n = getrandom(random_bytes, 1024, 0);
-  if (n != 1024) {
-    fprintf(stderr, "err: getrandom didn't work out as planned\n");
-    return 1;
-  }
-
-  if (argc < 3) {
-    fprintf(stderr, "err: must supply 3 params!\n");
-    return 1;
-  }
-  int width = atoi(argv[1]), height = atoi(argv[2]);
-  int threads = 1;
-  if (argc == 4) {
-    threads = atoi(argv[3]);
-
-    if (height < threads) {
-      fprintf(stderr,
-              "err: rows < threads. expected: rows >= threads. aborting\n");
-      return 1;
-    }
-  }
-
-  png_image img;
-  memset(&img, 0, sizeof(img));
-  img.format = PNG_FORMAT_GRAY;
-  img.width = width;
-  img.height = height;
-  img.version = PNG_IMAGE_VERSION;
-
-  unsigned char *buffer;
-  printf("PNG IMAGE SIZE: %u bytes\n", PNG_IMAGE_SIZE(img));
-  buffer = malloc(PNG_IMAGE_SIZE(img));
-
-  struct timespec start, end;
-
-  clock_gettime(CLOCK_MONOTONIC, &start);
-  if (threads < 2)
-    single_threaded(buffer, height, width);
-  else
-    multi_threaded(buffer, height, width, threads);
-  clock_gettime(CLOCK_MONOTONIC, &end);
-  printf("calculation time: %fs.\n", (double)(end.tv_sec - start.tv_sec) +
-                                         (end.tv_nsec - start.tv_nsec) / 1e9);
-
-  clock_gettime(CLOCK_MONOTONIC, &start);
-  png_image_write_to_file(&img, "noise.png", 0, buffer, 0, NULL);
-  clock_gettime(CLOCK_MONOTONIC, &end);
-  printf("write to file time (single-threaded): %fs.\n",
-         (double)(end.tv_sec - start.tv_sec) +
-             (end.tv_nsec - start.tv_nsec) / 1e9);
-
-  free(buffer);
+void panic(const char *message) {
+  fputs(message, stderr);
+  exit(1);
 }

 float noise(float x, float y) {
@@ -183,12 +211,12 @@ float noise(float x, float y) {
  float point_in_quad_x = x - left;
  float point_in_quad_y = y - top;

-  float *top_left_gradient = pseudo_random_gradient_vector_get(left, top);
-  float *top_right_gradient = pseudo_random_gradient_vector_get(left + 1, top);
-  float *bottom_left_gradient =
-      pseudo_random_gradient_vector_get(left, top + 1);
-  float *bottom_right_gradient =
-      pseudo_random_gradient_vector_get(left + 1, top + 1);
+  float top_left_gradient[2], top_right_gradient[2], bottom_left_gradient[2],
+      bottom_right_gradient[2];
+  pseudo_random_gradient_vector_get(top_left_gradient, left, top);
+  pseudo_random_gradient_vector_get(top_right_gradient, left + 1, top);
+  pseudo_random_gradient_vector_get(bottom_left_gradient, left, top + 1);
+  pseudo_random_gradient_vector_get(bottom_right_gradient, left + 1, top + 1);

  float distance_to_top_left[] = {point_in_quad_x, point_in_quad_y};
  float distance_to_top_right[] = {point_in_quad_x - 1, point_in_quad_y};
@@ -207,11 +235,6 @@ float noise(float x, float y) {
  float bx = linear_interpolation(bx1, bx2, point_in_quad_x);
  float tb = linear_interpolation(tx, bx, point_in_quad_y);

-  free(top_left_gradient);
-  free(top_right_gradient);
-  free(bottom_left_gradient);
-  free(bottom_right_gradient);
-
  return tb;
 }

@@ -223,10 +246,7 @@ float quintic_curve(float t) { return t * t * t * (t * (t * 6 - 15) + 10); }

 float dot_product(float a[2], float b[2]) { return a[0] * b[0] + a[1] * b[1]; }

-float *pseudo_random_gradient_vector_get(float x, float y) {
-  float *a;
-  a = malloc(sizeof *a * 2);
-
+void pseudo_random_gradient_vector_get(float vec[2], float x, float y) {
  int xi = (int)x & 1023;
  int yi = (int)y & 1023;

@@ -235,22 +255,38 @@ float *pseudo_random_gradient_vector_get(float x, float y) {

  switch (v) {
  case 0:
-    a[0] = 1;
-    a[1] = 0;
+    vec[0] = 1;
+    vec[1] = 0;
    break;
  case 1:
-    a[0] = -1;
-    a[1] = 0;
+    vec[0] = -1;
+    vec[1] = 0;
    break;
  case 2:
-    a[0] = 0;
-    a[1] = 1;
+    vec[0] = 0;
+    vec[1] = 1;
    break;
  case 3:
-    a[0] = 0;
-    a[1] = -1;
+    vec[0] = 0;
+    vec[1] = -1;
    break;
  }
-
-  return a;
+}
+
+void write_image_with_timing(png_image *img, const unsigned char *buffer) {
+  struct timespec start, end;
+
+  clock_gettime(CLOCK_MONOTONIC, &start);
+  png_image_write_to_file(img, "noise.png", 0, buffer, 0, NULL);
+  clock_gettime(CLOCK_MONOTONIC, &end);
+
+  print_timediff("write to file time (single-threaded): ", start, end);
+}
+
+void print_timediff(const char *message, struct timespec start,
+                    struct timespec end) {
+  double secs = end.tv_sec - start.tv_sec;
+  double nsecs = (end.tv_nsec - start.tv_nsec) / 1e9;
+
+  printf("%s%fs.\n", message, secs + nsecs);
 }
--- a/measurements.csv
+++ b/measurements.csv
@@ -0,0 +1,10 @@
+x,theoretical,practical
+1,6.48301,6.48301
+2,3.24151,3.25753
+4,1.62075,1.66985
+6,1.08050,1.34714
+8,0.81038,1.06098
+10,0.64830,1.18834
+12,0.54025,1.15044
+14,0.46307,1.12474
+16,0.40519,1.09344
--- a/plot.py
+++ b/plot.py
@@ -0,0 +1,40 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+
+df = pd.read_csv("measurements.csv")
+
+x = df["x"]
+theoretical = df["theoretical"]
+practical = df["practical"]
+
+plt.figure(figsize=(10, 6))
+plt.plot(
+    x,
+    theoretical,
+    marker="o",
+    linestyle="-",
+    linewidth=2,
+    markersize=8,
+    label="Theoretical",
+    color="blue",
+)
+plt.plot(
+    x,
+    practical,
+    marker="s",
+    linestyle="--",
+    linewidth=2,
+    markersize=8,
+    label="Practical",
+    color="red",
+)
+
+plt.xlabel("Processors", fontsize=12, fontweight="bold")
+plt.ylabel("T_p", fontsize=12, fontweight="bold")
+plt.title("Theoretical vs Practical Values", fontsize=14, fontweight="bold")
+plt.legend(fontsize=11)
+plt.grid(True, alpha=0.3)
+plt.xticks(x)
+
+plt.tight_layout()
+plt.show()
--- a/ТВО_КНТ_22_1_Орлов_ЛБ1.typ
+++ b/ТВО_КНТ_22_1_Орлов_ЛБ1.typ
@@ -0,0 +1,297 @@
+#import "@local/nure:0.1.0": *
+
+#show: pz-lb.with(..yaml("doc.yaml"), worknumber: 1, title: "Розробка багатопоточних паралельних додатків")
+
+
+== Мета роботи
+#v(-spacing)
+Вивчити способи організації паралельних багатопоточних додатків на
+прикладі розробки програмного засобу обробки зображення, накладання фільтру,
+тощо.
+
+== Хід роботи
+#v(-spacing)
+/* TODO: work
+    3. Розрахувати прискорення та ефективність для розробленого паралельного алгоритму.
+    // 4. Написати програмний засіб обробки зображення, якій накладає фільтр згідно з отриманим завданням.
+    // 5. Зібрати статистику витрат часу на обробку зображенні в одному та декількох потоках (1, 2, 4, 6, 8, 10, 12, 14, 16).
+    6. Побудувати графік залежності часу обробки зображення та створених потоків.
+    7. Порівняти отримані практичні результати з теоретичними.
+*/
+
+Для виконання роботи було обрано тему "Шум Перліна". Шум Перліна -- це алгоритм
+генерації процедурного шуму, розроблений Кеном Перліномб який
+створює природно виглядаючі випадкові текстури та рельєфи. На відміну від
+звичайного випадкового шуму, шум Перліна генерує плавні, безперервні градієнти
+шляхом інтерполяції між випадковими векторами градієнтів на регулярній сітці,
+що дає органічний вигляд хмар, ландшафтів, мармуру та інших природних візерунків.
+
+Нижче наведено формулу розрахунку часу виконання алгоритму у ідеальному випадку ($E_p = 1$).
+Такий випадок, звісно, дуже малоймовірний у реальному житті, але ми використаємо цю формулу для подальших теоретичних розрахунків.
+$ T_p = T_1/p $
+
+Використаємо стандартну формулу розрахунку прискорення, підставивши значення часу виконання на декількох процесорах з минулого рівняння.
+$ S_p = T_1/T_p = T_1/(T_1/p) = p $
+
+Для конкретних розрахунків візьмемо час виконання на одному потоці з реальної роботи програми (середнє значення з 10 запусків):
+$ T_1 = 6.48301 (sec) $
+
+#let t1 = decimal("6.48301")
+#figure(
+  table(
+    columns: 3,
+    table.header([$p$], [$S_p$ (разів)], [$T_p$ (теор.)]),
+    ..(1, ..range(2, 16 + 1, step: 2))
+      .map(i => {
+        ([$#i$], [$#(i)$], [$#(calc.round(t1 / decimal(i), digits: 5))$])
+      })
+      .flatten(),
+  ),
+  caption: [Теоретичні результати розрахунків],
+)
+
+/* 1 THREAD{{{
+performing warmup run #1, stay tuned!
+warmup calculation time: 6.498818s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 6.481534s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 6.480006s.
+performing 10 runs!
+6.485858
+6.481472
+6.489044
+6.478120
+6.478225
+6.477820
+6.497720
+6.480843
+6.476514
+6.484445
+
+
+6.48301
+write to file time (single-threaded): 5.777791s.
+*/
+// }}}
+
+/* 2 THREADS{{{
+rows per thread: 4096
+performing warmup run #1, stay tuned!
+warmup calculation time: 3.256548s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 3.255972s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 3.244320s.
+performing 10 runs!
+3.256898
+3.254369
+3.273234
+3.253884
+3.253703
+3.257059
+3.253436
+3.254229
+3.255219
+3.263258
+
+
+3.25753
+write to file time (single-threaded): 5.813933s.
+*/// }}}
+
+/* 4 THREADS{{{
+rows per thread: 2048
+performing warmup run #1, stay tuned!
+warmup calculation time: 1.656087s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 1.656606s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 1.651284s.
+performing 10 runs!
+1.653661
+1.680330
+1.698069
+1.663428
+1.721254
+1.651755
+1.644072
+1.671594
+1.659138
+1.655216
+
+
+1.66985
+write to file time (single-threaded): 5.817255s.
+*/// }}}
+
+/* 6 THREADS{{{
+rows per thread: 1365
+performing warmup run #1, stay tuned!
+warmup calculation time: 1.338387s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 1.351296s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 1.348886s.
+performing 10 runs!
+1.349408
+1.344556
+1.354476
+1.370135
+1.340126
+1.338552
+1.343501
+1.347066
+1.341447
+1.342093
+
+
+1.34714
+write to file time (single-threaded): 5.843012s.
+*/// }}}
+
+/* 8 THREADS{{{
+rows per thread: 1024
+performing warmup run #1, stay tuned!
+warmup calculation time: 1.056011s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 1.058755s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 1.062063s.
+performing 10 runs!
+1.055696
+1.054573
+1.062060
+1.061573
+1.060853
+1.071647
+1.060886
+1.060964
+1.061237
+1.060295
+
+
+1.06098
+write to file time (single-threaded): 5.849602s.
+*/// }}}
+
+/* 10 THREADS{{{
+rows per thread: 819
+performing warmup run #1, stay tuned!
+warmup calculation time: 1.184474s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 1.110894s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 1.236176s.
+performing 10 runs!
+1.240800
+1.190502
+1.149874
+1.145224
+1.166882
+1.191344
+1.197383
+1.175277
+1.177462
+1.248642
+
+
+1.18834
+write to file time (single-threaded): 5.804186s.
+*/// }}}
+
+/* 12 THREADS{{{
+rows per thread: 682
+performing warmup run #1, stay tuned!
+warmup calculation time: 1.157902s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 1.151911s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 1.146184s.
+performing 10 runs!
+1.145244
+1.128753
+1.141891
+1.172025
+1.143231
+1.139481
+1.152526
+1.168733
+1.172086
+1.140409
+
+
+1.15044
+write to file time (single-threaded): 5.844610s.
+*/// }}}
+
+/* 14 THREADS{{{
+rows per thread: 585
+performing warmup run #1, stay tuned!
+warmup calculation time: 1.101157s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 1.131439s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 1.124504s.
+performing 10 runs!
+1.135715
+1.116137
+1.154087
+1.127029
+1.116847
+1.145039
+1.126409
+1.108209
+1.118908
+1.098983
+
+
+1.12474
+write to file time (single-threaded): 5.839399s.
+*/// }}}
+
+/* 16 THREADS{{{
+rows per thread: 512
+performing warmup run #1, stay tuned!
+warmup calculation time: 1.066169s.
+performing warmup run #2, stay tuned!
+warmup calculation time: 1.087413s.
+performing warmup run #3, stay tuned!
+warmup calculation time: 1.095996s.
+performing 10 runs!
+1.080556
+1.079299
+1.097067
+1.088530
+1.078512
+1.094765
+1.112028
+1.090462
+1.104614
+1.108524
+
+
+1.09344
+write to file time (single-threaded): 5.833199s.
+*/// }}}
+
+/*
+NOTE: cpu info:
+Architecture:                x86_64
+  CPU op-mode(s):            32-bit, 64-bit
+  Address sizes:             43 bits physical, 48 bits virtual
+  Byte Order:                Little Endian
+CPU(s):                      8
+  On-line CPU(s) list:       0-7
+Vendor ID:                   AuthenticAMD
+  Model name:                AMD Ryzen 5 3400G with Radeon Vega Graphics
+    CPU family:              23
+    Model:                   24
+    Thread(s) per core:      2
+    Core(s) per socket:      4
+    Socket(s):               1
+*/
+
+== Висновки
+#v(-spacing)
+// TODO: conclusions