1
0
Files
nure/semester-4/ОС/lb-7/src/editor/analizer.c
Sytnyk Yehor 62049428c0 OS lb-7
2025-05-15 20:51:24 +03:00

225 lines
5.2 KiB
C

#define _XOPEN_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <fts.h>
#include <iconv.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include "shared.h"
/*
 * Text encodings distinguished by this program.
 * The numeric values follow declaration order (0..3) and must stay stable.
 */
typedef enum {
  ENC_UTF8 = 0,    /* UTF-8 (also plain ASCII) */
  ENC_UTF16LE = 1, /* UTF-16, little-endian */
  ENC_UTF16BE = 2, /* UTF-16, big-endian */
  ENC_CP1251 = 3   /* CP1251 single-byte code page */
} Encoding;
/**
 * Guess the encoding of a byte buffer.
 *
 * A UTF-16 byte-order mark in the first two bytes selects ENC_UTF16BE or
 * ENC_UTF16LE. Otherwise the whole buffer is checked against the well-formed
 * UTF-8 byte sequences of RFC 3629; the first malformed sequence makes the
 * result ENC_CP1251. An empty or pure-ASCII buffer yields ENC_UTF8.
 *
 * A multi-byte sequence that is cut off by the end of the buffer counts as
 * malformed.
 *
 * @param input  bytes to inspect (only read; need not be NUL-terminated)
 * @param length number of bytes available at input
 * @return the detected encoding
 */
Encoding get_encoding(char *input, size_t length) {
  const unsigned char *data = (const unsigned char *)input;

  if (length >= 2) {
    if (data[0] == 0xFE && data[1] == 0xFF) // UTF-16 BE BOM
      return ENC_UTF16BE;
    if (data[0] == 0xFF && data[1] == 0xFE) // UTF-16 LE BOM
      return ENC_UTF16LE;
  }

  size_t i = 0;
  while (i < length) {
    const unsigned char lead = data[i];

    if (lead < 0x80) { // ASCII
      i++;
      continue;
    }

    size_t tail;              // number of continuation bytes expected
    unsigned char lo = 0x80;  // allowed range of the FIRST continuation byte;
    unsigned char hi = 0xBF;  // narrowed below for the special lead bytes

    if (lead >= 0xC2 && lead <= 0xDF) { // 2-byte; C0/C1 would be overlong
      tail = 1;
    } else if (lead >= 0xE0 && lead <= 0xEF) { // 3-byte
      tail = 2;
      if (lead == 0xE0)
        lo = 0xA0; // reject overlong encodings
      if (lead == 0xED)
        hi = 0x9F; // reject UTF-16 surrogates U+D800..U+DFFF
    } else if (lead >= 0xF0 && lead <= 0xF4) { // 4-byte; F5.. exceed U+10FFFF
      tail = 3;
      if (lead == 0xF0)
        lo = 0x90; // reject overlong encodings
      if (lead == 0xF4)
        hi = 0x8F; // reject code points above U+10FFFF
    } else {
      return ENC_CP1251; // stray continuation byte or invalid lead byte
    }

    // i < length here, so the subtraction cannot wrap.
    if (tail > length - i - 1)
      return ENC_CP1251; // sequence runs past the end of the buffer

    if (data[i + 1] < lo || data[i + 1] > hi)
      return ENC_CP1251;
    for (size_t k = 2; k <= tail; k++) {
      if ((data[i + k] & 0xC0) != 0x80)
        return ENC_CP1251;
    }

    i += tail + 1;
  }

  return ENC_UTF8;
}
/**
 * Convert a byte buffer from the given encoding into a newly allocated,
 * NUL-terminated UTF-8 string using iconv.
 *
 * For the UTF-16 encodings a leading byte-order mark is skipped when (and
 * only when) it is actually present. If the input ends in the middle of a
 * character (iconv reports EINVAL), the incomplete tail is dropped and
 * everything converted up to that point is returned.
 *
 * @param input    bytes to convert (only read)
 * @param length   number of bytes available at input
 * @param encoding source encoding of input
 * @return a malloc'ed string the caller must free(), or NULL when input is
 *         NULL/empty, the encoding is unknown, memory cannot be allocated,
 *         or the input contains an invalid sequence
 */
char *to_utf8(char *input, size_t length, Encoding encoding) {
  if (!input || length == 0)
    return NULL;

  const char *encstr = NULL;
  switch (encoding) {
  case ENC_CP1251:
    encstr = "CP1251";
    break;
  case ENC_UTF8:
    encstr = "UTF-8";
    break;
  case ENC_UTF16LE:
    encstr = "UTF-16LE";
    break;
  case ENC_UTF16BE:
    encstr = "UTF-16BE";
    break;
  default:
    return NULL; // unknown encoding value
  }

  // Skip the BOM only if it is really there, so BOM-less UTF-16 input does
  // not lose its first character and short input cannot underflow in_left.
  size_t skip_bom = 0;
  if (length >= 2) {
    const unsigned char *b = (const unsigned char *)input;
    if ((encoding == ENC_UTF16LE && b[0] == 0xFF && b[1] == 0xFE) ||
        (encoding == ENC_UTF16BE && b[0] == 0xFE && b[1] == 0xFF))
      skip_bom = 2;
  }

  // Guard the size computation below against wrap-around.
  if (length > ((size_t)-1 - 1) / 4)
    return NULL;

  iconv_t cd = iconv_open("UTF-8", encstr);
  if (cd == (iconv_t)-1)
    return NULL;

  // Worst case: every input byte expands to four output bytes, plus the NUL.
  size_t out_size = length * 4 + 1;
  char *output = malloc(out_size);
  if (!output) {
    iconv_close(cd);
    return NULL;
  }

  char *in_ptr = input + skip_bom;
  size_t in_left = length - skip_bom;
  char *out_ptr = output;
  size_t out_left = out_size - 1; // keep one byte for the terminator

  size_t rc = iconv(cd, &in_ptr, &in_left, &out_ptr, &out_left);
  // EINVAL means "input ended inside a character": keep what was converted.
  // errno is inspected before iconv_close() can overwrite it.
  int converted = (rc != (size_t)-1) || (errno == EINVAL);
  iconv_close(cd);

  if (!converted) {
    free(output);
    return NULL;
  }

  *out_ptr = '\0';
  return output;
}
int main(int argc, char *argv[]) {
if (argc > 2) {
fprintf(stderr, "Error: too many arguments.\n");
return 1;
}
if (argc < 2) {
fprintf(stderr, "Error: too few arguments.\n");
return 1;
}
struct tm tm = {0};
if (strptime(argv[1], "%s", &tm) == NULL) {
fprintf(stderr, "Error: please provide time in UNIX timestamp format.\n");
return 1;
}
time_t norm_time = mktime(&tm);
char *paths[] = {CWD, NULL};
FTS *fts = fts_open(paths, FTS_NOCHDIR, NULL);
if (fts == NULL) {
err("Can't initialise FTS", __LINE__ - 1);
return 2;
}
char *buf = (char *)malloc(1024 * sizeof(char));
for (FTSENT *ent = fts_read(fts); ent != NULL; ent = fts_read(fts)) {
switch (ent->fts_info) {
case FTS_F:
if (ent->fts_statp->st_mtime > norm_time) {
printf(RED);
printf("--------------------------------\n");
printf(GREEN);
printf("File: %s\n", ent->fts_name);
char mtime[1024] = {0};
strftime(mtime, 1024, "%T", localtime(&ent->fts_statp->st_mtime));
printf("Modified at: %s\n", mtime);
printf("Size: %ld\n", ent->fts_statp->st_size);
FILE *file = fopen(ent->fts_path, "r");
if (file == NULL) {
err("Can't open the file", __LINE__ - 1);
break;
}
size_t bytes_read = fread(buf, 1, 1023, file);
if (ferror(file) != 0) {
err("Can't read the file", __LINE__ - 2);
fclose(file);
break;
}
buf[bytes_read] = '\0';
Encoding enc = get_encoding(buf, bytes_read);
printf("Lines length:");
size_t lines = 0;
size_t line_len = 0;
for (size_t i = (enc == 2 || enc == 1) ? 2 : 0; i < bytes_read; i++) {
line_len++;
int newline_detected = 0;
switch (enc) {
case ENC_UTF16BE:
if (i + 1 < bytes_read && buf[i] == '\0' && buf[i + 1] == '\n') {
newline_detected = 1;
line_len++;
i++;
}
break;
case ENC_UTF16LE:
if (i + 1 < bytes_read && buf[i] == '\n' && buf[i + 1] == '\0') {
newline_detected = 1;
line_len++;
i++;
}
break;
case ENC_CP1251:
case ENC_UTF8:
default:
if (buf[i] == '\n')
newline_detected = 1;
break;
}
if (newline_detected) {
printf(" %zu", line_len);
lines++;
line_len = 0;
}
}
printf("\nTotal lines: %zu\n", lines);
if (enc != ENC_UTF8)
buf = to_utf8(buf, bytes_read, enc);
printf(NORMAL);
printf("%s\n", buf);
fclose(file);
}
break;
default:
break;
}
}
free(buf);
return 0;
}