>From a1ccd78826e7ed590de244d27a7ac00339487dd4 Mon Sep 17 00:00:00 2001 From: apache2 Date: Fri, 15 Jul 2022 19:08:41 +0200 Subject: [PATCH] pread64-based buffered I/O --- libpoke/ios-dev-file.c | 223 +++++++++++++++++++++++++++++------------ 1 file changed, 158 insertions(+), 65 deletions(-) diff --git a/libpoke/ios-dev-file.c b/libpoke/ios-dev-file.c index 625d0273..02ff9691 100644 --- a/libpoke/ios-dev-file.c +++ b/libpoke/ios-dev-file.c @@ -27,6 +27,7 @@ #include #include #include +#include // SSIZE_MAX #include #include "ios.h" @@ -36,9 +37,12 @@ struct ios_dev_file { - FILE *file; char *filename; uint64_t flags; + int fileno; + size_t buffered_length; /* 0 <= buffered_length <= sizeof(buf) */ + size_t buf_offset; /* file offset that (buf) was loaded from */ + char buf[4096]; }; static const char * @@ -65,7 +69,7 @@ ios_dev_file_handler_normalize (const char *handler, uint64_t flags, int* error) /* Returns 0 when the flags are inconsistent. */ static inline int -ios_dev_file_convert_flags (int mode_flags, char **mode_for_fdopen) +ios_dev_file_convert_flags (int mode_flags) { int flags_for_open = 0; @@ -73,17 +77,14 @@ ios_dev_file_convert_flags (int mode_flags, char **mode_for_fdopen) && (mode_flags & IOS_F_WRITE)) { flags_for_open |= O_RDWR; - *mode_for_fdopen = "r+b"; } else if (mode_flags & IOS_F_READ) { flags_for_open |= O_RDONLY; - *mode_for_fdopen = "rb"; } else if (mode_flags & IOS_F_WRITE) { flags_for_open |= O_WRONLY; - *mode_for_fdopen = "wb"; } else /* Cannot open a file neither to write nor to read. */ @@ -100,19 +101,16 @@ ios_dev_file_open (const char *handler, uint64_t flags, int *error, void *data __attribute__ ((unused))) { struct ios_dev_file *fio = NULL; - FILE *f = NULL; int internal_error = IOD_ERROR; uint8_t mode_flags = flags & IOS_FLAGS_MODE; - char *mode_for_fdopen = NULL; int flags_for_open = 0; - int fd; + int fd = -1; if (mode_flags != 0) { /* Decide what mode to use to open the file. */ - flags_for_open - = ios_dev_file_convert_flags (mode_flags, &mode_for_fdopen); + flags_for_open = ios_dev_file_convert_flags (mode_flags); if (flags_for_open == -1) { internal_error = IOD_EFLAGS; @@ -121,41 +119,32 @@ ios_dev_file_open (const char *handler, uint64_t flags, int *error, fd = open (handler, flags_for_open, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fd == -1) goto err; - f = fdopen (fd, mode_for_fdopen); } else { /* Try read-write initially. If that fails, then try read-only. If that fails, then try write-only. */ - const char *mode_for_fdopen; fd = open (handler, O_RDWR, 0); flags |= (IOS_F_READ | IOS_F_WRITE); - mode_for_fdopen = "r+b"; if (fd == -1) { fd = open (handler, O_RDONLY, 0); if (fd != -1) flags &= ~IOS_F_WRITE; - mode_for_fdopen = "rb"; } if (fd == -1) { fd = open (handler, O_WRONLY, 0); if (fd != -1) flags &= ~IOS_F_READ; - mode_for_fdopen = "wb"; } if (fd == -1) goto err; - f = fdopen (fd, mode_for_fdopen); } - if (!f) - goto err; - - fio = malloc (sizeof (struct ios_dev_file)); + fio = calloc (1, sizeof (struct ios_dev_file)); if (!fio) goto err; @@ -163,7 +152,7 @@ ios_dev_file_open (const char *handler, uint64_t flags, int *error, if (!fio->filename) goto err; - fio->file = f; + fio->fileno = fd; fio->flags = flags; if (error) @@ -175,8 +164,8 @@ err: free (fio->filename); free (fio); - if (f) - fclose (f); + if (-1 != fd) + close (fd); if (error) { @@ -197,20 +186,25 @@ static int ios_dev_file_close (void *iod) { struct ios_dev_file *fio = iod; + int status = IOD_OK; - if (fclose (fio->file) == 0) + if (fsync(fio->fileno) != 0) { - free (fio->filename); - free (fio); - return IOD_OK; + /* If this fails, there was a problem writing the data to disk. + * but we should still close the fd and free the fio data structure. + */ + status = IOD_ERROR; } - else + + if (close (fio->fileno) != 0) { perror (fio->filename); - free (fio->filename); - free (fio); - return IOD_ERROR; + status = IOD_ERROR; } + + free (fio->filename); + free (fio); + return status; } static uint64_t @@ -221,31 +215,103 @@ ios_dev_file_get_flags (void *iod) return fio->flags; } - static int -ios_dev_file_pread (void *iod, void *buf, size_t count, ios_dev_off offset) +ios_dev_file_pread (void *const iod, void *buf, size_t count, ios_dev_off offset) { - struct ios_dev_file *fio = iod; - size_t ret; + struct ios_dev_file *const fio = iod; + ssize_t ret = 0; - /* We are using FILE* for buffering, rather than low-level fd, so we - have to fake low-level pread by using fseeko. */ - if (ftello(fio->file) != offset) - if (fseeko (fio->file, offset, SEEK_SET) == -1) - return IOD_EOF; - ret = fread (buf, 1, count, fio->file); - - if (ferror (fio->file)) + if (count > SSIZE_MAX) { - clearerr (fio->file); - return IOD_ERROR; + return IOD_ERROR; // TODO should probably be an ICE? } - /* XXX As long as count <= 9 because the ios layer reads at most an - unaligned uint64_t, we are unlikely to hit short reads. But if - future code adds in large buffer reads, we may want to retry on - short reads rather than giving up right away. */ - return ret == count ? IOD_OK : IOD_EOF; + /* assert that we can cast (offset) to what pread64 expects */ + _Static_assert(sizeof(offset) == sizeof(off_t)); + + // logic is roughly: + // :check if chunk is contained in buffer: + // 1. fully + // memcpy(buf, iod->buf, chunk); + // 2 partially + // memcpy what we have + // buffer the next chunk + // 3 our count is large so we are going to fill and flush buffer repeatedly + // ... currently just step 1, but we should probably just write directly + // to (buf) instead of using energy on copying to and from a buffer that + // we immediately erase. TODO. + + do { + const size_t buf_adjustment = offset - fio->buf_offset; + /* offset and buf_offset are both unsigned, so + * if offset > buf_offset: + * buf_adjustment contains the offset required to align buf with file (offset) + * such that (fio->buf+buf_adjustment) represent the same bytes as on disk. + * if offset < buf_offset, buf_adjustment underflows. + * TODO: if the file is incredibly large (near SIZE_MAX) + * and we have buffered near the end, + * this will be incorrect, eg: + * offset=0; buf_offset=(size_t)-1024 + * -> buf_adjustment < fio->buffered_length + * we could check that both of these are < SSIZE_MAX, but would appreciate + * a second pair of eyes on this. + */ + if (buf_adjustment < fio->buffered_length) + { + /* fio->buf + buf_adjustment contains some of this chunk, figure + * out how much and copy that into (buf): + */ + const size_t usable = fio->buffered_length - buf_adjustment; + const size_t chunk = usable > count + ? count + : usable; + buf = mempcpy(buf, fio->buf + buf_adjustment, chunk); + count -= chunk; + offset += chunk; + } + else if (0) + { + // TODO: if offset < buf_offset + // && offset + count < buf_offset + sizeof(fio->buf) + // then we have something buffered that we could copy in. + // If that happens to align with the end of 'count' then we can reduce count + // and write the buffered data into (buf): + // [offset] [count] + // [unbuffered] [fio->buf..buffered_length] + // [ pread ] <-- I/O needed + // If not, we'd have to split the request into two: + // [offset] [count] + // [ unbuffered ] [ fio->buf..buffered_length] [unbuffered] + // [ pread ] [ pread ] + // This logic then starts getting complicated, so for now we do NOT + // implement that optimization. + } + else + { + /* We do not have the required range buffered, so fill the buffer + * from disk and update the metadata. The next while-loop iteration + * will find the data in the buffer and return the needed portion. + */ + ret = pread (fio->fileno, (void *)fio->buf, sizeof(fio->buf), offset); + switch (ret) + { + case -1: + if (EINTR == errno) + { + continue; // try again + } + return IOD_ERROR; + case 0: + // In this case we keep the last chunk of the file buffered. + return IOD_EOF; + default: + fio->buffered_length = ret; + fio->buf_offset = offset; + } + } + } while (count); + + return IOD_OK; } static int @@ -253,22 +319,40 @@ ios_dev_file_pwrite (void *iod, const void *buf, size_t count, ios_dev_off offset) { struct ios_dev_file *fio = iod; - size_t ret; - - /* We are using FILE* for buffering, rather than low-level fd, so we - have to fake low-level pread by using fseeko. */ - if (fseeko (fio->file, offset, SEEK_SET)) - return IOD_EOF; - ret = fwrite (buf, 1, count, fio->file); + ssize_t ret = 0; - if (ferror (fio->file)) + if (count > SSIZE_MAX) { - perror ("write: "); - clearerr (fio->file); - return IOD_ERROR; + return IOD_ERROR; // TODO should probably be an ICE? } - return ret == count ? IOD_OK : IOD_EOF; + /* 2022-07-16: For the time being we invalidate the read buffer upon write. + * Since the IOD interface does not provide a write-barrier mechanism + * we cannot coalesce writes (buffer data to be written). + * We *could* refrain from invalidating the cache if the data written + * does not overlap with the buffered range, but it's unclear if that is + * worth it. We will have to measure and see. + * Setting fio->buffered_length = 0 prevents (ios_dev_file_pread) from using + * any buffered data and instead re-read from the fd: + */ + fio->buffered_length = 0; + + do { + ret = pwrite (fio->fileno, buf, count, offset); + + switch (ret) + { + case -1: + return IOD_ERROR; + case 0: + return IOD_EOF; + default: + buf += ret; + count -= ret; + } + } while(count); + + return IOD_OK; } static ios_dev_off @@ -277,14 +361,23 @@ ios_dev_file_size (void *iod) struct stat st; struct ios_dev_file *fio = iod; - fstat (fileno (fio->file), &st); - return st.st_size; + if (0 == fstat (fio->fileno, &st)) + { + return st.st_size; + } + else + { + /* TODO fstat failed, we do not really have a good way + * to signal this with the current ios_dev_file_size interface. + */ + return 0; + } } static int ios_dev_file_flush (void *iod, ios_dev_off offset) { - return IOS_OK; + return IOS_OK; // why is this IOS_OK and not IOD_OK? } struct ios_dev_if ios_dev_file = -- 2.30.2