Cygwin: fallocate(2): handle FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE

Split fhandler_disk_file::fallocate into multiple methods, each
implementing a different aspect of fallocate(2), thus adding
FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE handling.

To implement posix_fallocate(3) semantics more correctly, make
sure to re-allocate holes in the given range if the file is sparse.

While at it, change the check that decides when to make a file sparse.
The new rule is: make the file sparse if the hole created by the action
spans at least one sparse block, taking the allocation granularity
of sparse files into account.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-11-28 10:42:52 +01:00
parent f64f3eced8
commit 65831f88d6
5 changed files with 301 additions and 67 deletions

View File

@ -896,6 +896,9 @@ fhandler_base::write (const void *ptr, size_t len)
did_lseek (false); /* don't do it again */
/* If the file system supports sparse files and the application is
writing after a long seek beyond EOF spanning more than one
sparsifiable chunk, convert the file to a sparse file. */
if (!(get_flags () & O_APPEND)
&& !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE)
&& NT_SUCCESS (NtQueryInformationFile (get_output_handle (),
@ -904,12 +907,9 @@ fhandler_base::write (const void *ptr, size_t len)
&& NT_SUCCESS (NtQueryInformationFile (get_output_handle (),
&io, &fpi, sizeof fpi,
FilePositionInformation))
&& fpi.CurrentByteOffset.QuadPart
>= fsi.EndOfFile.QuadPart + (128 * 1024))
&& span_sparse_chunk (fpi.CurrentByteOffset.QuadPart,
fsi.EndOfFile.QuadPart))
{
/* If the file system supports sparse files and the application
is writing after a long seek beyond EOF, convert the file to
a sparse file. */
NTSTATUS status;
status = NtFsControlFile (get_output_handle (), NULL, NULL, NULL,
&io, FSCTL_SET_SPARSE, NULL, 0, NULL, 0);

View File

@ -1131,89 +1131,300 @@ fhandler_disk_file::fadvise (off_t offset, off_t length, int advice)
}
int
fhandler_disk_file::fallocate (int mode, off_t offset, off_t length)
fhandler_disk_file::falloc_allocate (int mode, off_t offset, off_t length)
{
int res = 0;
NTSTATUS status;
IO_STATUS_BLOCK io;
FILE_STANDARD_INFORMATION fsi;
FILE_END_OF_FILE_INFORMATION feofi;
FILE_ALLOCATION_INFORMATION fai = { 0 };
if (length < 0 || !get_handle ())
res = EINVAL;
else if (pc.isdir ())
res = EISDIR;
else if (!(get_access () & GENERIC_WRITE))
res = EBADF;
else
/* Fetch EOF */
status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
FileStandardInformation);
if (!NT_SUCCESS (status))
return geterrno_from_nt_status (status);
switch (mode)
{
NTSTATUS status;
IO_STATUS_BLOCK io;
FILE_STANDARD_INFORMATION fsi;
FILE_END_OF_FILE_INFORMATION feofi;
status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
FileStandardInformation);
if (!NT_SUCCESS (status))
return geterrno_from_nt_status (status);
/* Never change file size if FALLOC_FL_KEEP_SIZE is specified. */
if ((mode & FALLOC_FL_KEEP_SIZE)
&& offset + length > fsi.EndOfFile.QuadPart)
case 0:
/* For posix_fallocate(3), truncating the file is a no-op. However,
for sparse files we still have to allocate the blocks within
offset and offset + length which are currently in holes, due to
the following POSIX requirement:
"If posix_fallocate() returns successfully, subsequent writes to
the specified file data shall not fail due to the lack of free
space on the file system storage media." */
if (offset + length <= fsi.EndOfFile.QuadPart)
{
if (offset > fsi.EndOfFile.QuadPart) /* no-op */
if (!has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
return 0;
length = fsi.EndOfFile.QuadPart - offset;
feofi.EndOfFile.QuadPart = fsi.EndOfFile.QuadPart;
}
mode &= ~FALLOC_FL_KEEP_SIZE;
else
feofi.EndOfFile.QuadPart = offset + length;
break;
case __FALLOC_FL_TRUNCATE:
/* For ftruncate(2), offset is 0. Just use length as is. */
feofi.EndOfFile.QuadPart = length;
switch (mode)
{
case 0:
case __FALLOC_FL_TRUNCATE:
break;
case FALLOC_FL_PUNCH_HOLE: /* TODO */
return EOPNOTSUPP;
break;
case FALLOC_FL_ZERO_RANGE: /* TODO */
return EOPNOTSUPP;
break;
default:
return EINVAL;
}
if (mode == 0)
{
/* If called through posix_fallocate, silently succeed if
offset + length is less than the file's actual length. */
/* TODO: If the file is sparse, POSIX requires to allocate
the holes within offset and offset + length. */
if (offset + length < fsi.EndOfFile.QuadPart)
return 0;
}
feofi.EndOfFile.QuadPart = offset + length;
/* Create sparse files only when called through ftruncate, not when
called through posix_fallocate. */
if (mode == __FALLOC_FL_TRUNCATE
/* Make file sparse only when called through ftruncate and the mount
mode supports sparse files. Also, make sure that the new region
actually spans over at least one sparsifiable chunk. */
if (pc.support_sparse ()
&& !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE)
&& pc.support_sparse ()
&& offset + length >= fsi.EndOfFile.QuadPart + (128 * 1024))
&& span_sparse_chunk (feofi.EndOfFile.QuadPart,
fsi.EndOfFile.QuadPart))
{
status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
FSCTL_SET_SPARSE, NULL, 0, NULL, 0);
if (NT_SUCCESS (status))
pc.file_attributes (pc.file_attributes ()
| FILE_ATTRIBUTE_SPARSE_FILE);
syscall_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
status, pc.get_nt_native_path ());
| FILE_ATTRIBUTE_SPARSE_FILE);
debug_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
status, pc.get_nt_native_path ());
}
break;
case FALLOC_FL_KEEP_SIZE:
/* Keep track of the allocation size for overallocation below.
Note that overallocation in Windows is only temporary!
As soon as the last open handle to the file is closed, the
overallocation gets removed by the system. Also, overallocation
for sparse files fails silently, so just don't bother. */
if (offset + length > fsi.EndOfFile.QuadPart
&& !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
fai.AllocationSize.QuadPart = offset + length;
feofi.EndOfFile.QuadPart = fsi.EndOfFile.QuadPart;
break;
}
/* Now set the new EOF */
if (feofi.EndOfFile.QuadPart != fsi.EndOfFile.QuadPart)
{
status = NtSetInformationFile (get_handle (), &io,
&feofi, sizeof feofi,
FileEndOfFileInformation);
if (!NT_SUCCESS (status))
res = geterrno_from_nt_status (status);
return geterrno_from_nt_status (status);
}
/* If called via fallocate(2) or posix_fallocate(3), allocate blocks in
sparse file holes. */
if (mode != __FALLOC_FL_TRUNCATE
&& length
&& has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
{
int res = falloc_zero_range (mode | __FALLOC_FL_ZERO_HOLES,
offset, length);
if (res)
return res;
}
/* Last but not least, set the new allocation size, if any */
if (fai.AllocationSize.QuadPart)
{
/* This is not fatal. Just note a failure in the debug output. */
status = NtSetInformationFile (get_handle (), &io,
&fai, sizeof fai,
FileAllocationInformation);
if (!NT_SUCCESS (status))
debug_printf ("%y = NtSetInformationFile(%S, "
"FileAllocationInformation)",
status, pc.get_nt_native_path ());
}
return 0;
}
int
fhandler_disk_file::falloc_punch_hole (off_t offset, off_t length)
{
NTSTATUS status;
IO_STATUS_BLOCK io;
FILE_STANDARD_INFORMATION fsi;
FILE_ZERO_DATA_INFORMATION fzi;
/* Fetch EOF */
status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
FileStandardInformation);
if (!NT_SUCCESS (status))
return geterrno_from_nt_status (status);
if (offset > fsi.EndOfFile.QuadPart) /* no-op */
return 0;
if (offset + length > fsi.EndOfFile.QuadPart)
length = fsi.EndOfFile.QuadPart - offset;
/* If the file isn't sparse yet, make it so. */
if (!has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
{
status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
FSCTL_SET_SPARSE, NULL, 0, NULL, 0);
debug_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
status, pc.get_nt_native_path ());
if (!NT_SUCCESS (status))
return geterrno_from_nt_status (status);
pc.file_attributes (pc.file_attributes () | FILE_ATTRIBUTE_SPARSE_FILE);
}
/* Now punch a hole. For once, FSCTL_SET_ZERO_DATA does it exactly as per
fallocate(FALLOC_FL_PUNCH_HOLE) specs. */
fzi.FileOffset.QuadPart = offset;
fzi.BeyondFinalZero.QuadPart = offset + length;
status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
FSCTL_SET_ZERO_DATA, &fzi, sizeof fzi, NULL, 0);
if (!NT_SUCCESS (status))
return geterrno_from_nt_status (status);
return 0;
}
int
fhandler_disk_file::falloc_zero_range (int mode, off_t offset, off_t length)
{
NTSTATUS status;
IO_STATUS_BLOCK io;
FILE_STANDARD_INFORMATION fsi;
FILE_ALLOCATED_RANGE_BUFFER inp, *out = NULL;
OBJECT_ATTRIBUTES attr;
HANDLE zo_handle;
tmp_pathbuf tp;
size_t data_chunk_count = 0;
/* Fetch EOF */
status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
FileStandardInformation);
if (!NT_SUCCESS (status))
return geterrno_from_nt_status (status);
/* offset and length must not exceed EOF with FALLOC_FL_KEEP_SIZE */
if (mode & FALLOC_FL_KEEP_SIZE)
{
if (offset > fsi.EndOfFile.QuadPart) /* no-op */
return 0;
if (offset + length > fsi.EndOfFile.QuadPart)
length = fsi.EndOfFile.QuadPart - offset;
}
/* If the file is sparse, fetch the data ranges within the file
to be able to recognize holes. */
if (has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
{
inp.FileOffset.QuadPart = offset;
inp.Length.QuadPart = length;
out = (FILE_ALLOCATED_RANGE_BUFFER *) tp.t_get ();
status = NtFsControlFile (get_handle (), NULL, NULL, NULL,
&io, FSCTL_QUERY_ALLOCATED_RANGES,
&inp, sizeof inp, out, 2 * NT_MAX_PATH);
if (NT_ERROR (status))
out = NULL;
else
data_chunk_count = io.Information / sizeof *out;
}
/* Re-open the file and use this handle ever after, so as not to
move the file pointer of the original file object. */
status = NtOpenFile (&zo_handle, SYNCHRONIZE | GENERIC_WRITE,
pc.init_reopen_attr (attr, get_handle ()), &io,
FILE_SHARE_VALID_FLAGS, get_options ());
if (!NT_SUCCESS (status))
return geterrno_from_nt_status (status);
/* FILE_SPARSE_GRANULARITY == 2 * NT_MAX_PATH ==> fits exactly */
char *nullbuf = tp.t_get ();
memset (nullbuf, 0, FILE_SPARSE_GRANULARITY);
int res = 0;
/* Split range into chunks of size FILE_SPARSE_GRANULARITY and handle
them according to being data or hole */
LARGE_INTEGER off = { QuadPart:offset };
size_t start_idx = 0;
while (length > 0)
{
off_t chunk_len;
bool in_data = true;
if (off.QuadPart % FILE_SPARSE_GRANULARITY) /* First block */
chunk_len = roundup2 (off.QuadPart, FILE_SPARSE_GRANULARITY) - off.QuadPart;
else
chunk_len = FILE_SPARSE_GRANULARITY;
if (chunk_len > length) /* First or last block */
chunk_len = length;
/* Check if the current chunk is within data or hole */
if (has_attribute (FILE_ATTRIBUTE_SPARSE_FILE)
&& off.QuadPart < fsi.EndOfFile.QuadPart)
{
in_data = false;
for (size_t idx = start_idx; idx < data_chunk_count; ++idx)
if (off.QuadPart >= out[idx].FileOffset.QuadPart)
{
/* Skip entries with lower start address next time. */
start_idx = idx;
if (off.QuadPart < out[idx].FileOffset.QuadPart
+ out[idx].Length.QuadPart)
{
in_data = true;
break;
}
}
}
/* Eventually, write zeros into the block. Completely zero out data
blocks, just write a single zero to former holes in sparse files.
If __FALLOC_FL_ZERO_HOLES has been specified, only write to holes. */
if (!(mode & __FALLOC_FL_ZERO_HOLES) || !in_data)
{
status = NtWriteFile (zo_handle, NULL, NULL, NULL, &io, nullbuf,
in_data ? chunk_len : 1, &off, NULL);
if (!NT_SUCCESS (status))
{
res = geterrno_from_nt_status (status);
break;
}
}
off.QuadPart += chunk_len;
length -= chunk_len;
}
NtClose (zo_handle);
return res;
}
/* Entry point for fallocate(2), posix_fallocate(3) and ftruncate(2) on
   disk files.  Validates the request and hands it to the helper matching
   the exact flag combination in mode.

   Returns 0 on success or an errno value: EINVAL for a negative length, a
   missing handle or an unsupported flag combination, EISDIR for
   directories, EBADF if the file isn't open for writing, and EOPNOTSUPP
   when punching holes on a filesystem without sparse file support. */
int
fhandler_disk_file::fallocate (int mode, off_t offset, off_t length)
{
  /* Checks common to all modes. */
  if (length < 0 || !get_handle ())
    return EINVAL;
  if (pc.isdir ())
    return EISDIR;
  if (!(get_access () & GENERIC_WRITE))
    return EBADF;

  /* Plain allocation, truncation, and overallocation. */
  if (mode == 0
      || mode == __FALLOC_FL_TRUNCATE
      || mode == FALLOC_FL_KEEP_SIZE)
    return falloc_allocate (mode, offset, length);

  /* Punching holes is only accepted together with FALLOC_FL_KEEP_SIZE,
     and only if the filesystem supports sparse files. */
  if (mode == (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))
    {
      const bool sparse_capable
        = (pc.fs_flags () & FILE_SUPPORTS_SPARSE_FILES) != 0;
      return sparse_capable ? falloc_punch_hole (offset, length)
                            : EOPNOTSUPP;
    }

  /* Zeroing a range, with or without keeping the file size. */
  if (mode == FALLOC_FL_ZERO_RANGE
      || mode == (FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE))
    return falloc_zero_range (mode, offset, length);

  /* Every other flag combination is invalid. */
  return EINVAL;
}
int
fhandler_disk_file::link (const char *newpath)
{
@ -1741,7 +1952,7 @@ fhandler_disk_file::pwrite (void *buf, size_t count, off_t offset, void *aio)
&& NT_SUCCESS (NtQueryInformationFile (get_handle (),
&io, &fsi, sizeof fsi,
FileStandardInformation))
&& offset >= fsi.EndOfFile.QuadPart + (128 * 1024))
&& span_sparse_chunk (offset, fsi.EndOfFile.QuadPart))
{
NTSTATUS status;
status = NtFsControlFile (get_handle (), NULL, NULL, NULL,

View File

@ -51,6 +51,7 @@ details. */
#define FALLOC_FL_KEEP_SIZE 0x1000
/* Internal flags */
#define __FALLOC_FL_TRUNCATE 0x2000
#define __FALLOC_FL_ZERO_HOLES 0x4000
#endif
__BEGIN_DECLS

View File

@ -14,6 +14,7 @@ details. */
#include <cygwin/_socketflags.h>
#include <cygwin/_ucred.h>
#include <sys/un.h>
#include <sys/param.h>
/* It appears that 64K is the block size used for buffered I/O on NT.
Using this blocksize in read/write calls in the application results
@ -37,6 +38,15 @@ details. */
ERROR_NOT_ENOUGH_MEMORY occurs in win7 if this value is used. */
#define INREC_SIZE 2048
/* Check whether the gap between the old EOF and a new file position is big
   enough to hold at least one complete sparse chunk.  The old EOF is
   rounded up and the new position rounded down to FILE_SPARSE_GRANULARITY
   boundaries, so only fully covered chunks count. */
inline bool
span_sparse_chunk (off_t new_pos, off_t old_eof)
{
  /* First chunk boundary at or after the old EOF. */
  const off_t gap_start = roundup2 (old_eof, FILE_SPARSE_GRANULARITY);
  /* Last chunk boundary at or before the new position. */
  const off_t gap_limit = rounddown (new_pos, FILE_SPARSE_GRANULARITY);

  return gap_start + FILE_SPARSE_GRANULARITY <= gap_limit;
}
extern const char *windows_device_names[];
extern struct __cygwin_perfile *perfile_table;
#define __fmode (*(user_data->fmode_ptr))
@ -1708,6 +1718,10 @@ class fhandler_disk_file: public fhandler_base
uint64_t fs_ioc_getflags ();
int fs_ioc_setflags (uint64_t);
/* Helpers implementing the different aspects of fallocate ().  Each one
   returns 0 on success or an errno value, matching their definitions. */
int falloc_allocate (int, off_t, off_t);
int falloc_punch_hole (off_t, off_t);
int falloc_zero_range (int, off_t, off_t);
public:
fhandler_disk_file ();
fhandler_disk_file (path_conv &pc);

View File

@ -45,6 +45,9 @@ extern GUID __cygwin_socket_guid;
#define FILE_WRITE_TO_END_OF_FILE (-1LL)
#define FILE_USE_FILE_POINTER_POSITION (-2LL)
/* Sparsification granularity on NTFS. */
#define FILE_SPARSE_GRANULARITY (64 * 1024)
/* Device Characteristics. */
#define FILE_REMOVABLE_MEDIA 0x00000001
#define FILE_READ_ONLY_DEVICE 0x00000002
@ -390,6 +393,11 @@ typedef struct _FILE_ALL_INFORMATION { // 18
FILE_NAME_INFORMATION NameInformation;
} FILE_ALL_INFORMATION, *PFILE_ALL_INFORMATION;
/* Buffer passed to NtSetInformationFile with the FileAllocationInformation
   info class.  AllocationSize is the requested allocation size of the file
   in bytes.  The trailing number presumably is the value of the matching
   FILE_INFORMATION_CLASS member, as for the neighbouring structs. */
typedef struct _FILE_ALLOCATION_INFORMATION // 19
{
LARGE_INTEGER AllocationSize;
} FILE_ALLOCATION_INFORMATION, *PFILE_ALLOCATION_INFORMATION;
typedef struct _FILE_END_OF_FILE_INFORMATION // 20
{
LARGE_INTEGER EndOfFile;