Cygwin: fallocate(2): handle FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE
Split fhandler_disk_file::fallocate into multiple methods, each implementing a different aspect of fallocate(2), and add handling for FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE. To implement posix_fallocate(3) semantics more correctly, make sure to re-allocate holes in the given range if the file is sparse. While at it, change how we decide when to make a file sparse. The rule is now: make the file sparse if the hole created by the action spans at least one sparse block, taking the allocation granularity of sparse files into account. Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
f64f3eced8
commit
65831f88d6
|
@ -896,6 +896,9 @@ fhandler_base::write (const void *ptr, size_t len)
|
|||
|
||||
did_lseek (false); /* don't do it again */
|
||||
|
||||
/* If the file system supports sparse files and the application is
|
||||
writing after a long seek beyond EOF spanning more than one
|
||||
sparsifiable chunk, convert the file to a sparse file. */
|
||||
if (!(get_flags () & O_APPEND)
|
||||
&& !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE)
|
||||
&& NT_SUCCESS (NtQueryInformationFile (get_output_handle (),
|
||||
|
@ -904,12 +907,9 @@ fhandler_base::write (const void *ptr, size_t len)
|
|||
&& NT_SUCCESS (NtQueryInformationFile (get_output_handle (),
|
||||
&io, &fpi, sizeof fpi,
|
||||
FilePositionInformation))
|
||||
&& fpi.CurrentByteOffset.QuadPart
|
||||
>= fsi.EndOfFile.QuadPart + (128 * 1024))
|
||||
&& span_sparse_chunk (fpi.CurrentByteOffset.QuadPart,
|
||||
fsi.EndOfFile.QuadPart))
|
||||
{
|
||||
/* If the file system supports sparse files and the application
|
||||
is writing after a long seek beyond EOF, convert the file to
|
||||
a sparse file. */
|
||||
NTSTATUS status;
|
||||
status = NtFsControlFile (get_output_handle (), NULL, NULL, NULL,
|
||||
&io, FSCTL_SET_SPARSE, NULL, 0, NULL, 0);
|
||||
|
|
|
@ -1131,89 +1131,300 @@ fhandler_disk_file::fadvise (off_t offset, off_t length, int advice)
|
|||
}
|
||||
|
||||
int
|
||||
fhandler_disk_file::fallocate (int mode, off_t offset, off_t length)
|
||||
{
|
||||
int res = 0;
|
||||
|
||||
if (length < 0 || !get_handle ())
|
||||
res = EINVAL;
|
||||
else if (pc.isdir ())
|
||||
res = EISDIR;
|
||||
else if (!(get_access () & GENERIC_WRITE))
|
||||
res = EBADF;
|
||||
else
|
||||
fhandler_disk_file::falloc_allocate (int mode, off_t offset, off_t length)
|
||||
{
|
||||
NTSTATUS status;
|
||||
IO_STATUS_BLOCK io;
|
||||
FILE_STANDARD_INFORMATION fsi;
|
||||
FILE_END_OF_FILE_INFORMATION feofi;
|
||||
FILE_ALLOCATION_INFORMATION fai = { 0 };
|
||||
|
||||
/* Fetch EOF */
|
||||
status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
|
||||
FileStandardInformation);
|
||||
if (!NT_SUCCESS (status))
|
||||
return geterrno_from_nt_status (status);
|
||||
|
||||
/* Never change file size if FALLOC_FL_KEEP_SIZE is specified. */
|
||||
if ((mode & FALLOC_FL_KEEP_SIZE)
|
||||
&& offset + length > fsi.EndOfFile.QuadPart)
|
||||
{
|
||||
if (offset > fsi.EndOfFile.QuadPart) /* no-op */
|
||||
return 0;
|
||||
length = fsi.EndOfFile.QuadPart - offset;
|
||||
}
|
||||
mode &= ~FALLOC_FL_KEEP_SIZE;
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case 0:
|
||||
case __FALLOC_FL_TRUNCATE:
|
||||
break;
|
||||
case FALLOC_FL_PUNCH_HOLE: /* TODO */
|
||||
return EOPNOTSUPP;
|
||||
break;
|
||||
case FALLOC_FL_ZERO_RANGE: /* TODO */
|
||||
return EOPNOTSUPP;
|
||||
break;
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
if (mode == 0)
|
||||
/* For posix_fallocate(3), truncating the file is a no-op. However,
|
||||
for sparse files we still have to allocate the blocks within
|
||||
offset and offset + length which are currently in holes, due to
|
||||
the following POSIX requirement:
|
||||
"If posix_fallocate() returns successfully, subsequent writes to
|
||||
the specified file data shall not fail due to the lack of free
|
||||
space on the file system storage media." */
|
||||
if (offset + length <= fsi.EndOfFile.QuadPart)
|
||||
{
|
||||
/* If called through posix_fallocate, silently succeed if
|
||||
offset + length is less than the file's actual length. */
|
||||
|
||||
/* TODO: If the file is sparse, POSIX requires to allocate
|
||||
the holes within offset and offset + length. */
|
||||
if (offset + length < fsi.EndOfFile.QuadPart)
|
||||
if (!has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
|
||||
return 0;
|
||||
feofi.EndOfFile.QuadPart = fsi.EndOfFile.QuadPart;
|
||||
}
|
||||
|
||||
else
|
||||
feofi.EndOfFile.QuadPart = offset + length;
|
||||
/* Create sparse files only when called through ftruncate, not when
|
||||
called through posix_fallocate. */
|
||||
if (mode == __FALLOC_FL_TRUNCATE
|
||||
break;
|
||||
case __FALLOC_FL_TRUNCATE:
|
||||
/* For ftruncate(2), offset is 0. Just use length as is. */
|
||||
feofi.EndOfFile.QuadPart = length;
|
||||
|
||||
/* Make file sparse only when called through ftruncate and the mount
|
||||
mode supports sparse files. Also, make sure that the new region
|
||||
actually spans over at least one sparsifiable chunk. */
|
||||
if (pc.support_sparse ()
|
||||
&& !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE)
|
||||
&& pc.support_sparse ()
|
||||
&& offset + length >= fsi.EndOfFile.QuadPart + (128 * 1024))
|
||||
&& span_sparse_chunk (feofi.EndOfFile.QuadPart,
|
||||
fsi.EndOfFile.QuadPart))
|
||||
{
|
||||
status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
|
||||
FSCTL_SET_SPARSE, NULL, 0, NULL, 0);
|
||||
if (NT_SUCCESS (status))
|
||||
pc.file_attributes (pc.file_attributes ()
|
||||
| FILE_ATTRIBUTE_SPARSE_FILE);
|
||||
syscall_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
|
||||
debug_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
|
||||
status, pc.get_nt_native_path ());
|
||||
}
|
||||
break;
|
||||
case FALLOC_FL_KEEP_SIZE:
|
||||
/* Keep track of the allocation size for overallocation below.
|
||||
Note that overallocation in Windows is only temporary!
|
||||
As soon as the last open handle to the file is closed, the
|
||||
overallocation gets removed by the system. Also, overallocation
|
||||
for sparse files fails silently, so just don't bother. */
|
||||
if (offset + length > fsi.EndOfFile.QuadPart
|
||||
&& !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
|
||||
fai.AllocationSize.QuadPart = offset + length;
|
||||
|
||||
feofi.EndOfFile.QuadPart = fsi.EndOfFile.QuadPart;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Now set the new EOF */
|
||||
if (feofi.EndOfFile.QuadPart != fsi.EndOfFile.QuadPart)
|
||||
{
|
||||
status = NtSetInformationFile (get_handle (), &io,
|
||||
&feofi, sizeof feofi,
|
||||
FileEndOfFileInformation);
|
||||
if (!NT_SUCCESS (status))
|
||||
res = geterrno_from_nt_status (status);
|
||||
return geterrno_from_nt_status (status);
|
||||
}
|
||||
|
||||
/* If called via fallocate(2) or posix_fallocate(3), allocate blocks in
|
||||
sparse file holes. */
|
||||
if (mode != __FALLOC_FL_TRUNCATE
|
||||
&& length
|
||||
&& has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
|
||||
{
|
||||
int res = falloc_zero_range (mode | __FALLOC_FL_ZERO_HOLES,
|
||||
offset, length);
|
||||
if (res)
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Last but not least, set the new allocation size, if any */
|
||||
if (fai.AllocationSize.QuadPart)
|
||||
{
|
||||
/* This is not fatal. Just note a failure in the debug output. */
|
||||
status = NtSetInformationFile (get_handle (), &io,
|
||||
&fai, sizeof fai,
|
||||
FileAllocationInformation);
|
||||
if (!NT_SUCCESS (status))
|
||||
debug_printf ("%y = NtSetInformationFile(%S, "
|
||||
"FileAllocationInformation)",
|
||||
status, pc.get_nt_native_path ());
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
fhandler_disk_file::falloc_punch_hole (off_t offset, off_t length)
|
||||
{
|
||||
NTSTATUS status;
|
||||
IO_STATUS_BLOCK io;
|
||||
FILE_STANDARD_INFORMATION fsi;
|
||||
FILE_ZERO_DATA_INFORMATION fzi;
|
||||
|
||||
/* Fetch EOF */
|
||||
status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
|
||||
FileStandardInformation);
|
||||
if (!NT_SUCCESS (status))
|
||||
return geterrno_from_nt_status (status);
|
||||
|
||||
if (offset > fsi.EndOfFile.QuadPart) /* no-op */
|
||||
return 0;
|
||||
|
||||
if (offset + length > fsi.EndOfFile.QuadPart)
|
||||
length = fsi.EndOfFile.QuadPart - offset;
|
||||
|
||||
/* If the file isn't sparse yet, make it so. */
|
||||
if (!has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
|
||||
{
|
||||
status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
|
||||
FSCTL_SET_SPARSE, NULL, 0, NULL, 0);
|
||||
debug_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
|
||||
status, pc.get_nt_native_path ());
|
||||
if (!NT_SUCCESS (status))
|
||||
return geterrno_from_nt_status (status);
|
||||
pc.file_attributes (pc.file_attributes () | FILE_ATTRIBUTE_SPARSE_FILE);
|
||||
}
|
||||
|
||||
/* Now punch a hole. For once, FSCTL_SET_ZERO_DATA does it exactly as per
|
||||
fallocate(FALLOC_FL_PUNCH_HOLE) specs. */
|
||||
fzi.FileOffset.QuadPart = offset;
|
||||
fzi.BeyondFinalZero.QuadPart = offset + length;
|
||||
status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
|
||||
FSCTL_SET_ZERO_DATA, &fzi, sizeof fzi, NULL, 0);
|
||||
if (!NT_SUCCESS (status))
|
||||
return geterrno_from_nt_status (status);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
fhandler_disk_file::falloc_zero_range (int mode, off_t offset, off_t length)
|
||||
{
|
||||
NTSTATUS status;
|
||||
IO_STATUS_BLOCK io;
|
||||
FILE_STANDARD_INFORMATION fsi;
|
||||
FILE_ALLOCATED_RANGE_BUFFER inp, *out = NULL;
|
||||
OBJECT_ATTRIBUTES attr;
|
||||
HANDLE zo_handle;
|
||||
tmp_pathbuf tp;
|
||||
size_t data_chunk_count = 0;
|
||||
|
||||
/* Fetch EOF */
|
||||
status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
|
||||
FileStandardInformation);
|
||||
if (!NT_SUCCESS (status))
|
||||
return geterrno_from_nt_status (status);
|
||||
|
||||
/* offset and length must not exceed EOF with FALLOC_FL_KEEP_SIZE */
|
||||
if (mode & FALLOC_FL_KEEP_SIZE)
|
||||
{
|
||||
if (offset > fsi.EndOfFile.QuadPart) /* no-op */
|
||||
return 0;
|
||||
|
||||
if (offset + length > fsi.EndOfFile.QuadPart)
|
||||
length = fsi.EndOfFile.QuadPart - offset;
|
||||
}
|
||||
|
||||
/* If the file is sparse, fetch the data ranges within the file
|
||||
to be able to recognize holes. */
|
||||
if (has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
|
||||
{
|
||||
inp.FileOffset.QuadPart = offset;
|
||||
inp.Length.QuadPart = length;
|
||||
out = (FILE_ALLOCATED_RANGE_BUFFER *) tp.t_get ();
|
||||
status = NtFsControlFile (get_handle (), NULL, NULL, NULL,
|
||||
&io, FSCTL_QUERY_ALLOCATED_RANGES,
|
||||
&inp, sizeof inp, out, 2 * NT_MAX_PATH);
|
||||
if (NT_ERROR (status))
|
||||
out = NULL;
|
||||
else
|
||||
data_chunk_count = io.Information / sizeof *out;
|
||||
}
|
||||
|
||||
/* Re-open the file and use this handle ever after, so as not to
|
||||
move the file pointer of the original file object. */
|
||||
status = NtOpenFile (&zo_handle, SYNCHRONIZE | GENERIC_WRITE,
|
||||
pc.init_reopen_attr (attr, get_handle ()), &io,
|
||||
FILE_SHARE_VALID_FLAGS, get_options ());
|
||||
if (!NT_SUCCESS (status))
|
||||
return geterrno_from_nt_status (status);
|
||||
|
||||
/* FILE_SPARSE_GRANULARITY == 2 * NT_MAX_PATH ==> fits exactly */
|
||||
char *nullbuf = tp.t_get ();
|
||||
memset (nullbuf, 0, FILE_SPARSE_GRANULARITY);
|
||||
int res = 0;
|
||||
|
||||
/* Split range into chunks of size FILE_SPARSE_GRANULARITY and handle
|
||||
them according to being data or hole */
|
||||
LARGE_INTEGER off = { QuadPart:offset };
|
||||
size_t start_idx = 0;
|
||||
while (length > 0)
|
||||
{
|
||||
off_t chunk_len;
|
||||
bool in_data = true;
|
||||
|
||||
if (off.QuadPart % FILE_SPARSE_GRANULARITY) /* First block */
|
||||
chunk_len = roundup2 (off.QuadPart, FILE_SPARSE_GRANULARITY) - off.QuadPart;
|
||||
else
|
||||
chunk_len = FILE_SPARSE_GRANULARITY;
|
||||
if (chunk_len > length) /* First or last block */
|
||||
chunk_len = length;
|
||||
|
||||
/* Check if the current chunk is within data or hole */
|
||||
if (has_attribute (FILE_ATTRIBUTE_SPARSE_FILE)
|
||||
&& off.QuadPart < fsi.EndOfFile.QuadPart)
|
||||
{
|
||||
in_data = false;
|
||||
for (size_t idx = start_idx; idx < data_chunk_count; ++idx)
|
||||
if (off.QuadPart >= out[idx].FileOffset.QuadPart)
|
||||
{
|
||||
/* Skip entries with lower start address next time. */
|
||||
start_idx = idx;
|
||||
if (off.QuadPart < out[idx].FileOffset.QuadPart
|
||||
+ out[idx].Length.QuadPart)
|
||||
{
|
||||
in_data = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Eventually, write zeros into the block. Completely zero out data
|
||||
blocks, just write a single zero to former holes in sparse files.
|
||||
If __FALLOC_FL_ZERO_HOLES has been specified, only write to holes. */
|
||||
if (!(mode & __FALLOC_FL_ZERO_HOLES) || !in_data)
|
||||
{
|
||||
status = NtWriteFile (zo_handle, NULL, NULL, NULL, &io, nullbuf,
|
||||
in_data ? chunk_len : 1, &off, NULL);
|
||||
if (!NT_SUCCESS (status))
|
||||
{
|
||||
res = geterrno_from_nt_status (status);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
off.QuadPart += chunk_len;
|
||||
length -= chunk_len;
|
||||
}
|
||||
|
||||
NtClose (zo_handle);
|
||||
return res;
|
||||
}
|
||||
|
||||
int
|
||||
fhandler_disk_file::fallocate (int mode, off_t offset, off_t length)
|
||||
{
|
||||
if (length < 0 || !get_handle ())
|
||||
return EINVAL;
|
||||
if (pc.isdir ())
|
||||
return EISDIR;
|
||||
if (!(get_access () & GENERIC_WRITE))
|
||||
return EBADF;
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case 0:
|
||||
case __FALLOC_FL_TRUNCATE:
|
||||
case FALLOC_FL_KEEP_SIZE:
|
||||
return falloc_allocate (mode, offset, length);
|
||||
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
|
||||
/* Only if the filesystem supports it... */
|
||||
if (!(pc.fs_flags () & FILE_SUPPORTS_SPARSE_FILES))
|
||||
return EOPNOTSUPP;
|
||||
return falloc_punch_hole (offset, length);
|
||||
case FALLOC_FL_ZERO_RANGE:
|
||||
case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
|
||||
return falloc_zero_range (mode, offset, length);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
int
|
||||
fhandler_disk_file::link (const char *newpath)
|
||||
{
|
||||
|
@ -1741,7 +1952,7 @@ fhandler_disk_file::pwrite (void *buf, size_t count, off_t offset, void *aio)
|
|||
&& NT_SUCCESS (NtQueryInformationFile (get_handle (),
|
||||
&io, &fsi, sizeof fsi,
|
||||
FileStandardInformation))
|
||||
&& offset >= fsi.EndOfFile.QuadPart + (128 * 1024))
|
||||
&& span_sparse_chunk (offset, fsi.EndOfFile.QuadPart))
|
||||
{
|
||||
NTSTATUS status;
|
||||
status = NtFsControlFile (get_handle (), NULL, NULL, NULL,
|
||||
|
|
|
@ -51,6 +51,7 @@ details. */
|
|||
#define FALLOC_FL_KEEP_SIZE 0x1000
|
||||
/* Internal flags */
|
||||
#define __FALLOC_FL_TRUNCATE 0x2000
|
||||
#define __FALLOC_FL_ZERO_HOLES 0x4000
|
||||
#endif
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
|
|
@ -14,6 +14,7 @@ details. */
|
|||
#include <cygwin/_socketflags.h>
|
||||
#include <cygwin/_ucred.h>
|
||||
#include <sys/un.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
/* It appears that 64K is the block size used for buffered I/O on NT.
|
||||
Using this blocksize in read/write calls in the application results
|
||||
|
@ -37,6 +38,15 @@ details. */
|
|||
ERROR_NOT_ENOUGH_MEMORY occurs in win7 if this value is used. */
|
||||
#define INREC_SIZE 2048
|
||||
|
||||
/* Helper function to allow checking if some offset in a file is so far
|
||||
beyond EOF, that at least one sparse chunk fits into the span. */
|
||||
inline bool
|
||||
span_sparse_chunk (off_t new_pos, off_t old_eof)
|
||||
{
|
||||
return roundup2 (old_eof, FILE_SPARSE_GRANULARITY) + FILE_SPARSE_GRANULARITY
|
||||
<= rounddown (new_pos, FILE_SPARSE_GRANULARITY);
|
||||
}
|
||||
|
||||
extern const char *windows_device_names[];
|
||||
extern struct __cygwin_perfile *perfile_table;
|
||||
#define __fmode (*(user_data->fmode_ptr))
|
||||
|
@ -1708,6 +1718,10 @@ class fhandler_disk_file: public fhandler_base
|
|||
uint64_t fs_ioc_getflags ();
|
||||
int fs_ioc_setflags (uint64_t);
|
||||
|
||||
falloc_allocate (int, off_t, off_t);
|
||||
falloc_punch_hole (off_t, off_t);
|
||||
falloc_zero_range (int, off_t, off_t);
|
||||
|
||||
public:
|
||||
fhandler_disk_file ();
|
||||
fhandler_disk_file (path_conv &pc);
|
||||
|
|
|
@ -45,6 +45,9 @@ extern GUID __cygwin_socket_guid;
|
|||
#define FILE_WRITE_TO_END_OF_FILE (-1LL)
|
||||
#define FILE_USE_FILE_POINTER_POSITION (-2LL)
|
||||
|
||||
/* Sparsification granularity on NTFS. */
|
||||
#define FILE_SPARSE_GRANULARITY (64 * 1024)
|
||||
|
||||
/* Device Characteristics. */
|
||||
#define FILE_REMOVABLE_MEDIA 0x00000001
|
||||
#define FILE_READ_ONLY_DEVICE 0x00000002
|
||||
|
@ -390,6 +393,11 @@ typedef struct _FILE_ALL_INFORMATION { // 18
|
|||
FILE_NAME_INFORMATION NameInformation;
|
||||
} FILE_ALL_INFORMATION, *PFILE_ALL_INFORMATION;
|
||||
|
||||
typedef struct _FILE_ALLOCATION_INFORMATION // 19
|
||||
{
|
||||
LARGE_INTEGER AllocationSize;
|
||||
} FILE_ALLOCATION_INFORMATION, *PFILE_ALLOCATION_INFORMATION;
|
||||
|
||||
typedef struct _FILE_END_OF_FILE_INFORMATION // 20
|
||||
{
|
||||
LARGE_INTEGER EndOfFile;
|
||||
|
|
Loading…
Reference in New Issue