680 lines
21 KiB
C
Raw Normal View History

/*
** 2010 April 7
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
**
** This file implements an example of a simple VFS implementation that
** omits complex features often not required or not possible on embedded
** platforms. Code is included to buffer writes to the journal file,
** which can be a significant performance improvement on some embedded
** platforms.
**
** OVERVIEW
**
** The code in this file implements a minimal SQLite VFS that can be
** used on Linux and other posix-like operating systems. The following
** system calls are used:
**
** File-system: access(), unlink(), getcwd()
** File IO: open(), read(), write(), fsync(), close(), fstat()
** Other: sleep(), usleep(), time()
**
** The following VFS features are omitted:
**
** 1. File locking. The user must ensure that there is at most one
** connection to each database when using this VFS. Multiple
** connections to a single shared-cache count as a single connection
** for the purposes of the previous statement.
**
** 2. The loading of dynamic extensions (shared libraries).
**
** 3. Temporary files. The user must configure SQLite to use in-memory
** temp files when using this VFS. The easiest way to do this is to
** compile with:
**
** -DSQLITE_TEMP_STORE=3
**
** 4. File truncation. As of version 3.6.24, SQLite may run without
** a working xTruncate() call, providing the user does not configure
** SQLite to use "journal_mode=truncate", or use both
** "journal_mode=persist" and ATTACHed databases.
**
** It is assumed that the system uses UNIX-like path-names. Specifically,
** that '/' characters are used to separate path components and that
** a path-name is a relative path unless it begins with a '/'. And that
** no UTF-8 encoded paths are greater than 512 bytes in length.
**
** JOURNAL WRITE-BUFFERING
**
** To commit a transaction to the database, SQLite first writes rollback
** information into the journal file. This usually consists of 4 steps:
**
** 1. The rollback information is sequentially written into the journal
** file, starting at the start of the file.
** 2. The journal file is synced to disk.
** 3. A modification is made to the first few bytes of the journal file.
** 4. The journal file is synced to disk again.
**
** Most of the data is written in step 1 using a series of calls to the
** VFS xWrite() method. The buffers passed to the xWrite() calls are of
** various sizes. For example, as of version 3.6.24, when committing a
** transaction that modifies 3 pages of a database file that uses 4096
** byte pages residing on a media with 512 byte sectors, SQLite makes
** eleven calls to the xWrite() method to create the rollback journal,
** as follows:
**
** Write offset | Bytes written
** ----------------------------
** 0 512
** 512 4
** 516 4096
** 4612 4
** 4616 4
** 4620 4096
** 8716 4
** 8720 4
** 8724 4096
** 12820 4
** ++++++++++++SYNC+++++++++++
** 0 12
** ++++++++++++SYNC+++++++++++
**
** On many operating systems, this is an efficient way to write to a file.
** However, on some embedded systems that do not cache writes in OS
** buffers it is much more efficient to write data in blocks that are
** an integer multiple of the sector-size in size and aligned at the
** start of a sector.
**
** To work around this, the code in this file allocates a fixed size
** buffer of SQLITE_DEMOVFS_BUFFERSZ using sqlite3_malloc() whenever a
** journal file is opened. It uses the buffer to coalesce sequential
** writes into aligned SQLITE_DEMOVFS_BUFFERSZ blocks. When SQLite
** invokes the xSync() method to sync the contents of the file to disk,
** all accumulated data is written out, even if it does not constitute
** a complete block. This means the actual IO to create the rollback
** journal for the example transaction above is this:
**
** Write offset | Bytes written
** ----------------------------
** 0 8192
** 8192 4632
** ++++++++++++SYNC+++++++++++
** 0 12
** ++++++++++++SYNC+++++++++++
**
** Much more efficient if the underlying OS is not caching write
** operations.
*/
#if !defined(SQLITE_TEST) || SQLITE_OS_UNIX
#include <sqlite3.h>
#include <assert.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/param.h>
#include <unistd.h>
#include <time.h>
#include <errno.h>
#include <fcntl.h>
/*
** Size of the write buffer used by journal files in bytes.
*/
#ifndef SQLITE_DEMOVFS_BUFFERSZ
# define SQLITE_DEMOVFS_BUFFERSZ 8192
#endif
/*
** The maximum pathname length supported by this VFS.
*/
#define MAXPATHNAME 512
/*
** When using this VFS, the sqlite3_file* handles that SQLite uses are
** actually pointers to instances of type DemoFile.
*/
typedef struct DemoFile DemoFile;
struct DemoFile {
sqlite3_file base; /* Base class. Must be first. */
int fd; /* File descriptor */
char *aBuffer; /* Pointer to malloc'd buffer */
int nBuffer; /* Valid bytes of data in zBuffer */
sqlite3_int64 iBufferOfst; /* Offset in file of zBuffer[0] */
};
/*
** Write directly to the file passed as the first argument. Even if the
** file has a write-buffer (DemoFile.aBuffer), ignore it.
*/
static int demoDirectWrite(
DemoFile *p, /* File handle */
const void *zBuf, /* Buffer containing data to write */
int iAmt, /* Size of data to write in bytes */
sqlite_int64 iOfst /* File offset to write to */
){
off_t ofst; /* Return value from lseek() */
size_t nWrite; /* Return value from write() */
ofst = lseek(p->fd, iOfst, SEEK_SET);
if( ofst!=iOfst ){
return SQLITE_IOERR_WRITE;
}
nWrite = write(p->fd, zBuf, iAmt);
if( nWrite!=iAmt ){
return SQLITE_IOERR_WRITE;
}
return SQLITE_OK;
}
/*
** Flush the contents of the DemoFile.aBuffer buffer to disk. This is a
** no-op if this particular file does not have a buffer (i.e. it is not
** a journal file) or if the buffer is currently empty.
*/
static int demoFlushBuffer(DemoFile *p){
int rc = SQLITE_OK;
if( p->nBuffer ){
rc = demoDirectWrite(p, p->aBuffer, p->nBuffer, p->iBufferOfst);
p->nBuffer = 0;
}
return rc;
}
/*
** Close a file.
*/
static int demoClose(sqlite3_file *pFile){
int rc;
DemoFile *p = (DemoFile*)pFile;
rc = demoFlushBuffer(p);
sqlite3_free(p->aBuffer);
close(p->fd);
return rc;
}
/*
** Read data from a file.
*/
static int demoRead(
sqlite3_file *pFile,
void *zBuf,
int iAmt,
sqlite_int64 iOfst
){
DemoFile *p = (DemoFile*)pFile;
off_t ofst; /* Return value from lseek() */
int nRead; /* Return value from read() */
int rc; /* Return code from demoFlushBuffer() */
/* Flush any data in the write buffer to disk in case this operation
** is trying to read data the file-region currently cached in the buffer.
** It would be possible to detect this case and possibly save an
** unnecessary write here, but in practice SQLite will rarely read from
** a journal file when there is data cached in the write-buffer.
*/
rc = demoFlushBuffer(p);
if( rc!=SQLITE_OK ){
return rc;
}
ofst = lseek(p->fd, iOfst, SEEK_SET);
if( ofst!=iOfst ){
return SQLITE_IOERR_READ;
}
nRead = read(p->fd, zBuf, iAmt);
if( nRead==iAmt ){
return SQLITE_OK;
}else if( nRead>=0 ){
return SQLITE_IOERR_SHORT_READ;
}
return SQLITE_IOERR_READ;
}
/*
** Write data to a crash-file.
*/
static int demoWrite(
sqlite3_file *pFile,
const void *zBuf,
int iAmt,
sqlite_int64 iOfst
){
DemoFile *p = (DemoFile*)pFile;
if( p->aBuffer ){
char *z = (char *)zBuf; /* Pointer to remaining data to write */
int n = iAmt; /* Number of bytes at z */
sqlite3_int64 i = iOfst; /* File offset to write to */
while( n>0 ){
int nCopy; /* Number of bytes to copy into buffer */
/* If the buffer is full, or if this data is not being written directly
** following the data already buffered, flush the buffer. Flushing
** the buffer is a no-op if it is empty.
*/
if( p->nBuffer==SQLITE_DEMOVFS_BUFFERSZ || p->iBufferOfst+p->nBuffer!=i ){
int rc = demoFlushBuffer(p);
if( rc!=SQLITE_OK ){
return rc;
}
}
assert( p->nBuffer==0 || p->iBufferOfst+p->nBuffer==i );
p->iBufferOfst = i - p->nBuffer;
/* Copy as much data as possible into the buffer. */
nCopy = SQLITE_DEMOVFS_BUFFERSZ - p->nBuffer;
if( nCopy>n ){
nCopy = n;
}
memcpy(&p->aBuffer[p->nBuffer], z, nCopy);
p->nBuffer += nCopy;
n -= nCopy;
i += nCopy;
z += nCopy;
}
}else{
return demoDirectWrite(p, zBuf, iAmt, iOfst);
}
return SQLITE_OK;
}
/*
** Truncate a file. This is a no-op for this VFS (see header comments at
** the top of the file).
*/
static int demoTruncate(sqlite3_file *pFile, sqlite_int64 size){
#if 0
if( ftruncate(((DemoFile *)pFile)->fd, size) ) return SQLITE_IOERR_TRUNCATE;
#endif
return SQLITE_OK;
}
/*
** Sync the contents of the file to the persistent media.
*/
static int demoSync(sqlite3_file *pFile, int flags){
DemoFile *p = (DemoFile*)pFile;
int rc;
rc = demoFlushBuffer(p);
if( rc!=SQLITE_OK ){
return rc;
}
rc = fsync(p->fd);
return (rc==0 ? SQLITE_OK : SQLITE_IOERR_FSYNC);
}
/*
** Write the size of the file in bytes to *pSize.
*/
static int demoFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){
DemoFile *p = (DemoFile*)pFile;
int rc; /* Return code from fstat() call */
struct stat sStat; /* Output of fstat() call */
/* Flush the contents of the buffer to disk. As with the flush in the
** demoRead() method, it would be possible to avoid this and save a write
** here and there. But in practice this comes up so infrequently it is
** not worth the trouble.
*/
rc = demoFlushBuffer(p);
if( rc!=SQLITE_OK ){
return rc;
}
rc = fstat(p->fd, &sStat);
if( rc!=0 ) return SQLITE_IOERR_FSTAT;
*pSize = sStat.st_size;
return SQLITE_OK;
}
/*
** Locking functions. The xLock() and xUnlock() methods are both no-ops.
** The xCheckReservedLock() always indicates that no other process holds
** a reserved lock on the database file. This ensures that if a hot-journal
** file is found in the file-system it is rolled back.
*/
static int demoLock(sqlite3_file *pFile, int eLock){
return SQLITE_OK;
}
static int demoUnlock(sqlite3_file *pFile, int eLock){
return SQLITE_OK;
}
static int demoCheckReservedLock(sqlite3_file *pFile, int *pResOut){
*pResOut = 0;
return SQLITE_OK;
}
/*
** No xFileControl() verbs are implemented by this VFS.
*/
static int demoFileControl(sqlite3_file *pFile, int op, void *pArg){
return SQLITE_OK;
}
/*
** The xSectorSize() and xDeviceCharacteristics() methods. These two
** may return special values allowing SQLite to optimize file-system
** access to some extent. But it is also safe to simply return 0.
*/
static int demoSectorSize(sqlite3_file *pFile){
return 0;
}
static int demoDeviceCharacteristics(sqlite3_file *pFile){
return 0;
}
/*
** Open a file handle.
*/
static int demoOpen(
sqlite3_vfs *pVfs, /* VFS */
const char *zName, /* File to open, or 0 for a temp file */
sqlite3_file *pFile, /* Pointer to DemoFile struct to populate */
int flags, /* Input SQLITE_OPEN_XXX flags */
int *pOutFlags /* Output SQLITE_OPEN_XXX flags (or NULL) */
){
static const sqlite3_io_methods demoio = {
1, /* iVersion */
demoClose, /* xClose */
demoRead, /* xRead */
demoWrite, /* xWrite */
demoTruncate, /* xTruncate */
demoSync, /* xSync */
demoFileSize, /* xFileSize */
demoLock, /* xLock */
demoUnlock, /* xUnlock */
demoCheckReservedLock, /* xCheckReservedLock */
demoFileControl, /* xFileControl */
demoSectorSize, /* xSectorSize */
demoDeviceCharacteristics /* xDeviceCharacteristics */
};
DemoFile *p = (DemoFile*)pFile; /* Populate this structure */
int oflags = 0; /* flags to pass to open() call */
char *aBuf = 0;
if( zName==0 ){
return SQLITE_IOERR;
}
if( flags&SQLITE_OPEN_MAIN_JOURNAL ){
aBuf = (char *)sqlite3_malloc(SQLITE_DEMOVFS_BUFFERSZ);
if( !aBuf ){
return SQLITE_NOMEM;
}
}
if( flags&SQLITE_OPEN_EXCLUSIVE ) oflags |= O_EXCL;
if( flags&SQLITE_OPEN_CREATE ) oflags |= O_CREAT;
if( flags&SQLITE_OPEN_READONLY ) oflags |= O_RDONLY;
if( flags&SQLITE_OPEN_READWRITE ) oflags |= O_RDWR;
memset(p, 0, sizeof(DemoFile));
p->fd = open(zName, oflags, 0600);
if( p->fd<0 ){
sqlite3_free(aBuf);
return SQLITE_CANTOPEN;
}
p->aBuffer = aBuf;
if( pOutFlags ){
*pOutFlags = flags;
}
p->base.pMethods = &demoio;
return SQLITE_OK;
}
/*
** Delete the file identified by argument zPath. If the dirSync parameter
** is non-zero, then ensure the file-system modification to delete the
** file has been synced to disk before returning.
*/
static int demoDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
int rc; /* Return code */
rc = unlink(zPath);
if( rc!=0 && errno==ENOENT ) return SQLITE_OK;
if( rc==0 && dirSync ){
int dfd; /* File descriptor open on directory */
int i; /* Iterator variable */
char zDir[MAXPATHNAME+1]; /* Name of directory containing file zPath */
/* Figure out the directory name from the path of the file deleted. */
sqlite3_snprintf(MAXPATHNAME, zDir, "%s", zPath);
zDir[MAXPATHNAME] = '\0';
for(i=strlen(zDir); i>1 && zDir[i]!='/'; i++);
zDir[i] = '\0';
/* Open a file-descriptor on the directory. Sync. Close. */
dfd = open(zDir, O_RDONLY, 0);
if( dfd<0 ){
rc = -1;
}else{
rc = fsync(dfd);
close(dfd);
}
}
return (rc==0 ? SQLITE_OK : SQLITE_IOERR_DELETE);
}
#ifndef F_OK
# define F_OK 0
#endif
#ifndef R_OK
# define R_OK 4
#endif
#ifndef W_OK
# define W_OK 2
#endif
/*
** Query the file-system to see if the named file exists, is readable or
** is both readable and writable.
*/
static int demoAccess(
sqlite3_vfs *pVfs,
const char *zPath,
int flags,
int *pResOut
){
int rc; /* access() return code */
int eAccess = F_OK; /* Second argument to access() */
assert( flags==SQLITE_ACCESS_EXISTS /* access(zPath, F_OK) */
|| flags==SQLITE_ACCESS_READ /* access(zPath, R_OK) */
|| flags==SQLITE_ACCESS_READWRITE /* access(zPath, R_OK|W_OK) */
);
if( flags==SQLITE_ACCESS_READWRITE ) eAccess = R_OK|W_OK;
if( flags==SQLITE_ACCESS_READ ) eAccess = R_OK;
rc = access(zPath, eAccess);
*pResOut = (rc==0);
return SQLITE_OK;
}
/*
** Argument zPath points to a nul-terminated string containing a file path.
** If zPath is an absolute path, then it is copied as is into the output
** buffer. Otherwise, if it is a relative path, then the equivalent full
** path is written to the output buffer.
**
** This function assumes that paths are UNIX style. Specifically, that:
**
** 1. Path components are separated by a '/'. and
** 2. Full paths begin with a '/' character.
*/
static int demoFullPathname(
sqlite3_vfs *pVfs, /* VFS */
const char *zPath, /* Input path (possibly a relative path) */
int nPathOut, /* Size of output buffer in bytes */
char *zPathOut /* Pointer to output buffer */
){
char zDir[MAXPATHNAME+1];
if( zPath[0]=='/' ){
zDir[0] = '\0';
}else{
if( getcwd(zDir, sizeof(zDir))==0 ) return SQLITE_IOERR;
}
zDir[MAXPATHNAME] = '\0';
sqlite3_snprintf(nPathOut, zPathOut, "%s/%s", zDir, zPath);
zPathOut[nPathOut-1] = '\0';
return SQLITE_OK;
}
/*
** The following four VFS methods:
**
** xDlOpen
** xDlError
** xDlSym
** xDlClose
**
** are supposed to implement the functionality needed by SQLite to load
** extensions compiled as shared objects. This simple VFS does not support
** this functionality, so the following functions are no-ops.
*/
static void *demoDlOpen(sqlite3_vfs *pVfs, const char *zPath){
return 0;
}
static void demoDlError(sqlite3_vfs *pVfs, int nByte, char *zErrMsg){
sqlite3_snprintf(nByte, zErrMsg, "Loadable extensions are not supported");
zErrMsg[nByte-1] = '\0';
}
static void (*demoDlSym(sqlite3_vfs *pVfs, void *pH, const char *z))(void){
return 0;
}
static void demoDlClose(sqlite3_vfs *pVfs, void *pHandle){
return;
}
/*
** Parameter zByte points to a buffer nByte bytes in size. Populate this
** buffer with pseudo-random data.
*/
static int demoRandomness(sqlite3_vfs *pVfs, int nByte, char *zByte){
return SQLITE_OK;
}
/*
** Sleep for at least nMicro microseconds. Return the (approximate) number
** of microseconds slept for.
*/
static int demoSleep(sqlite3_vfs *pVfs, int nMicro){
sleep(nMicro / 1000000);
usleep(nMicro % 1000000);
return nMicro;
}
/*
** Set *pTime to the current UTC time expressed as a Julian day. Return
** SQLITE_OK if successful, or an error code otherwise.
**
** http://en.wikipedia.org/wiki/Julian_day
**
** This implementation is not very good. The current time is rounded to
** an integer number of seconds. Also, assuming time_t is a signed 32-bit
** value, it will stop working some time in the year 2038 AD (the so-called
** "year 2038" problem that afflicts systems that store time this way).
*/
static int demoCurrentTime(sqlite3_vfs *pVfs, double *pTime){
time_t t = time(0);
*pTime = t/86400.0 + 2440587.5;
return SQLITE_OK;
}
/*
** This function returns a pointer to the VFS implemented in this file.
** To make the VFS available to SQLite:
**
** sqlite3_vfs_register(sqlite3_demovfs(), 0);
*/
sqlite3_vfs *sqlite3_demovfs(void){
static sqlite3_vfs demovfs = {
1, /* iVersion */
sizeof(DemoFile), /* szOsFile */
MAXPATHNAME, /* mxPathname */
0, /* pNext */
"demo", /* zName */
0, /* pAppData */
demoOpen, /* xOpen */
demoDelete, /* xDelete */
demoAccess, /* xAccess */
demoFullPathname, /* xFullPathname */
demoDlOpen, /* xDlOpen */
demoDlError, /* xDlError */
demoDlSym, /* xDlSym */
demoDlClose, /* xDlClose */
demoRandomness, /* xRandomness */
demoSleep, /* xSleep */
demoCurrentTime, /* xCurrentTime */
};
return &demovfs;
}
#endif /* !defined(SQLITE_TEST) || SQLITE_OS_UNIX */
#ifdef SQLITE_TEST
#include <tcl.h>
#if SQLITE_OS_UNIX
static int register_demovfs(
ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
Tcl_Interp *interp, /* The TCL interpreter that invoked this command */
int objc, /* Number of arguments */
Tcl_Obj *CONST objv[] /* Command arguments */
){
sqlite3_vfs_register(sqlite3_demovfs(), 1);
return TCL_OK;
}
static int unregister_demovfs(
ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
Tcl_Interp *interp, /* The TCL interpreter that invoked this command */
int objc, /* Number of arguments */
Tcl_Obj *CONST objv[] /* Command arguments */
){
sqlite3_vfs_unregister(sqlite3_demovfs());
return TCL_OK;
}
/*
** Register commands with the TCL interpreter.
*/
int Sqlitetest_demovfs_Init(Tcl_Interp *interp){
Tcl_CreateObjCommand(interp, "register_demovfs", register_demovfs, 0, 0);
Tcl_CreateObjCommand(interp, "unregister_demovfs", unregister_demovfs, 0, 0);
return TCL_OK;
}
#else
int Sqlitetest_demovfs_Init(Tcl_Interp *interp){ return TCL_OK; }
#endif
#endif /* SQLITE_TEST */