diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index 5d2dd3ede..a1b52ffad 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,15 @@ +2008-07-29 Corinna Vinschen + + * Makefile.in (DLL_OFILES): Remove v8 regexp files. + (OBSOLETE_FUNCTIONS): Remove v8 regexp functions. + (NEW_FUNCTIONS): Remove POSIX regex functions. + * cygwin.din: Export POSIX regex functions with their correct symbol + name. Export with posix_ prefix for backward compatibility. + * syscalls.cc (regfree): Remove ancient fake function. + * regex/regex.h: Remove renaming regex functions within Cygwin. + * regexp/*: Remove. + * include/cygwin/version.h: Bump API minor number. + 2008-07-27 Corinna Vinschen Christopher Faylor diff --git a/winsup/cygwin/Makefile.in b/winsup/cygwin/Makefile.in index 9d13bb0d9..e9b96629c 100644 --- a/winsup/cygwin/Makefile.in +++ b/winsup/cygwin/Makefile.in @@ -144,14 +144,13 @@ DLL_OFILES:=assert.o autoload.o bsdlib.o ctype.o cxx.o cygheap.o cygthread.o \ sec_helper.o security.o select.o sem.o shared.o shm.o sigfe.o signal.o \ sigproc.o smallprint.o spawn.o strace.o strfuncs.o strptime.o strsep.o \ strsig.o sync.o syscalls.o sysconf.o syslog.o termios.o thread.o \ - timer.o times.o tls_pbuf.o tty.o uinfo.o uname.o v8_regexp.o \ - v8_regerror.o v8_regsub.o wait.o wincap.o window.o winf.o xsique.o \ + timer.o times.o tls_pbuf.o tty.o uinfo.o uname.o wait.o wincap.o \ + window.o winf.o xsique.o \ $(EXTRA_DLL_OFILES) $(EXTRA_OFILES) $(MALLOC_OFILES) $(MT_SAFE_OBJECTS) GMON_OFILES:=gmon.o mcount.o profil.o -OBSOLETE_FUNCTIONS:=regcomp regerror regexec regfree regsub \ - open acl aclcheck aclfrommode aclfrompbits \ +OBSOLETE_FUNCTIONS:=open acl aclcheck aclfrommode aclfrompbits \ aclfromtext aclsort acltomode acltopbits \ acltotext chown facl fchown fcntl fdopen fgetpos fopen \ freopen fseeko fsetpos fstat ftello ftruncate \ @@ -161,11 +160,7 @@ OBSOLETE_FUNCTIONS:=regcomp regerror regexec regfree regsub \ setgid setgroups setregid 
setreuid setuid stat \ telldir tmpfile truncate timezone -NEW_FUNCTIONS:=regcomp posix_regcomp \ - regerror posix_regerror \ - regexec posix_regexec \ - regfree posix_regfree \ - open _open64 \ +NEW_FUNCTIONS:=open _open64 \ acl _acl32 \ aclcheck _aclcheck32 \ aclfrommode _aclfrommode32 \ diff --git a/winsup/cygwin/cygwin.din b/winsup/cygwin/cygwin.din index e418f4033..1dac2268d 100644 --- a/winsup/cygwin/cygwin.din +++ b/winsup/cygwin/cygwin.din @@ -1054,10 +1054,10 @@ posix_fallocate SIGFE posix_madvise SIGFE posix_memalign SIGFE posix_openpt SIGFE -posix_regcomp SIGFE -posix_regerror SIGFE -posix_regexec SIGFE -posix_regfree SIGFE +posix_regcomp = regcomp SIGFE +posix_regerror = regerror SIGFE +posix_regexec = regexec SIGFE +posix_regfree = regfree SIGFE pow NOSIGFE _pow = pow NOSIGFE pow10 NOSIGFE @@ -1192,6 +1192,10 @@ realpath SIGFE recv = cygwin_recv SIGFE recvfrom = cygwin_recvfrom SIGFE recvmsg = cygwin_recvmsg SIGFE +regcomp SIGFE +regerror SIGFE +regexec SIGFE +regfree SIGFE remainder NOSIGFE _remainder = remainder NOSIGFE remainderf NOSIGFE diff --git a/winsup/cygwin/include/cygwin/version.h b/winsup/cygwin/include/cygwin/version.h index 1abf43d12..0b824318b 100644 --- a/winsup/cygwin/include/cygwin/version.h +++ b/winsup/cygwin/include/cygwin/version.h @@ -332,12 +332,13 @@ details. */ linkat, mkdirat, mkfifoat, mknodat, readlinkat, renameat, symlinkat, unlinkat. 185: Export futimens, utimensat. + 186: Remove ancient V8 regexp functions. */ /* Note that we forgot to bump the api for ualarm, strtoll, strtoull */ #define CYGWIN_VERSION_API_MAJOR 0 -#define CYGWIN_VERSION_API_MINOR 185 +#define CYGWIN_VERSION_API_MINOR 186 /* There is also a compatibity version number associated with the shared memory regions. 
It is incremented when incompatible diff --git a/winsup/cygwin/regex/regex.h b/winsup/cygwin/regex/regex.h index ecba140b0..03a182555 100644 --- a/winsup/cygwin/regex/regex.h +++ b/winsup/cygwin/regex/regex.h @@ -6,13 +6,6 @@ extern "C" { #endif -#ifdef __INSIDE_CYGWIN__ -#define regcomp posix_regcomp -#define regerror posix_regerror -#define regexec posix_regexec -#define regfree posix_regfree -#endif - /* === regex2.h === */ typedef _off_t regoff_t; typedef struct { diff --git a/winsup/cygwin/regexp/COPYRIGHT b/winsup/cygwin/regexp/COPYRIGHT deleted file mode 100644 index 48b3f4339..000000000 --- a/winsup/cygwin/regexp/COPYRIGHT +++ /dev/null @@ -1,22 +0,0 @@ -This entire subtree is copyright the University of Toronto. -The following copyright notice applies to all files found here. None of -these files contain AT&T proprietary source code. -_____________________________________________________________________________ - - Copyright (c) 1986 by University of Toronto. - Written by Henry Spencer. Not derived from licensed software. - - Permission is granted to anyone to use this software for any - purpose on any computer system, and to redistribute it freely, - subject to the following restrictions: - - 1. The author is not responsible for the consequences of use of - this software, no matter how awful, even if they arise - from defects in it. - - 2. The origin of this software must not be misrepresented, either - by explicit claim or by omission. - - 3. Altered versions must be plainly marked as such, and must not - be misrepresented as being the original software. - diff --git a/winsup/cygwin/regexp/README b/winsup/cygwin/regexp/README deleted file mode 100644 index 37d6f51c7..000000000 --- a/winsup/cygwin/regexp/README +++ /dev/null @@ -1,84 +0,0 @@ -This is a nearly-public-domain reimplementation of the V8 regexp(3) package. 
-It gives C programs the ability to use egrep-style regular expressions, and -does it in a much cleaner fashion than the analogous routines in SysV. - - Copyright (c) 1986 by University of Toronto. - Written by Henry Spencer. Not derived from licensed software. - - Permission is granted to anyone to use this software for any - purpose on any computer system, and to redistribute it freely, - subject to the following restrictions: - - 1. The author is not responsible for the consequences of use of - this software, no matter how awful, even if they arise - from defects in it. - - 2. The origin of this software must not be misrepresented, either - by explicit claim or by omission. - - 3. Altered versions must be plainly marked as such, and must not - be misrepresented as being the original software. - -Barring a couple of small items in the BUGS list, this implementation is -believed 100% compatible with V8. It should even be binary-compatible, -sort of, since the only fields in a "struct regexp" that other people have -any business touching are declared in exactly the same way at the same -location in the struct (the beginning). - -This implementation is *NOT* AT&T/Bell code, and is not derived from licensed -software. Even though U of T is a V8 licensee. This software is based on -a V8 manual page sent to me by Dennis Ritchie (the manual page enclosed -here is a complete rewrite and hence is not covered by AT&T copyright). -The software was nearly complete at the time of arrival of our V8 tape. -I haven't even looked at V8 yet, although a friend elsewhere at U of T has -been kind enough to run a few test programs using the V8 regexp(3) to resolve -a few fine points. I admit to some familiarity with regular-expression -implementations of the past, but the only one that this code traces any -ancestry to is the one published in Kernighan & Plauger (from which this -one draws ideas but not code). 
- -Simplistically: put this stuff into a source directory, copy regexp.h into -/usr/include, inspect Makefile for compilation options that need changing -to suit your local environment, and then do "make r". This compiles the -regexp(3) functions, compiles a test program, and runs a large set of -regression tests. If there are no complaints, then put regexp.o, regsub.o, -and regerror.o into your C library, and regexp.3 into your manual-pages -directory. - -Note that if you don't put regexp.h into /usr/include *before* compiling, -you'll have to add "-I." to CFLAGS before compiling. - -The files are: - -Makefile instructions to make everything -regexp.3 manual page -regexp.h header file, for /usr/include -regexp.c source for regcomp() and regexec() -regsub.c source for regsub() -regerror.c source for default regerror() -regmagic.h internal header file -try.c source for test program -timer.c source for timing program -tests test list for try and timer - -This implementation uses nondeterministic automata rather than the -deterministic ones found in some other implementations, which makes it -simpler, smaller, and faster at compiling regular expressions, but slower -at executing them. In theory, anyway. This implementation does employ -some special-case optimizations to make the simpler cases (which do make -up the bulk of regular expressions actually used) run quickly. In general, -if you want blazing speed you're in the wrong place. Replacing the insides -of egrep with this stuff is probably a mistake; if you want your own egrep -you're going to have to do a lot more work. But if you want to use regular -expressions a little bit in something else, you're in luck. Note that many -existing text editors use nondeterministic regular-expression implementations, -so you're in good company. - -This stuff should be pretty portable, given appropriate option settings. 
-If your chars have less than 8 bits, you're going to have to change the -internal representation of the automaton, although knowledge of the details -of this is fairly localized. There are no "reserved" char values except for -NUL, and no special significance is attached to the top bit of chars. -The string(3) functions are used a fair bit, on the grounds that they are -probably faster than coding the operations in line. Some attempts at code -tuning have been made, but this is invariably a bit machine-specific. diff --git a/winsup/cygwin/regexp/regexp.h b/winsup/cygwin/regexp/regexp.h deleted file mode 100644 index 0850d70a4..000000000 --- a/winsup/cygwin/regexp/regexp.h +++ /dev/null @@ -1,34 +0,0 @@ -/* regexp.h - - Copyright 1996, 2001 Red Hat, Inc. - -This file is part of Cygwin. - -This software is a copyrighted work licensed under the terms of the -Cygwin license. Please consult the file "CYGWIN_LICENSE" for -details. */ - -/* - * Definitions etc. for regexp(3) routines. - * - * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], - * not the System V one. - * - * $Id$ - */ - -#define NSUBEXP 10 -typedef struct regexp { - char *startp[NSUBEXP]; - char *endp[NSUBEXP]; - char regstart; /* Internal use only. */ - char reganch; /* Internal use only. */ - char *regmust; /* Internal use only. */ - int regmlen; /* Internal use only. */ - char program[1]; /* Unwarranted chumminess with compiler. */ -} regexp; - -extern regexp *regcomp(); -extern int regexec(); -extern void regsub(); -extern void regerror(); diff --git a/winsup/cygwin/regexp/regmagic.h b/winsup/cygwin/regexp/regmagic.h deleted file mode 100644 index ea7835711..000000000 --- a/winsup/cygwin/regexp/regmagic.h +++ /dev/null @@ -1,17 +0,0 @@ -/* regmagic.h - - Copyright 1996, 2001 Red Hat, Inc. - -This file is part of Cygwin. - -This software is a copyrighted work licensed under the terms of the -Cygwin license. Please consult the file "CYGWIN_LICENSE" for -details. 
*/ - -/* $Id$ */ - -/* - * The first byte of the regexp internal "program" is actually this magic - * number; the start node begins in the second byte. - */ -#define MAGIC 0234 diff --git a/winsup/cygwin/regexp/v8_regerror.c b/winsup/cygwin/regexp/v8_regerror.c deleted file mode 100644 index 56d63ff2f..000000000 --- a/winsup/cygwin/regexp/v8_regerror.c +++ /dev/null @@ -1,28 +0,0 @@ -/* regerror.c - - Copyright 1996, 1998, 2001 Red Hat, Inc. - -This file is part of Cygwin. - -This software is a copyrighted work licensed under the terms of the -Cygwin license. Please consult the file "CYGWIN_LICENSE" for -details. */ - -#include "winsup.h" -#include "regexp.h" -#include - -void __declspec(dllexport) -regerror(const char *s __attribute__ ((unused))) -{ -#ifdef ERRAVAIL - error("regexp: %s", s); -#else -/* - fprintf(stderr, "regexp(3): %s\n", s); - exit(1); -*/ - return; /* let std. egrep handle errors */ -#endif - /* NOTREACHED */ -} diff --git a/winsup/cygwin/regexp/v8_regexp.c b/winsup/cygwin/regexp/v8_regexp.c deleted file mode 100644 index fef1267a0..000000000 --- a/winsup/cygwin/regexp/v8_regexp.c +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * regcomp and regexec -- regsub and regerror are elsewhere - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - *** THIS IS AN ALTERED VERSION. 
It was altered by John Gilmore, - *** hoptoad!gnu, on 27 Dec 1986, to add \n as an alternative to | - *** to assist in implementing egrep. - *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore, - *** hoptoad!gnu, on 27 Dec 1986, to add \< and \> for word-matching - *** as in BSD grep and ex. - *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore, - *** hoptoad!gnu, on 28 Dec 1986, to optimize characters quoted with \. - *** THIS IS AN ALTERED VERSION. It was altered by James A. Woods, - *** ames!jaw, on 19 June 1987, to quash a regcomp() redundancy. - *** THIS IS AN ALTERED VERSION. It was altered by Geoffrey Noer, - *** noer@cygnus.com, on 6 Oct 1997, to change the prototype format - *** for inclusion in the Cygwin32 library. - * - * Beware that some of this code is subtly aware of the way operator - * precedence is structured in regular expressions. Serious changes in - * regular-expression syntax might require a total rethink. - */ - -#include "winsup.h" -#include "regexp.h" -#include -#include -#include -#include -#include "regmagic.h" - -/* - * The "internal use only" fields in regexp.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases. They are: - * - * regstart char that must begin a match; '\0' if none obvious - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL - * regmlen length of regmust string - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot. Regmust permits fast rejection - * of lines that cannot possibly match. The regmust tests are costly enough - * that regcomp() supplies a regmust only if the r.e. contains something - * potentially expensive (at present, the only such thing detected is * or + - * at the start of the r.e., which can involve a lot of backup). 
Regmlen is - * supplied because the test in regexec() needs it and regcomp() is computing - * it anyway. - */ - -/* - * Structure for regexp "program". This is essentially a linear encoding - * of a nondeterministic finite-state machine (aka syntax charts or - * "railroad normal form" in parsing technology). Each node is an opcode - * plus a "next" pointer, possibly plus an operand. "Next" pointers of - * all nodes except BRANCH implement concatenation; a "next" pointer with - * a BRANCH on both ends of it is connecting two alternatives. (Here we - * have one of the subtle syntax dependencies: an individual BRANCH (as - * opposed to a collection of them) is never concatenated with anything - * because of operator precedence.) The operand of some types of node is - * a literal string; for others, it is a node leading into a sub-FSM. In - * particular, the operand of a BRANCH node is the first node of the branch. - * (NB this is *not* a tree structure: the tail of the branch connects - * to the thing following the set of BRANCHes.) The opcodes are: - */ - -/* definition number opnd? meaning */ -#define END 0 /* no End of program. */ -#define BOL 1 /* no Match "" at beginning of line. */ -#define EOL 2 /* no Match "" at end of line. */ -#define ANY 3 /* no Match any one character. */ -#define ANYOF 4 /* str Match any character in this string. */ -#define ANYBUT 5 /* str Match any character not in this string. */ -#define BRANCH 6 /* node Match this alternative, or the next... */ -#define BACK 7 /* no Match "", "next" ptr points backward. */ -#define EXACTLY 8 /* str Match this string. */ -#define NOTHING 9 /* no Match empty string. */ -#define STAR 10 /* node Match this (simple) thing 0 or more times. */ -#define PLUS 11 /* node Match this (simple) thing 1 or more times. 
*/ -#define WORDA 12 /* no Match "" at wordchar, where prev is nonword */ -#define WORDZ 13 /* no Match "" at nonwordchar, where prev is word */ -#define OPEN 20 /* no Mark this point in input as start of #n. */ - /* OPEN+1 is number 1, etc. */ -#define CLOSE 30 /* no Analogous to OPEN. */ - -/* - * Opcode notes: - * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. - * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. - * - * STAR,PLUS '?', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. - * - * OPEN,CLOSE ...are numbered at compile time. - */ - -/* - * A node is one char of opcode followed by two chars of "next" pointer. - * "Next" pointers are stored as two 8-bit pieces, high order first. The - * value is a positive offset from the opcode of the node containing it. - * An operand, if any, simply follows the node. (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - */ -#define OP(p) (*(p)) -#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) -#define OPERAND(p) ((p) + 3) - -/* - * See regmagic.h for one further detail of program structure. - */ - - -/* - * Utility definitions. 
- */ -#ifndef CHARBITS -#define UCHARAT(p) ((int)*(unsigned char *)(p)) -#else -#define UCHARAT(p) ((int)*(p)&CHARBITS) -#endif - -#define FAIL(m) { regerror(m); return(NULL); } -#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') - -/* - * Flags to be passed up and down. - */ -#define HASWIDTH 01 /* Known never to match null string. */ -#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ -#define SPSTART 04 /* Starts with * or +. */ -#define WORST 0 /* Worst case. */ - -/* - * Global work variables for regcomp(). - */ -static char *regparse; /* Input-scan pointer. */ -static int regnpar; /* () count. */ -static char regdummy; -static char *regcode; /* Code-emit pointer; &regdummy = don't. */ -static long regsize; /* Code size. */ - -/* - * Forward declarations for regcomp()'s friends. - */ -#ifndef STATIC -#define STATIC static -#endif -STATIC char *reg (int, int *); -STATIC char *regbranch (int *); -STATIC char *regpiece (int *); -STATIC char *regatom (int *); -STATIC char *regnode (char); -STATIC char *regnext (char *); -STATIC void regc (char); -STATIC void reginsert (char, char *); -STATIC void regtail (char *, char *); -STATIC void regoptail (char *, char *); -#ifdef STRCSPN -STATIC int strcspn (char *, char *); -#endif - -/* - - regcomp - compile a regular expression into internal code - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code. So we cheat: we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it. (Note that it has to be in - * one piece because free() must be able to free it all.) 
- * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. - */ -regexp * __declspec(dllexport) -regcomp(exp) -const char *exp; -{ - register regexp *r; - register char *scan; - register char *longest; - register int len; - int flags; - - if (exp == NULL) - FAIL("NULL argument"); - - /* First pass: determine size, legality. */ -#ifdef notdef - if (exp[0] == '.' && exp[1] == '*') exp += 2; /* aid grep */ -#endif - regparse = (char *)exp; - regnpar = 1; - regsize = 0L; - regcode = &regdummy; - regc(MAGIC); - if (reg(0, &flags) == NULL) - return(NULL); - - /* Small enough for pointer-storage convention? */ - if (regsize >= 32767L) /* Probably could be 65535L. */ - FAIL("regexp too big"); - - /* Allocate space. */ - r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); - if (r == NULL) - FAIL("out of space"); - - /* Second pass: emit code. */ - regparse = (char *)exp; - regnpar = 1; - regcode = r->program; - regc(MAGIC); - if (reg(0, &flags) == NULL) - return(NULL); - - /* Dig out information for optimizations. */ - r->regstart = '\0'; /* Worst-case defaults. */ - r->reganch = 0; - r->regmust = NULL; - r->regmlen = 0; - scan = r->program+1; /* First BRANCH. */ - if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ - scan = OPERAND(scan); - - /* Starting-point info. */ - if (OP(scan) == EXACTLY) - r->regstart = *OPERAND(scan); - else if (OP(scan) == BOL) - r->reganch++; - - /* - * If there's something expensive in the r.e., find the - * longest literal string that must appear and make it the - * regmust. Resolve ties in favor of later strings, since - * the regstart check works with the beginning of the r.e. - * and avoiding duplication strengthens checking. Not a - * strong reason, but sufficient in the absence of others. 
- */ - if (flags&SPSTART) { - longest = NULL; - len = 0; - for (; scan != NULL; scan = regnext(scan)) - if (OP(scan) == EXACTLY && (int) strlen(OPERAND(scan)) >= len) { - longest = OPERAND(scan); - len = strlen(OPERAND(scan)); - } - r->regmust = longest; - r->regmlen = len; - } - } - - return(r); -} - -/* - - reg - regular expression, i.e. main body or parenthesized thing - * - * Caller must absorb opening parenthesis. - * - * Combining parenthesis handling with the base level of regular expression - * is a trifle forced, but the need to tie the tails of the branches to what - * follows makes it hard to avoid. - */ -static char * -reg(paren, flagp) -int paren; /* Parenthesized? */ -int *flagp; -{ - register char *ret; - register char *br; - register char *ender; - register int parno = 0; - int flags; - - *flagp = HASWIDTH; /* Tentatively. */ - - /* Make an OPEN node, if parenthesized. */ - if (paren) { - if (regnpar >= NSUBEXP) - FAIL("too many ()"); - parno = regnpar; - regnpar++; - ret = regnode(OPEN+parno); - } else - ret = NULL; - - /* Pick up the branches, linking them together. */ - br = regbranch(&flags); - if (br == NULL) - return(NULL); - if (ret != NULL) - regtail(ret, br); /* OPEN -> first. */ - else - ret = br; - if (!(flags&HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags&SPSTART; - while (*regparse == '|' || *regparse == '\n') { - regparse++; - br = regbranch(&flags); - if (br == NULL) - return(NULL); - regtail(ret, br); /* BRANCH -> BRANCH. */ - if (!(flags&HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags&SPSTART; - } - - /* Make a closing node, and hook it on the end. */ - ender = regnode((paren) ? CLOSE+parno : END); - regtail(ret, ender); - - /* Hook the tails of the branches to the closing node. */ - for (br = ret; br != NULL; br = regnext(br)) - regoptail(br, ender); - - /* Check for proper termination. 
*/ - if (paren && *regparse++ != ')') { - FAIL("unmatched ()"); - } else if (!paren && *regparse != '\0') { - if (*regparse == ')') { - FAIL("unmatched ()"); - } else - FAIL("junk on end"); /* "Can't happen". */ - /* NOTREACHED */ - } - - return(ret); -} - -/* - - regbranch - one alternative of an | operator - * - * Implements the concatenation operator. - */ -static char * -regbranch(flagp) -int *flagp; -{ - register char *ret; - register char *chain; - register char *latest; - int flags; - - *flagp = WORST; /* Tentatively. */ - - ret = regnode(BRANCH); - chain = NULL; - while (*regparse != '\0' && *regparse != ')' && - *regparse != '\n' && *regparse != '|') { - latest = regpiece(&flags); - if (latest == NULL) - return(NULL); - *flagp |= flags&HASWIDTH; - if (chain == NULL) /* First piece. */ - *flagp |= flags&SPSTART; - else - regtail(chain, latest); - chain = latest; - } - if (chain == NULL) /* Loop ran zero times. */ - (void) regnode(NOTHING); - - return(ret); -} - -/* - - regpiece - something followed by possible [*+?] - * - * Note that the branching code sequences used for ? and the general cases - * of * and + are somewhat optimized: they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch. - * It might seem that this node could be dispensed with entirely, but the - * endmarker role is not redundant. - */ -static char * -regpiece(flagp) -int *flagp; -{ - register char *ret; - register char op; - register char *next; - int flags; - - ret = regatom(&flags); - if (ret == NULL) - return(NULL); - - op = *regparse; - if (!ISMULT(op)) { - *flagp = flags; - return(ret); - } - - if (!(flags&HASWIDTH) && op != '?') - FAIL("*+ operand could be empty"); - *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); - - if (op == '*' && (flags&SIMPLE)) - reginsert(STAR, ret); - else if (op == '*') { - /* Emit x* as (x&|), where & means "self". 
*/ - reginsert(BRANCH, ret); /* Either x */ - regoptail(ret, regnode(BACK)); /* and loop */ - regoptail(ret, ret); /* back */ - regtail(ret, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } else if (op == '+' && (flags&SIMPLE)) - reginsert(PLUS, ret); - else if (op == '+') { - /* Emit x+ as x(&|), where & means "self". */ - next = regnode(BRANCH); /* Either */ - regtail(ret, next); - regtail(regnode(BACK), ret); /* loop back */ - regtail(next, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } else if (op == '?') { - /* Emit x? as (x|) */ - reginsert(BRANCH, ret); /* Either x */ - regtail(ret, regnode(BRANCH)); /* or */ - next = regnode(NOTHING); /* null. */ - regtail(ret, next); - regoptail(ret, next); - } - regparse++; - if (ISMULT(*regparse)) - FAIL("nested *?+"); - - return(ret); -} - -/* - - regatom - the lowest level - * - * Optimization: gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run. Backslashed characters are exceptions, each becoming a - * separate node; the code is simpler that way and it's not worth fixing. - */ -static char * -regatom(flagp) -int *flagp; -{ - register char *ret; - int flags; - - *flagp = WORST; /* Tentatively. */ - - switch (*regparse++) { - /* FIXME: these chars only have meaning at beg/end of pat? */ - case '^': - ret = regnode(BOL); - break; - case '$': - ret = regnode(EOL); - break; - case '.': - ret = regnode(ANY); - *flagp |= HASWIDTH|SIMPLE; - break; - case '[': { - register int class; - register int classend; - - if (*regparse == '^') { /* Complement of range. 
*/ - ret = regnode(ANYBUT); - regparse++; - } else - ret = regnode(ANYOF); - if (*regparse == ']' || *regparse == '-') - regc(*regparse++); - while (*regparse != '\0' && *regparse != ']') { - if (*regparse == '-') { - regparse++; - if (*regparse == ']' || *regparse == '\0') - regc('-'); - else { - class = UCHARAT(regparse-2)+1; - classend = UCHARAT(regparse); - if (class > classend+1) - FAIL("invalid [] range"); - for (; class <= classend; class++) - regc(class); - regparse++; - } - } else - regc(*regparse++); - } - regc('\0'); - if (*regparse != ']') - FAIL("unmatched []"); - regparse++; - *flagp |= HASWIDTH|SIMPLE; - } - break; - case '(': - ret = reg(1, &flags); - if (ret == NULL) - return(NULL); - *flagp |= flags&(HASWIDTH|SPSTART); - break; - case '\0': - case '|': - case '\n': - case ')': - FAIL("internal urp"); /* Supposed to be caught earlier. */ - break; - case '?': - case '+': - case '*': - FAIL("?+* follows nothing"); - break; - case '\\': - switch (*regparse++) { - case '\0': - FAIL("trailing \\"); - break; - case '<': - ret = regnode(WORDA); - break; - case '>': - ret = regnode(WORDZ); - break; - /* FIXME: Someday handle \1, \2, ... */ - default: - /* Handle general quoted chars in exact-match routine */ - goto de_fault; - } - break; - de_fault: - default: - /* - * Encode a string of characters to be matched exactly. - * - * This is a bit tricky due to quoted chars and due to - * '*', '+', and '?' taking the SINGLE char previous - * as their operand. - * - * On entry, the char at regparse[-1] is going to go - * into the string, no matter what it is. (It could be - * following a \ if we are entered from the '\' case.) - * - * Basic idea is to pick up a good char in ch and - * examine the next char. If it's *+? then we twiddle. - * If it's \ then we frozzle. If it's other magic char - * we push ch and terminate the string. If none of the - * above, we push ch on the string and go around again. 
- * - * regprev is used to remember where "the current char" - * starts in the string, if due to a *+? we need to back - * up and put the current char in a separate, 1-char, string. - * When regprev is NULL, ch is the only char in the - * string; this is used in *+? handling, and in setting - * flags |= SIMPLE at the end. - */ - { - char *regprev; - register char ch = 0; - - regparse--; /* Look at cur char */ - ret = regnode(EXACTLY); - for ( regprev = 0 ; ; ) { - ch = *regparse++; /* Get current char */ - switch (*regparse) { /* look at next one */ - - default: - regc(ch); /* Add cur to string */ - break; - - case '.': case '[': case '(': - case ')': case '|': case '\n': - case '$': case '^': - case '\0': - /* FIXME, $ and ^ should not always be magic */ - magic: - regc(ch); /* dump cur char */ - goto done; /* and we are done */ - - case '?': case '+': case '*': - if (!regprev) /* If just ch in str, */ - goto magic; /* use it */ - /* End mult-char string one early */ - regparse = regprev; /* Back up parse */ - goto done; - - case '\\': - regc(ch); /* Cur char OK */ - switch (regparse[1]){ /* Look after \ */ - case '\0': - case '<': - case '>': - /* FIXME: Someday handle \1, \2, ... */ - goto done; /* Not quoted */ - default: - /* Backup point is \, scan * point is after it. */ - regprev = regparse; - regparse++; - continue; /* NOT break; */ - } - } - regprev = regparse; /* Set backup point */ - } - done: - regc('\0'); - *flagp |= HASWIDTH; - if (!regprev) /* One char? */ - *flagp |= SIMPLE; - } - break; - } - - return(ret); -} - -/* - - regnode - emit a node - */ -static char * /* Location. */ -regnode(op) -char op; -{ - register char *ret; - register char *ptr; - - ret = regcode; - if (ret == &regdummy) { - regsize += 3; - return(ret); - } - - ptr = ret; - *ptr++ = op; - *ptr++ = '\0'; /* Null "next" pointer. 
*/ - *ptr++ = '\0'; - regcode = ptr; - - return(ret); -} - -/* - - regc - emit (if appropriate) a byte of code - */ -static void -regc(b) -char b; -{ - if (regcode != ®dummy) - *regcode++ = b; - else - regsize++; -} - -/* - - reginsert - insert an operator in front of already-emitted operand - * - * Means relocating the operand. - */ -static void -reginsert(op, opnd) -char op; -char *opnd; -{ - register char *src; - register char *dst; - register char *place; - - if (regcode == ®dummy) { - regsize += 3; - return; - } - - src = regcode; - regcode += 3; - dst = regcode; - while (src > opnd) - *--dst = *--src; - - place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = '\0'; - *place++ = '\0'; -} - -/* - - regtail - set the next-pointer at the end of a node chain - */ -static void -regtail(p, val) -char *p; -char *val; -{ - register char *scan; - register char *temp; - register int offset; - - if (p == ®dummy) - return; - - /* Find last node. */ - scan = p; - for (;;) { - temp = regnext(scan); - if (temp == NULL) - break; - scan = temp; - } - - if (OP(scan) == BACK) - offset = scan - val; - else - offset = val - scan; - *(scan+1) = (offset>>8)&0377; - *(scan+2) = offset&0377; -} - -/* - - regoptail - regtail on operand of first argument; nop if operandless - */ -static void -regoptail(p, val) -char *p; -char *val; -{ - /* "Operandless" and "op != BRANCH" are synonymous in practice. */ - if (p == NULL || p == ®dummy || OP(p) != BRANCH) - return; - regtail(OPERAND(p), val); -} - -/* - * regexec and friends - */ - -/* - * Global work variables for regexec(). - */ -static char *reginput; /* String-input pointer. */ -static char *regbol; /* Beginning of input, for ^ check. */ -static char **regstartp; /* Pointer to startp array. */ -static char **regendp; /* Ditto for endp. */ - -/* - * Forwards. 
- */ -STATIC int regtry (const regexp *, const char *); -STATIC int regmatch (char *); -STATIC int regrepeat (char *); - -#ifdef DEBUG -int regnarrate = 0; -void regdump __P((regexp *)); -STATIC char *regprop __P((char *)); -#endif - -/* - - regexec - match a regexp against a string - */ -int __declspec(dllexport) -regexec(prog, string) -register const regexp *prog; -register const char *string; -{ - register char *s; - - /* Be paranoid... */ - if (prog == NULL || string == NULL) { - regerror("NULL parameter"); - return(0); - } - - /* Check validity of program. */ - if (UCHARAT(prog->program) != MAGIC) { - regerror("corrupted program"); - return(0); - } - - /* If there is a "must appear" string, look for it. */ - if (prog->regmust != NULL) { - s = (char *)string; - while ((s = strchr(s, prog->regmust[0])) != NULL) { - if (strncmp(s, prog->regmust, prog->regmlen) == 0) - break; /* Found it. */ - s++; - } - if (s == NULL) /* Not present. */ - return(0); - } - - /* Mark beginning of line for ^ . */ - regbol = (char *)string; - - /* Simplest case: anchored match need be tried only once. */ - if (prog->reganch) - return(regtry(prog, string)); - - /* Messy cases: unanchored match. */ - s = (char *)string; - if (prog->regstart != '\0') - /* We know what char it must start with. */ - while ((s = strchr(s, prog->regstart)) != NULL) { - if (regtry(prog, s)) - return(1); - s++; - } - else - /* We don't -- general case. */ - do { - if (regtry(prog, s)) - return(1); - } while (*s++ != '\0'); - - /* Failure. 
*/ - return(0); -} - -/* - - regtry - try match at specific point - */ -static int /* 0 failure, 1 success */ -regtry(prog, string) -const regexp *prog; -const char *string; -{ - register int i; - register char **sp; - register char **ep; - - reginput = (char *)string; /* XXX */ - regstartp = (char **)prog->startp; /* XXX */ - regendp = (char **)prog->endp; /* XXX */ - - sp = (char **)prog->startp; /* XXX */ - ep = (char **)prog->endp; /* XXX */ - for (i = NSUBEXP; i > 0; i--) { - *sp++ = NULL; - *ep++ = NULL; - } - if (regmatch((char *)prog->program + 1)) { /* XXX */ - ((regexp *)prog)->startp[0] = (char *)string; /* XXX */ - ((regexp *)prog)->endp[0] = reginput; /* XXX */ - return(1); - } else - return(0); -} - -/* - - regmatch - main matching routine - * - * Conceptually the strategy is simple: check to see whether the current - * node matches, call self recursively to see whether the rest matches, - * and then act accordingly. In practice we make some effort to avoid - * recursion, in particular by going through "ordinary" nodes (that don't - * need to know whether the rest of the match failed) by a loop instead of - * by recursion. - */ -static int /* 0 failure, 1 success */ -regmatch(prog) -char *prog; -{ - register char *scan; /* Current node. */ - char *next; /* Next node. 
*/ - - scan = prog; -#ifdef DEBUG - if (scan != NULL && regnarrate) - fprintf(stderr, "%s(\n", regprop(scan)); -#endif - while (scan != NULL) { -#ifdef DEBUG - if (regnarrate) - fprintf(stderr, "%s...\n", regprop(scan)); -#endif - next = regnext(scan); - - switch (OP(scan)) { - case BOL: - if (reginput != regbol) - return(0); - break; - case EOL: - if (*reginput != '\0') - return(0); - break; - case WORDA: - /* Must be looking at a letter, digit, or _ */ - if ((!isalnum(*reginput)) && *reginput != '_') - return(0); - /* Prev must be BOL or nonword */ - if (reginput > regbol && - (isalnum(reginput[-1]) || reginput[-1] == '_')) - return(0); - break; - case WORDZ: - /* Must be looking at non letter, digit, or _ */ - if (isalnum(*reginput) || *reginput == '_') - return(0); - /* We don't care what the previous char was */ - break; - case ANY: - if (*reginput == '\0') - return(0); - reginput++; - break; - case EXACTLY: { - register int len; - register char *opnd; - - opnd = OPERAND(scan); - /* Inline the first character, for speed. */ - if (*opnd != *reginput) - return(0); - len = strlen(opnd); - if (len > 1 && strncmp(opnd, reginput, len) != 0) - return(0); - reginput += len; - } - break; - case ANYOF: - if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) - return(0); - reginput++; - break; - case ANYBUT: - if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) - return(0); - reginput++; - break; - case NOTHING: - break; - case BACK: - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: { - register int no; - register char *save; - - no = OP(scan) - OPEN; - save = reginput; - - if (regmatch(next)) { - /* - * Don't set startp if some later - * invocation of the same parentheses - * already has. 
- */ - if (regstartp[no] == NULL) - regstartp[no] = save; - return(1); - } else - return(0); - } - break; - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: { - register int no; - register char *save; - - no = OP(scan) - CLOSE; - save = reginput; - - if (regmatch(next)) { - /* - * Don't set endp if some later - * invocation of the same parentheses - * already has. - */ - if (regendp[no] == NULL) - regendp[no] = save; - return(1); - } else - return(0); - } - break; - case BRANCH: { - register char *save; - - if (OP(next) != BRANCH) /* No choice. */ - next = OPERAND(scan); /* Avoid recursion. */ - else { - do { - save = reginput; - if (regmatch(OPERAND(scan))) - return(1); - reginput = save; - scan = regnext(scan); - } while (scan != NULL && OP(scan) == BRANCH); - return(0); - /* NOTREACHED */ - } - } - break; - case STAR: - case PLUS: { - register char nextch; - register int no; - register char *save; - register int min; - - /* - * Lookahead to avoid useless match attempts - * when we know what character comes next. - */ - nextch = '\0'; - if (OP(next) == EXACTLY) - nextch = *OPERAND(next); - min = (OP(scan) == STAR) ? 0 : 1; - save = reginput; - no = regrepeat(OPERAND(scan)); - while (no >= min) { - /* If it could work, try it. */ - if (nextch == '\0' || *reginput == nextch) - if (regmatch(next)) - return(1); - /* Couldn't or didn't -- back up. */ - no--; - reginput = save + no; - } - return(0); - } - break; - case END: - return(1); /* Success! */ - break; - default: - regerror("memory corruption"); - return(0); - break; - } - - scan = next; - } - - /* - * We get here only if there's trouble -- normally "case END" is - * the terminating point. 
- */ - regerror("corrupted pointers"); - return(0); -} - -/* - - regrepeat - repeatedly match something simple, report how many - */ -static int -regrepeat(p) -char *p; -{ - register int count = 0; - register char *scan; - register char *opnd; - - scan = reginput; - opnd = OPERAND(p); - switch (OP(p)) { - case ANY: - count = strlen(scan); - scan += count; - break; - case EXACTLY: - while (*opnd == *scan) { - count++; - scan++; - } - break; - case ANYOF: - while (*scan != '\0' && strchr(opnd, *scan) != NULL) { - count++; - scan++; - } - break; - case ANYBUT: - while (*scan != '\0' && strchr(opnd, *scan) == NULL) { - count++; - scan++; - } - break; - default: /* Oh dear. Called inappropriately. */ - regerror("internal foulup"); - count = 0; /* Best compromise. */ - break; - } - reginput = scan; - - return(count); -} - -/* - - regnext - dig the "next" pointer out of a node - */ -static char * -regnext(p) -register char *p; -{ - register int offset; - - if (p == ®dummy) - return(NULL); - - offset = NEXT(p); - if (offset == 0) - return(NULL); - - if (OP(p) == BACK) - return(p-offset); - else - return(p+offset); -} - -#ifdef DEBUG - -/* - - regdump - dump a regexp onto stdout in vaguely comprehensible form - */ -void -regdump(r) -regexp *r; -{ - register char *s; - register char op = EXACTLY; /* Arbitrary non-END op. */ - register char *next; - extern char *strchr(); - - - s = r->program + 1; - while (op != END) { /* While that wasn't END last time... */ - op = OP(s); - printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ - next = regnext(s); - if (next == NULL) /* Next ptr. */ - printf("(0)"); - else - printf("(%d)", (s-r->program)+(next-s)); - s += 3; - if (op == ANYOF || op == ANYBUT || op == EXACTLY) { - /* Literal string, where present. */ - while (*s != '\0') { - putchar(*s); - s++; - } - s++; - } - putchar('\n'); - } - - /* Header fields of interest. 
*/ - if (r->regstart != '\0') - printf("start `%c' ", r->regstart); - if (r->reganch) - printf("anchored "); - if (r->regmust != NULL) - printf("must have \"%s\"", r->regmust); - printf("\n"); -} - -/* - - regprop - printable representation of opcode - */ -static char * -regprop(op) -char *op; -{ - register char *p; - static char buf[50]; - - (void) strcpy(buf, ":"); - - switch (OP(op)) { - case BOL: - p = "BOL"; - break; - case EOL: - p = "EOL"; - break; - case ANY: - p = "ANY"; - break; - case ANYOF: - p = "ANYOF"; - break; - case ANYBUT: - p = "ANYBUT"; - break; - case BRANCH: - p = "BRANCH"; - break; - case EXACTLY: - p = "EXACTLY"; - break; - case NOTHING: - p = "NOTHING"; - break; - case BACK: - p = "BACK"; - break; - case END: - p = "END"; - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: - sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); - p = NULL; - break; - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: - sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); - p = NULL; - break; - case STAR: - p = "STAR"; - break; - case PLUS: - p = "PLUS"; - break; - case WORDA: - p = "WORDA"; - break; - case WORDZ: - p = "WORDZ"; - break; - default: - regerror("corrupted opcode"); - break; - } - if (p != NULL) - (void) strcat(buf, p); - return(buf); -} -#endif - -/* - * The following is provided for those people who do not have strcspn() in - * their C libraries. They should get off their butts and do something - * about it; at least one public-domain implementation of those (highly - * useful) string routines has been published on Usenet. 
- */ -#ifdef STRCSPN -/* - * strcspn - find length of initial segment of s1 consisting entirely - * of characters not from s2 - */ - -static int -strcspn(s1, s2) -char *s1; -char *s2; -{ - register char *scan1; - register char *scan2; - register int count; - - count = 0; - for (scan1 = s1; *scan1 != '\0'; scan1++) { - for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ - if (*scan1 == *scan2++) - return(count); - count++; - } - return(count); -} -#endif diff --git a/winsup/cygwin/regexp/v8_regsub.c b/winsup/cygwin/regexp/v8_regsub.c deleted file mode 100644 index aa95b876a..000000000 --- a/winsup/cygwin/regexp/v8_regsub.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * regsub - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. 
- */ - -#if 0 -#ifndef lint -static char *rcsid = "$Id$"; -#endif /* not lint */ -#endif - -#include "winsup.h" -#include "regexp.h" -#include -#include -#include "regmagic.h" - -#ifndef CHARBITS -#define UCHARAT(p) ((int)*(unsigned char *)(p)) -#else -#define UCHARAT(p) ((int)*(p)&CHARBITS) -#endif - -/* - - regsub - perform substitutions after a regexp match - */ -void __declspec(dllexport) -regsub(prog, source, dest) -const regexp *prog; -const char *source; -char *dest; -{ - register char *src; - register char *dst; - register char c; - register int no; - register int len; - - if (prog == NULL || source == NULL || dest == NULL) { - regerror("NULL parm to regsub"); - return; - } - if (UCHARAT(prog->program) != MAGIC) { - regerror("damaged regexp fed to regsub"); - return; - } - - src = (char *)source; - dst = dest; - while ((c = *src++) != '\0') { - if (c == '&') - no = 0; - else if (c == '\\' && '0' <= *src && *src <= '9') - no = *src++ - '0'; - else - no = -1; - if (no < 0) { /* Ordinary character. */ - if (c == '\\' && (*src == '\\' || *src == '&')) - c = *src++; - *dst++ = c; - } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { - len = prog->endp[no] - prog->startp[no]; - (void) strncpy(dst, prog->startp[no], len); - dst += len; - if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ - regerror("damaged match string"); - return; - } - } - } - *dst++ = '\0'; -} diff --git a/winsup/cygwin/syscalls.cc b/winsup/cygwin/syscalls.cc index bb74dff26..a8a66ecbf 100644 --- a/winsup/cygwin/syscalls.cc +++ b/winsup/cygwin/syscalls.cc @@ -2401,13 +2401,6 @@ ptsname (int fd) return (char *) (cfd->ptsname ()); } -/* FIXME: what is this? */ -extern "C" int __declspec(dllexport) -regfree () -{ - return 0; -} - static int __stdcall mknod_worker (const char *path, mode_t type, mode_t mode, _major_t major, _minor_t minor)