diff options
| author | vvvv <[email protected]> | 2023-07-31 18:21:04 +0300 | 
|---|---|---|
| committer | vvvv <[email protected]> | 2023-07-31 18:21:04 +0300 | 
| commit | dec41c40e51aa407edef81a3c566a5a15780fc49 (patch) | |
| tree | 4f197b596b32f35eca368121f0dff913419da9af /library/cpp/regex | |
| parent | 3ca8b54c96e09eb2b65be7f09675623438d559c7 (diff) | |
YQL-16239 Move purecalc to public
Diffstat (limited to 'library/cpp/regex')
| -rw-r--r-- | library/cpp/regex/CMakeLists.darwin-x86_64.txt | 1 | ||||
| -rw-r--r-- | library/cpp/regex/CMakeLists.linux-aarch64.txt | 1 | ||||
| -rw-r--r-- | library/cpp/regex/CMakeLists.linux-x86_64.txt | 1 | ||||
| -rw-r--r-- | library/cpp/regex/CMakeLists.windows-x86_64.txt | 1 | ||||
| -rw-r--r-- | library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt | 19 | ||||
| -rw-r--r-- | library/cpp/regex/glob/CMakeLists.linux-aarch64.txt | 20 | ||||
| -rw-r--r-- | library/cpp/regex/glob/CMakeLists.linux-x86_64.txt | 20 | ||||
| -rw-r--r-- | library/cpp/regex/glob/CMakeLists.txt | 17 | ||||
| -rw-r--r-- | library/cpp/regex/glob/CMakeLists.windows-x86_64.txt | 19 | ||||
| -rw-r--r-- | library/cpp/regex/glob/glob.cpp | 921 | ||||
| -rw-r--r-- | library/cpp/regex/glob/glob_compat.h | 73 | ||||
| -rw-r--r-- | library/cpp/regex/glob/glob_iterator.cpp | 1 | ||||
| -rw-r--r-- | library/cpp/regex/glob/glob_iterator.h | 36 | ||||
| -rw-r--r-- | library/cpp/regex/glob/ya.make | 12 | 
14 files changed, 1142 insertions, 0 deletions
| diff --git a/library/cpp/regex/CMakeLists.darwin-x86_64.txt b/library/cpp/regex/CMakeLists.darwin-x86_64.txt index 6e2a4fabcd2..877d40538b9 100644 --- a/library/cpp/regex/CMakeLists.darwin-x86_64.txt +++ b/library/cpp/regex/CMakeLists.darwin-x86_64.txt @@ -6,6 +6,7 @@  # original buildsystem will not be accepted. +add_subdirectory(glob)  add_subdirectory(hyperscan)  add_subdirectory(pcre)  add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.linux-aarch64.txt b/library/cpp/regex/CMakeLists.linux-aarch64.txt index 279390306ba..84c257a8194 100644 --- a/library/cpp/regex/CMakeLists.linux-aarch64.txt +++ b/library/cpp/regex/CMakeLists.linux-aarch64.txt @@ -6,5 +6,6 @@  # original buildsystem will not be accepted. +add_subdirectory(glob)  add_subdirectory(pcre)  add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.linux-x86_64.txt b/library/cpp/regex/CMakeLists.linux-x86_64.txt index 6e2a4fabcd2..877d40538b9 100644 --- a/library/cpp/regex/CMakeLists.linux-x86_64.txt +++ b/library/cpp/regex/CMakeLists.linux-x86_64.txt @@ -6,6 +6,7 @@  # original buildsystem will not be accepted. +add_subdirectory(glob)  add_subdirectory(hyperscan)  add_subdirectory(pcre)  add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.windows-x86_64.txt b/library/cpp/regex/CMakeLists.windows-x86_64.txt index 6e2a4fabcd2..877d40538b9 100644 --- a/library/cpp/regex/CMakeLists.windows-x86_64.txt +++ b/library/cpp/regex/CMakeLists.windows-x86_64.txt @@ -6,6 +6,7 @@  # original buildsystem will not be accepted. +add_subdirectory(glob)  add_subdirectory(hyperscan)  add_subdirectory(pcre)  add_subdirectory(pire) diff --git a/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt b/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt new file mode 100644 index 00000000000..ca8383e355a --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-regex-glob) +target_link_libraries(cpp-regex-glob PUBLIC +  contrib-libs-cxxsupp +  yutil +  library-cpp-charset +) +target_sources(cpp-regex-glob PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp +  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp +) diff --git a/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt b/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..3953937c6d1 --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-regex-glob) +target_link_libraries(cpp-regex-glob PUBLIC +  contrib-libs-linux-headers +  contrib-libs-cxxsupp +  yutil +  library-cpp-charset +) +target_sources(cpp-regex-glob PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp +  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp +) diff --git a/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt b/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt new file mode 100644 index 00000000000..3953937c6d1 --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-regex-glob) +target_link_libraries(cpp-regex-glob PUBLIC +  contrib-libs-linux-headers +  contrib-libs-cxxsupp +  yutil +  library-cpp-charset +) +target_sources(cpp-regex-glob PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp +  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp +) diff --git a/library/cpp/regex/glob/CMakeLists.txt b/library/cpp/regex/glob/CMakeLists.txt new file mode 100644 index 00000000000..f8b31df0c11 --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) +  include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") +  include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) +  include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) +  include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt b/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt new file mode 100644 index 00000000000..ca8383e355a --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-regex-glob) +target_link_libraries(cpp-regex-glob PUBLIC +  contrib-libs-cxxsupp +  yutil +  library-cpp-charset +) +target_sources(cpp-regex-glob PRIVATE +  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp +  ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp +) diff --git a/library/cpp/regex/glob/glob.cpp b/library/cpp/regex/glob/glob.cpp new file mode 100644 index 00000000000..9da058122a7 --- /dev/null +++ b/library/cpp/regex/glob/glob.cpp @@ -0,0 +1,921 @@ +#define FROM_IMPLEMENTATION +#include "glob_compat.h" + +#if defined(USE_INTERNAL_GLOB) +/* + * Copyright (c) 1989, 1993 + *    The Regents of the University of California.  All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Guido van Rossum. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + *    must display the following acknowledgement: + *    This product includes software developed by the University of + *    California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + *    may be used to endorse or promote products derived from this software + *    without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <library/cpp/charset/ci_string.h> +#include <util/system/compat.h> +#include <util/folder/dirut.h> + +/* + * glob(3) -- a superset of the one defined in POSIX 1003.2. + * + * The [!...] convention to negate a range is supported (SysV, Posix, ksh). + * + * Optional extra services, controlled by flags not defined by POSIX: + * + * GLOB_QUOTE: + *    Escaping convention: \ inhibits any special meaning the following + *    character might have (except \ at end of string is retained). + * GLOB_MAGCHAR: + *    Set in gl_flags if pattern contained a globbing character. + * GLOB_NOMAGIC: + *    Same as GLOB_NOCHECK, but it will only append pattern if it did + *    not contain any magic characters.  [Used in csh style globbing] + * GLOB_ALTDIRFUNC: + *    Use alternately specified directory access functions. + * GLOB_TILDE: + *    expand ~user/foo to the /home/dir/of/user/foo + * GLOB_BRACE: + *    expand {1,2}{a,b} to 1a 1b 2a 2b + * gl_matchc: + *    Number of matches in the current invocation of glob. + */ + +/* + * Some notes on multibyte character support: + * 1. Patterns with illegal byte sequences match nothing - even if + *    GLOB_NOCHECK is specified. + * 2. Illegal byte sequences in filenames are handled by treating them as + *    single-byte characters with a value of the first byte of the sequence + *    cast to wchar_t. + * 3. State-dependent encodings are not currently supported. 
+ */ + +//#include <sys/param.h> +#include <sys/stat.h> + +#include <ctype.h> +//#include <dirent.h> +#include <errno.h> +#include <limits.h> +//#include <pwd.h> +//#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#if defined(_unix_) +#include <unistd.h> +#endif +#include <wchar.h> + +#if !defined(_unix_) +// silly replacement for compilation +using uint_fast64_t = ui64; +using u_int = unsigned int; +using u_char = unsigned char; +#define ARG_MAX 256 +#define S_ISDIR(x) ((x) & _S_IFDIR) +#define S_ISLNK(x) 0 +#define lstat stat +inline bool issetugid() { return false; } +inline char *getlogin() { return 0; } +inline int getuid() { return 0; } +struct passwd { +    char *pw_dir; +}; +inline passwd *getpwuid(int) { return 0; } +inline passwd *getpwnam(char *) { return 0; } +#endif + +#define __collate_load_error 1 +inline int __collate_range_cmp(int, int) { return 0; } +#undef COMMA // was defined in stroka.h +// end silly replacement + +//#include "collate.h" + +#define    DOLLAR        '$' +#define    DOT        '.' +#define    EOS        '\0' +#define    LBRACKET    '[' +#define    NOT        '!' +#define    QUESTION    '?' 
+#define    QUOTE        '\\' +#define    RANGE        '-' +#define    RBRACKET    ']' +#define    SEP        '/' +#define    STAR        '*' +#define    TILDE        '~' +#define    UNDERSCORE    '_' +#define    LBRACE        '{' +#define    RBRACE        '}' +#define    SLASH        '/' +#define    COMMA        ',' + +#ifndef DEBUG + +#define    M_QUOTE        0x8000000000ULL +#define    M_PROTECT    0x4000000000ULL +#define    M_MASK        0xffffffffffULL +#define    M_CHAR        0x00ffffffffULL + +using Char = uint_fast64_t; + +#else + +#define    M_QUOTE        0x80 +#define    M_PROTECT    0x40 +#define    M_MASK        0xff +#define    M_CHAR        0x7f + +using Char = char; + +#endif + + +#define    CHAR(c)        ((Char)((c)&M_CHAR)) +#define    META(c)        ((Char)((c)|M_QUOTE)) +#define    M_ALL        META('*') +#define    M_END        META(']') +#define    M_NOT        META('!') +#define    M_ONE        META('?') +#define    M_RNG        META('-') +#define    M_SET        META('[') +#define    ismeta(c)    (((c)&M_QUOTE) != 0) + + +static int     compare(const void *, const void *); +static int     g_Ctoc(const Char *, char *, u_int); +static int     g_lstat(Char *, struct stat *, glob_t *); +static DIR    *g_opendir(Char *, glob_t *); +static Char    *g_strchr(Char *, wchar_t); +#ifdef notdef +static Char    *g_strcat(Char *, const Char *); +#endif +static int     glob0(const Char *, glob_t *, int *); +static int     glob1(Char *, glob_t *, int *); +static int     glob2(Char *, Char *, Char *, Char *, glob_t *, int *); +static int     glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, int *); +static int     globextend(const Char *, glob_t *, int *); +static const Char * +         globtilde(const Char *, Char *, size_t, glob_t *); +static int     globexp1(const Char *, glob_t *, int *); +static int     globexp2(const Char *, const Char *, glob_t *, int *, int *); +static int     match(Char *, Char *, Char *); +#ifdef DEBUG +static void     
qprintf(const char *, Char *); +#endif + +int +glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob) +{ +    const u_char *patnext; +    int limit; +    Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot; +    mbstate_t mbs; +    wchar_t wc; +    size_t clen; + +    patnext = (u_char *) pattern; +    if (!(flags & GLOB_APPEND)) { +        pglob->gl_pathc = 0; +        pglob->gl_pathv = NULL; +        if (!(flags & GLOB_DOOFFS)) +            pglob->gl_offs = 0; +    } +    if (flags & GLOB_LIMIT) { +        limit = pglob->gl_matchc; +        if (limit == 0) +            limit = ARG_MAX; +    } else +        limit = 0; +    pglob->gl_flags = flags & ~GLOB_MAGCHAR; +    pglob->gl_errfunc = errfunc; +    pglob->gl_matchc = 0; + +    bufnext = patbuf; +    bufend = bufnext + MAXPATHLEN - 1; +    if (flags & GLOB_NOESCAPE) { +        memset(&mbs, 0, sizeof(mbs)); +        while (bufend - bufnext >= MB_CUR_MAX) { +            clen = mbrtowc(&wc, (const char*)patnext, MB_LEN_MAX, &mbs); +            if (clen == (size_t)-1 || clen == (size_t)-2) +                return (GLOB_NOMATCH); +            else if (clen == 0) +                break; +            *bufnext++ = wc; +            patnext += clen; +        } +    } else { +        /* Protect the quoted characters. 
*/ +        memset(&mbs, 0, sizeof(mbs)); +        while (bufend - bufnext >= MB_CUR_MAX) { +            if (*patnext == QUOTE) { +                if (*++patnext == EOS) { +                    *bufnext++ = QUOTE | M_PROTECT; +                    continue; +                } +                prot = M_PROTECT; +            } else +                prot = 0; +            clen = mbrtowc(&wc, (const char*)patnext, MB_LEN_MAX, &mbs); +            if (clen == (size_t)-1 || clen == (size_t)-2) +                return (GLOB_NOMATCH); +            else if (clen == 0) +                break; +            *bufnext++ = wc | prot; +            patnext += clen; +        } +    } +    *bufnext = EOS; + +    if (flags & GLOB_BRACE) +        return globexp1(patbuf, pglob, &limit); +    else +        return glob0(patbuf, pglob, &limit); +} + +/* + * Expand recursively a glob {} pattern. When there is no more expansion + * invoke the standard globbing routine to glob the rest of the magic + * characters + */ +static int +globexp1(const Char *pattern, glob_t *pglob, int *limit) +{ +    const Char* ptr = pattern; +    int rv; + +    /* Protect a single {}, for find(1), like csh */ +    if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) +        return glob0(pattern, pglob, limit); + +    while ((ptr = (const Char *) g_strchr((Char *) ptr, LBRACE)) != NULL) +        if (!globexp2(ptr, pattern, pglob, &rv, limit)) +            return rv; + +    return glob0(pattern, pglob, limit); +} + + +/* + * Recursive brace globbing helper. Tries to expand a single brace. + * If it succeeds then it invokes globexp1 with the new pattern. + * If it fails then it tries to glob the rest of the pattern and returns. 
+ */ +static int +globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, int *limit) +{ +    int     i; +    Char   *lm, *ls; +    const Char *pe, *pm, *pm1, *pl; +    Char    patbuf[MAXPATHLEN]; + +    /* copy part up to the brace */ +    for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) +        continue; +    *lm = EOS; +    ls = lm; + +    /* Find the balanced brace */ +    for (i = 0, pe = ++ptr; *pe; pe++) +        if (*pe == LBRACKET) { +            /* Ignore everything between [] */ +            for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) +                continue; +            if (*pe == EOS) { +                /* +                 * We could not find a matching RBRACKET. +                 * Ignore and just look for RBRACE +                 */ +                pe = pm; +            } +        } +        else if (*pe == LBRACE) +            i++; +        else if (*pe == RBRACE) { +            if (i == 0) +                break; +            i--; +        } + +    /* Non matching braces; just glob the pattern */ +    if (i != 0 || *pe == EOS) { +        *rv = glob0(patbuf, pglob, limit); +        return 0; +    } + +    for (i = 0, pl = pm = ptr; pm <= pe; pm++) +        switch (*pm) { +        case LBRACKET: +            /* Ignore everything between [] */ +            for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++) +                continue; +            if (*pm == EOS) { +                /* +                 * We could not find a matching RBRACKET. 
+                 * Ignore and just look for RBRACE +                 */ +                pm = pm1; +            } +            break; + +        case LBRACE: +            i++; +            break; + +        case RBRACE: +            if (i) { +                i--; +                break; +            } +            [[fallthrough]]; +        case COMMA: +            if (i && *pm == COMMA) +                break; +            else { +                /* Append the current string */ +                for (lm = ls; (pl < pm); *lm++ = *pl++) +                    continue; +                /* +                 * Append the rest of the pattern after the +                 * closing brace +                 */ +                for (pl = pe + 1; (*lm++ = *pl++) != EOS;) +                    continue; + +                /* Expand the current pattern */ +#ifdef DEBUG +                qprintf("globexp2:", patbuf); +#endif +                *rv = globexp1(patbuf, pglob, limit); + +                /* move after the comma, to the next string */ +                pl = pm + 1; +            } +            break; + +        default: +            break; +        } +    *rv = 0; +    return 0; +} + + + +/* + * expand tilde from the passwd file. 
+ */ +static const Char * +globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob) +{ +    struct passwd *pwd; +    char *h; +    const Char *p; +    Char *b, *eb; + +    if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) +        return pattern; + +    /* +     * Copy up to the end of the string or / +     */ +    eb = &patbuf[patbuf_len - 1]; +    for (p = pattern + 1, h = (char *) patbuf; +        h < (char *)eb && *p && *p != SLASH; *h++ = (char)*p++) +        continue; + +    *h = EOS; + +    if (((char *) patbuf)[0] == EOS) { +        /* +         * handle a plain ~ or ~/ by expanding $HOME first (iff +         * we're not running setuid or setgid) and then trying +         * the password file +         */ +        if (issetugid() != 0 || +            (h = ::getenv("HOME")) == NULL) { +            if (((h = getlogin()) != NULL && +                 (pwd = getpwnam(h)) != NULL) || +                (pwd = getpwuid(getuid())) != NULL) +                h = pwd->pw_dir; +            else +                return pattern; +        } +    } +    else { +        /* +         * Expand a ~user +         */ +        if ((pwd = getpwnam((char*) patbuf)) == NULL) +            return pattern; +        else +            h = pwd->pw_dir; +    } + +    /* Copy the home directory */ +    for (b = patbuf; b < eb && *h; *b++ = *h++) +        continue; + +    /* Append the rest of the pattern */ +    while (b < eb && (*b++ = *p++) != EOS) +        continue; +    *b = EOS; + +    return patbuf; +} + + +/* + * The main glob() routine: compiles the pattern (optionally processing + * quotes), calls glob1() to do the real pattern matching, and finally + * sorts the list (unless unsorted operation is requested).  Returns 0 + * if things went well, nonzero if errors occurred. 
+ */ +static int +glob0(const Char *pattern, glob_t *pglob, int *limit) +{ +    const Char *qpatnext; +    int c, err, oldpathc; +    Char *bufnext, patbuf[MAXPATHLEN]; + +    qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); +    oldpathc = pglob->gl_pathc; +    bufnext = patbuf; + +    /* We don't need to check for buffer overflow any more. */ +    while ((c = (char)*qpatnext++) != EOS) { +        switch (c) { +        case LBRACKET: +            c = (char)*qpatnext; +            if (c == NOT) +                ++qpatnext; +            if (*qpatnext == EOS || +                g_strchr((Char *) qpatnext+1, RBRACKET) == NULL) { +                *bufnext++ = LBRACKET; +                if (c == NOT) +                    --qpatnext; +                break; +            } +            *bufnext++ = M_SET; +            if (c == NOT) +                *bufnext++ = M_NOT; +            c = (char)*qpatnext++; +            do { +                *bufnext++ = CHAR(c); +                if (*qpatnext == RANGE && +                    (c = (char)qpatnext[1]) != RBRACKET) { +                    *bufnext++ = M_RNG; +                    *bufnext++ = CHAR(c); +                    qpatnext += 2; +                } +            } while ((c = (char)*qpatnext++) != RBRACKET); +            pglob->gl_flags |= GLOB_MAGCHAR; +            *bufnext++ = M_END; +            break; +        case QUESTION: +            pglob->gl_flags |= GLOB_MAGCHAR; +            *bufnext++ = M_ONE; +            break; +        case STAR: +            pglob->gl_flags |= GLOB_MAGCHAR; +            /* collapse adjacent stars to one, +             * to avoid exponential behavior +             */ +            if (bufnext == patbuf || bufnext[-1] != M_ALL) +                *bufnext++ = M_ALL; +            break; +        default: +            *bufnext++ = CHAR(c); +            break; +        } +    } +    *bufnext = EOS; +#ifdef DEBUG +    qprintf("glob0:", patbuf); +#endif + +    if ((err = glob1(patbuf, pglob, 
limit)) != 0) +        return(err); + +    /* +     * If there was no match we are going to append the pattern +     * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified +     * and the pattern did not contain any magic characters +     * GLOB_NOMAGIC is there just for compatibility with csh. +     */ +    if (pglob->gl_pathc == oldpathc) { +        if (((pglob->gl_flags & GLOB_NOCHECK) || +            ((pglob->gl_flags & GLOB_NOMAGIC) && +            !(pglob->gl_flags & GLOB_MAGCHAR)))) +            return(globextend(pattern, pglob, limit)); +        else +            return(GLOB_NOMATCH); +    } +    if (!(pglob->gl_flags & GLOB_NOSORT)) +        qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, +            pglob->gl_pathc - oldpathc, sizeof(char *), compare); +    return(0); +} + +static int +compare(const void *p, const void *q) +{ +    return(strcmp(*(char **)p, *(char **)q)); +} + +static int +glob1(Char *pattern, glob_t *pglob, int *limit) +{ +    Char pathbuf[MAXPATHLEN]; + +    /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ +    if (*pattern == EOS) +        return(0); +    return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1, +        pattern, pglob, limit)); +} + +/* + * The functions glob2 and glob3 are mutually recursive; there is one level + * of recursion for each segment in the pattern that contains one or more + * meta characters. + */ +static int +glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, glob_t *pglob, int *limit) +{ +    struct stat sb; +    Char *p, *q; +    int anymeta; + +    /* +     * Loop over pattern segments until end of pattern or until +     * segment with meta character found. +     */ +    for (anymeta = 0;;) { +        if (*pattern == EOS) {        /* End of pattern? 
*/ +            *pathend = EOS; +            if (g_lstat(pathbuf, &sb, pglob)) +                return(0); + +            if (((pglob->gl_flags & GLOB_MARK) && +                pathend[-1] != SEP) && (S_ISDIR(sb.st_mode))) { +                if (pathend + 1 > pathend_last) +                    return (GLOB_ABORTED); +                *pathend++ = SEP; +                *pathend = EOS; +            } +            ++pglob->gl_matchc; +            return(globextend(pathbuf, pglob, limit)); +        } + +        /* Find end of next segment, copy tentatively to pathend. */ +        q = pathend; +        p = pattern; +        while (*p != EOS && *p != SEP) { +            if (ismeta(*p)) +                anymeta = 1; +            if (q + 1 > pathend_last) +                return (GLOB_ABORTED); +            *q++ = *p++; +        } + +        if (!anymeta) {        /* No expansion, do next segment. */ +            pathend = q; +            pattern = p; +            while (*pattern == SEP) { +                if (pathend + 1 > pathend_last) +                    return (GLOB_ABORTED); +                *pathend++ = *pattern++; +            } +        } else            /* Need expansion, recurse. */ +            return(glob3(pathbuf, pathend, pathend_last, pattern, p, +                pglob, limit)); +    } +    /* NOTREACHED */ +} + +static int +glob3(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, Char *restpattern, glob_t *pglob, int *limit) +{ +    struct dirent *dp; +    DIR *dirp; +    int err; +    char buf[MAXPATHLEN]; + +    /* +     * The readdirfunc declaration can't be prototyped, because it is +     * assigned, below, to two functions which are prototyped in glob.h +     * and dirent.h as taking pointers to differently typed opaque +     * structures. 
+     */ +    typedef struct dirent *(*readdirfunc_t)(void*); +    readdirfunc_t readdirfunc; + +    if (pathend > pathend_last) +        return (GLOB_ABORTED); +    *pathend = EOS; +    errno = 0; + +    if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { +        /* TODO: don't call for ENOENT or ENOTDIR? */ +        if (pglob->gl_errfunc) { +            if (g_Ctoc(pathbuf, buf, sizeof(buf))) +                return (GLOB_ABORTED); +            if (pglob->gl_errfunc(buf, errno) || +                pglob->gl_flags & GLOB_ERR) +                return (GLOB_ABORTED); +        } +        return(0); +    } + +    err = 0; + +    /* Search directory for matching names. */ +    if (pglob->gl_flags & GLOB_ALTDIRFUNC) +        readdirfunc = pglob->gl_readdir; +    else +        readdirfunc = (readdirfunc_t)readdir; +    while ((dp = (*readdirfunc)(dirp))) { +        u_char *sc; +        Char *dc; +        wchar_t wc; +        size_t clen; +        mbstate_t mbs; + +        /* Initial DOT must be matched literally. 
*/ +        if (dp->d_name[0] == DOT && *pattern != DOT) +            continue; +        memset(&mbs, 0, sizeof(mbs)); +        dc = pathend; +        sc = (u_char *) dp->d_name; +        while (dc < pathend_last) { +            clen = mbrtowc(&wc, (const char*)sc, MB_LEN_MAX, &mbs); +            if (clen == (size_t)-1 || clen == (size_t)-2) { +                wc = *sc; +                clen = 1; +                memset(&mbs, 0, sizeof(mbs)); +            } +            if ((*dc++ = wc) == EOS) +                break; +            sc += clen; +        } +        if (!match(pathend, pattern, restpattern)) { +            *pathend = EOS; +            continue; +        } +        err = glob2(pathbuf, --dc, pathend_last, restpattern, +            pglob, limit); +        if (err) +            break; +    } + +    if (pglob->gl_flags & GLOB_ALTDIRFUNC) +        (*pglob->gl_closedir)(dirp); +    else +        closedir(dirp); +    return(err); +} + + +/* + * Extend the gl_pathv member of a glob_t structure to accomodate a new item, + * add the new item, and update gl_pathc. + * + * This assumes the BSD realloc, which only copies the block when its size + * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic + * behavior. + * + * Return 0 if new item added, error code if memory couldn't be allocated. + * + * Invariant of the glob_t structure: + *    Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and + *    gl_pathv points to (gl_offs + gl_pathc + 1) items. + */ +static int +globextend(const Char *path, glob_t *pglob, int *limit) +{ +    char **pathv; +    int i; +    size_t newsize, len; +    char *copy; +    const Char *p; + +    if (*limit && pglob->gl_pathc > *limit) { +        errno = 0; +        return (GLOB_NOSPACE); +    } + +    newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); +    pathv = pglob->gl_pathv ? 
+            (char**)realloc((char *)pglob->gl_pathv, newsize) : +            (char**)malloc(newsize); +    if (pathv == NULL) { +        if (pglob->gl_pathv) { +            free(pglob->gl_pathv); +            pglob->gl_pathv = NULL; +        } +        return(GLOB_NOSPACE); +    } + +    if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { +        /* first time around -- clear initial gl_offs items */ +        pathv += pglob->gl_offs; +        for (i = pglob->gl_offs; --i >= 0; ) +            *--pathv = NULL; +    } +    pglob->gl_pathv = pathv; + +    for (p = path; *p++;) +        continue; +    len = MB_CUR_MAX * (size_t)(p - path);    /* XXX overallocation */ +    if ((copy = (char*)malloc(len)) != NULL) { +        if (g_Ctoc(path, copy, (u_int)len)) { +            free(copy); +            return (GLOB_NOSPACE); +        } +        pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; +    } +    pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; +    return(copy == NULL ? GLOB_NOSPACE : 0); +} + +/* + * pattern matching function for filenames.  Each occurrence of the * + * pattern causes a recursion level. 
+ */
+static int
+match(Char *name, Char *pat, Char *patend)
+{
+    while (pat < patend) {
+        Char sym = *pat++;
+
+        switch (sym & M_MASK) {
+        case M_ALL:
+            /* Nothing after the '*': the remainder of name always matches. */
+            if (pat == patend)
+                return(1);
+            /*
+             * Try the rest of the pattern against every suffix of name,
+             * the empty suffix at the terminating EOS included.
+             */
+            for (;;) {
+                if (match(name, pat, patend))
+                    return(1);
+                if (*name++ == EOS)
+                    return(0);
+            }
+        case M_ONE:
+            /* Consumes exactly one character; fails at end of name. */
+            if (*name++ == EOS)
+                return(0);
+            break;
+        case M_SET: {
+            int hit, negated;
+            Char ch, member;
+
+            hit = 0;
+            ch = *name++;
+            if (ch == EOS)
+                return(0);
+            /* A leading M_NOT inverts the sense of the whole set. */
+            negated = ((*pat & M_MASK) == M_NOT);
+            if (negated != EOS)
+                ++pat;
+            for (;;) {
+                member = *pat++;
+                if ((member & M_MASK) == M_END)
+                    break;
+                if ((*pat & M_MASK) == M_RNG) {
+                    /* member M_RNG pat[1]: a range with both ends included. */
+                    int in_range;
+
+                    if (__collate_load_error)
+                        in_range = CHAR(member) <= CHAR(ch) && CHAR(ch) <= CHAR(pat[1]);
+                    else
+                        in_range = __collate_range_cmp((int)CHAR(member), (int)CHAR(ch)) <= 0
+                            && __collate_range_cmp((int)CHAR(ch), (int)CHAR(pat[1])) <= 0;
+                    if (in_range)
+                        hit = 1;
+                    pat += 2;
+                } else if (member == ch) {
+                    hit = 1;
+                }
+            }
+            if (hit == negated)
+                return(0);
+            break;
+        }
+        default:
+            /* Any other pattern element must equal the name character. */
+            if (*name++ != sym)
+                return(0);
+            break;
+        }
+    }
+    /* Pattern exhausted: succeed only if the name is exhausted as well. */
+    return(*name == EOS);
+}
+
+/* Free allocated data belonging to a glob_t structure.
*/
+void
+globfree(glob_t *pglob)
+{
+    char **entry;
+    int remaining;
+
+    if (pglob->gl_pathv == NULL)
+        return;
+
+    /* The stored paths follow the gl_offs leading slots of the vector. */
+    entry = pglob->gl_pathv + pglob->gl_offs;
+    for (remaining = pglob->gl_pathc; remaining != 0; --remaining, ++entry) {
+        if (*entry)
+            free(*entry);
+    }
+    free(pglob->gl_pathv);
+    pglob->gl_pathv = NULL;
+}
+
+/*
+ * Open the directory named by the Char string str ("." when str is empty),
+ * through gl_opendir when GLOB_ALTDIRFUNC is set and opendir() otherwise.
+ * Returns NULL if the name cannot be converted into a MAXPATHLEN buffer.
+ */
+static DIR *
+g_opendir(Char *str, glob_t *pglob)
+{
+    char path[MAXPATHLEN];
+
+    if (*str) {
+        if (g_Ctoc(str, path, sizeof(path)))
+            return (NULL);
+    } else {
+        strcpy(path, ".");
+    }
+
+    return (pglob->gl_flags & GLOB_ALTDIRFUNC)
+        ? (DIR*)((*pglob->gl_opendir)(path))
+        : opendir(path);
+}
+
+/*
+ * lstat() the Char path fn into sb, through gl_lstat when GLOB_ALTDIRFUNC is
+ * set.  Returns -1 with errno = ENAMETOOLONG if fn cannot be converted.
+ */
+static int
+g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
+{
+    char path[MAXPATHLEN];
+
+    if (g_Ctoc(fn, path, sizeof(path)) != 0) {
+        errno = ENAMETOOLONG;
+        return (-1);
+    }
+    return (pglob->gl_flags & GLOB_ALTDIRFUNC)
+        ? (*pglob->gl_lstat)(path, sb)
+        : lstat(path, sb);
+}
+
+/*
+ * Return a pointer to the first element of str equal to ch (the terminator
+ * itself is a valid match), or NULL when the string ends first.
+ */
+static Char *
+g_strchr(Char *str, wchar_t ch)
+{
+    for (;; ++str) {
+        if (*str == ch)
+            return (str);
+        if (!*str)
+            return (NULL);
+    }
+}
+
+/*
+ * Convert the Char string str, terminator included, to multibyte form in
+ * buf, which has room for len bytes.  Returns 0 on success and 1 when a
+ * character cannot be converted or fewer than MB_CUR_MAX bytes remain.
+ */
+static int
+g_Ctoc(const Char *str, char *buf, u_int len)
+{
+    mbstate_t state;
+    size_t written;
+
+    memset(&state, 0, sizeof(state));
+    for (; (int)len >= MB_CUR_MAX; ++str, buf += written, len -= (u_int)written) {
+        written = wcrtomb(buf, (wchar_t)*str, &state);
+        if (written == (size_t)-1)
+            return (1);
+        /* The terminator has just been written: conversion is complete. */
+        if (*str == L'\0')
+            return (0);
+    }
+    return (1);
+}
+
+#ifdef DEBUG
+static void
+qprintf(const char *str, Char *s)
+{
+    Char *p;
+
+    (void)printf("%s:\n", str);
+    for (p = s; *p; p++)
+        (void)printf("%c", CHAR(*p));
+    (void)printf("\n");
+    for (p = s; *p; p++)
+        (void)printf("%c", *p & M_PROTECT ? '"' : ' ');
+    (void)printf("\n");
+    for (p = s; *p; p++)
+        (void)printf("%c", ismeta(*p) ? 
'_' : ' ');
+    (void)printf("\n");
+}
+#endif
+#endif
diff --git a/library/cpp/regex/glob/glob_compat.h b/library/cpp/regex/glob/glob_compat.h
new file mode 100644
index 00000000000..0dc518d51bd
--- /dev/null
+++ b/library/cpp/regex/glob/glob_compat.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <util/system/defaults.h>
+
+#if defined(_MSC_VER) || defined(_bionic_) /* these builds use the declarations below, not <glob.h> */
+#define USE_INTERNAL_GLOB
+#endif
+
+#if !defined(USE_INTERNAL_GLOB)
+#include <glob.h>
+#else
+
+struct stat; /* forward declaration; only used through pointers in the callbacks below */
+typedef struct { /* result vector, flags and optional filesystem callbacks of a glob call */
+    int gl_pathc;    /* Count of total paths so far. */
+    int gl_matchc;   /* Count of paths matching pattern. */
+    int gl_offs;     /* Reserved at beginning of gl_pathv. */
+    int gl_flags;    /* Copy of flags parameter to glob. */
+    char** gl_pathv; /* List of paths matching pattern. */
+                     /* Copy of errfunc parameter to glob. */
+    int (*gl_errfunc)(const char*, int);
+
+    /*
+     * Alternate filesystem access methods for glob; replacement
+     * versions of closedir(3), readdir(3), opendir(3), stat(2)
+     * and lstat(2).  The directory handle travels as void*
+     * between gl_opendir, gl_readdir and gl_closedir.
+     */
+    void (*gl_closedir)(void*);
+    struct dirent* (*gl_readdir)(void*);
+    void* (*gl_opendir)(const char*);
+    int (*gl_lstat)(const char*, struct stat*);
+    int (*gl_stat)(const char*, struct stat*);
+} glob_t;
+
+//#if __POSIX_VISIBLE >= 199209
+/* Believed to have been introduced in 1003.2-1992 */
+#define GLOB_APPEND 0x0001     /* Append to output from previous call. */
+#define GLOB_DOOFFS 0x0002     /* Use gl_offs. */
+#define GLOB_ERR 0x0004        /* Return on error. */
+#define GLOB_MARK 0x0008       /* Append / to matching directories. */
+#define GLOB_NOCHECK 0x0010    /* Return pattern itself if nothing matches. */
+#define GLOB_NOSORT 0x0020     /* Don't sort. */
+#define GLOB_NOESCAPE 0x2000   /* Disable backslash escaping. */
+
+/* Error values returned by glob(3) */
+#define GLOB_NOSPACE (-1)      /* Malloc call failed. */
+#define GLOB_ABORTED (-2)      /* Unignored error. 
*/ +#define GLOB_NOMATCH (-3)      /* No match and GLOB_NOCHECK was not set. */ +#define GLOB_NOSYS (-4)        /* Obsolete: source comptability only. */ +//#endif /* __POSIX_VISIBLE >= 199209 */ + +//#if __BSD_VISIBLE +#define GLOB_ALTDIRFUNC 0x0040 /* Use alternately specified directory funcs. */ +#define GLOB_BRACE 0x0080      /* Expand braces ala csh. */ +#define GLOB_MAGCHAR 0x0100    /* Pattern had globbing characters. */ +#define GLOB_NOMAGIC 0x0200    /* GLOB_NOCHECK without magic chars (csh). */ +#define GLOB_QUOTE 0x0400      /* Quote special chars with \. */ +#define GLOB_TILDE 0x0800      /* Expand tilde names from the passwd file. */ +#define GLOB_LIMIT 0x1000      /* limit number of returned paths */ + +/* source compatibility, these are the old names */ +#define GLOB_MAXPATH GLOB_LIMIT +#define GLOB_ABEND GLOB_ABORTED +//#endif /* __BSD_VISIBLE */ + +int glob(const char*, int, int (*)(const char*, int), glob_t*); +void globfree(glob_t*); + +#endif /* _MSC_VER */ + +#if !defined(FROM_IMPLEMENTATION) +#undef USE_INTERNAL_GLOB +#endif diff --git a/library/cpp/regex/glob/glob_iterator.cpp b/library/cpp/regex/glob/glob_iterator.cpp new file mode 100644 index 00000000000..746b49f3975 --- /dev/null +++ b/library/cpp/regex/glob/glob_iterator.cpp @@ -0,0 +1 @@ +#include "glob_iterator.h" diff --git a/library/cpp/regex/glob/glob_iterator.h b/library/cpp/regex/glob/glob_iterator.h new file mode 100644 index 00000000000..e25481e594e --- /dev/null +++ b/library/cpp/regex/glob/glob_iterator.h @@ -0,0 +1,36 @@ +#pragma once + +#include "glob_compat.h" + +#include <util/generic/noncopyable.h> +#include <util/generic/string.h> +#include <util/generic/yexception.h> + +class TGlobPaths : TNonCopyable { +public: +    TGlobPaths(const char* pattern) { +        Impl.gl_pathc = 0; +        int result = glob(pattern, 0, nullptr, &Impl); +        Y_ENSURE(result == 0 || result == GLOB_NOMATCH, "glob failed"); +    } + +    TGlobPaths(const TString& pattern) +        : 
TGlobPaths(pattern.data()) +    { +    } + +    ~TGlobPaths() { +        globfree(&Impl); +    } + +    const char** begin() { +        return const_cast<const char**>(Impl.gl_pathv); +    } + +    const char** end() { +        return const_cast<const char**>(Impl.gl_pathv + Impl.gl_pathc); +    } + +private: +    glob_t Impl; +}; diff --git a/library/cpp/regex/glob/ya.make b/library/cpp/regex/glob/ya.make new file mode 100644 index 00000000000..9379742d999 --- /dev/null +++ b/library/cpp/regex/glob/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( +    glob.cpp +    glob_iterator.cpp +) + +PEERDIR( +    library/cpp/charset +) + +END() | 
