aboutsummaryrefslogblamecommitdiffstats
path: root/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h
blob: 5ae35431e43d9b0e9847f46c7a5d5a0d11fe2569 (plain) (tree)
1
2
  
                                             






































































                                                                                
                                            



















                                                                        







                                                                                














































































































                                                                                     
                                                                  


















































































































































































































































                                                                                    
/*
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Large Bounded Repeat (LBR) engine: runtime impl X-macros.
 */

#include "util/join.h"

#define ENGINE_EXEC_NAME JOIN(nfaExecLbr, ENGINE_ROOT_NAME)
#define EXEC_FN JOIN(lbrExec, ENGINE_ROOT_NAME)
#define FWDSCAN_FN JOIN(lbrFwdScan, ENGINE_ROOT_NAME)
#define REVSCAN_FN JOIN(lbrRevScan, ENGINE_ROOT_NAME)

char JOIN(ENGINE_EXEC_NAME, _queueCompressState)(const struct NFA *nfa,
                                                 const struct mq *q, s64a loc) {
    assert(nfa && q);
    assert(isLbrType(nfa->type));
    DEBUG_PRINTF("entry, q->offset=%llu, loc=%lld\n", q->offset, loc);

    const struct lbr_common *l = getImplNfa(nfa);
    const struct lbr_state *lstate = (const struct lbr_state *)q->state;

    u64a offset = q->offset + loc;
    lbrCompressState(l, offset, lstate, q->streamState);
    return 0;
}

char JOIN(ENGINE_EXEC_NAME, _expandState)(const struct NFA *nfa, void *dest,
                                          const void *src, u64a offset,
                                          UNUSED u8 key) {
    assert(nfa);
    assert(isLbrType(nfa->type));
    DEBUG_PRINTF("entry, offset=%llu\n", offset);

    const struct lbr_common *l = getImplNfa(nfa);
    struct lbr_state *lstate = (struct lbr_state *)dest;
    lbrExpandState(l, offset, src, lstate);
    return 0;
}

char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
                                            struct mq *q) {
    assert(nfa && q);
    assert(isLbrType(nfa->type));

    const struct lbr_common *l = getImplNfa(nfa);
    u64a offset = q_cur_offset(q);
    DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
    q->cb(0, offset, l->report, q->context);
    return 0;
}

char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
                                       ReportID report, struct mq *q) {
    assert(nfa && q);
    assert(isLbrType(nfa->type));
    DEBUG_PRINTF("entry\n");

    const struct lbr_common *l = getImplNfa(nfa);
    const struct RepeatInfo *info = getRepeatInfo(l);
    const struct lbr_state *lstate = (const struct lbr_state *)q->state;
    if (repeatIsDead(info, lstate)) {
        DEBUG_PRINTF("repeat is dead\n");
        return 0;
    }

    u64a offset = q->offset + q_last_loc(q);
    return lbrInAccept(l, lstate, q->streamState, offset, report);
}

char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
    assert(nfa && q);
    assert(isLbrType(nfa->type));
    DEBUG_PRINTF("entry\n");

    const struct lbr_common *l = getImplNfa(nfa);
    return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
}

char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
                                             struct mq *q) {
    assert(nfa && q);
    assert(isLbrType(nfa->type));
    DEBUG_PRINTF("entry\n");

    const struct lbr_common *l = getImplNfa(nfa);
    const struct RepeatInfo *info = getRepeatInfo(l);

    assert(q->state);
    struct lbr_state *lstate = (struct lbr_state *)q->state;
    assert(ISALIGNED(lstate));

    lstate->lastEscape = 0;
    clearRepeat(info, lstate);

    return 0;
}

char JOIN(ENGINE_EXEC_NAME, _initCompressedState)(const struct NFA *nfa,
                                                  u64a offset,
                                                  void *state, UNUSED u8 key) {
    assert(nfa && state);
    assert(isLbrType(nfa->type));
    DEBUG_PRINTF("entry\n");

    const struct lbr_common *l = getImplNfa(nfa);
    const struct RepeatInfo *info = getRepeatInfo(l);
    struct lbr_state lstate; // temp control block on stack.
    clearRepeat(info, &lstate);
    lbrTop(l, &lstate, state, offset);
    lbrCompressState(l, offset, &lstate, state);

    return 1; // LBR is alive
}

// FIXME: this function could be much simpler for a Dot LBR, as all it needs to
// do is find the next top.
static really_inline
char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
                                      s64a end) {
    const struct lbr_common *l = getImplNfa(nfa);
    const struct RepeatInfo *info = getRepeatInfo(l);

    const u64a offset = q->offset;
    struct lbr_state *lstate = (struct lbr_state *)q->state;
    assert(ISALIGNED(lstate));

    assert(repeatIsDead(info, lstate));
    assert(q->cur < q->end);

    DEBUG_PRINTF("entry, end=%lld, offset=%llu, lastEscape=%llu\n", end,
                  offset, lstate->lastEscape);

    while (1) {
        // Find the next top with location >= the last escape we saw.
        for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
            u32 event = q_cur_type(q);
            if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
                q_cur_offset(q) >= lstate->lastEscape) {
                goto found_top;
            }
            DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
        }

        // No more tops, we're done.
        break;

found_top:;
        assert(q->cur < q->end);

        u64a sp = q_cur_offset(q);
        u64a first_match = sp + info->repeatMin;
        DEBUG_PRINTF("first possible match is at %llu\n", first_match);

        u64a ep = MIN(MIN(end, (s64a)q->length) + offset, first_match);
        if (ep > sp && sp >= offset) {
            size_t eloc;
            DEBUG_PRINTF("rev b%llu e%llu/%zu\n", sp - offset, ep - offset,
                         q->length);
            assert(ep - offset <= q->length);
            if (REVSCAN_FN(nfa, q->buffer, sp - offset, ep - offset, &eloc)) {
                DEBUG_PRINTF("escape found at %llu\n", offset + eloc);
                lstate->lastEscape = eloc;
                q->cur++;
                continue;
            }
        }

        lbrTop(l, lstate, q->streamState, sp);
        return 1;
    }

    DEBUG_PRINTF("exhausted queue\n");
    return 0;
}

static really_inline
char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
                                  s64a end, enum MatchMode mode) {
    assert(nfa && q);
    assert(isLbrType(nfa->type));

    const struct lbr_common *l = getImplNfa(nfa);
    const struct RepeatInfo *info = getRepeatInfo(l);

    struct lbr_state *lstate = (struct lbr_state *)q->state;
    assert(ISALIGNED(lstate));


    if (q->report_current) {
        DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
        int rv = q->cb(0, q_cur_offset(q), l->report, q->context);
        q->report_current = 0;
        if (rv == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING;
        }
    }

    if (q->cur == q->end) {
        return 1;
    }

    assert(q->cur + 1 < q->end); /* require at least two items */
    assert(q_cur_type(q) == MQE_START);
    u64a sp = q_cur_offset(q);
    q->cur++;
    DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);

    while (q->cur < q->end) {
        DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
                     q_cur_offset(q));

        assert(sp >= q->offset); // not in history

        if (repeatIsDead(info, lstate)) {
            DEBUG_PRINTF("repeat is currently dead, skipping scan\n");
            goto scan_done;
        }

        u64a ep = q_cur_offset(q);
        ep = MIN(ep, q->offset + end);
        if (sp < ep) {
            size_t eloc = 0;
            char escape_found = 0;
            DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep);
            assert(sp >= q->offset && ep >= q->offset);
            if (FWDSCAN_FN(nfa, q->buffer, sp - q->offset, ep - q->offset, &eloc)) {
                escape_found = 1;
                ep = q->offset + eloc;
                DEBUG_PRINTF("escape found at %llu\n", ep);
                assert(ep >= sp);
            }

            assert(sp <= ep);

            if (mode == STOP_AT_MATCH) {
                size_t mloc;
                if (lbrFindMatch(l, sp, ep, lstate, q->streamState, &mloc)) {
                    DEBUG_PRINTF("storing match at %llu\n", sp + mloc);
                    q->cur--;
                    assert(q->cur < MAX_MQE_LEN);
                    q->items[q->cur].type = MQE_START;
                    q->items[q->cur].location = (s64a)(sp - q->offset) + mloc;
                    return MO_MATCHES_PENDING;
                }
            } else {
                assert(mode == CALLBACK_OUTPUT);
                char rv = lbrMatchLoop(l, sp, ep, lstate, q->streamState, q->cb,
                                       q->context);
                if (rv == MO_HALT_MATCHING) {
                    return MO_HALT_MATCHING;
                }
                assert(rv == MO_CONTINUE_MATCHING);
            }

            if (escape_found) {
                DEBUG_PRINTF("clearing repeat due to escape\n");
                clearRepeat(info, lstate);
            }
        }

    scan_done:
        if (q_cur_loc(q) > end) {
            q->cur--;
            assert(q->cur < MAX_MQE_LEN);
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = end;
            return MO_ALIVE;
        }

        if (repeatIsDead(info, lstate)) {
            if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, end)) {
                assert(repeatIsDead(info, lstate));
                if (q->cur < q->end && q_cur_loc(q) > end) {
                    q->cur--;
                    assert(q->cur < MAX_MQE_LEN);
                    q->items[q->cur].type = MQE_START;
                    q->items[q->cur].location = end;
                    return MO_ALIVE;
                }
                return 0;
            }
            DEBUG_PRINTF("cur offset = %llu\n", q_cur_offset(q));
        } else {
            switch (q_cur_type(q)) {
            case MQE_TOP:
            case MQE_TOP_FIRST:
                lbrTop(l, lstate, q->streamState, q_cur_offset(q));
                break;
            case MQE_START:
            case MQE_END:
                break;
            default:
                DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
                assert(0);
                break;
            }
        }

        sp = q_cur_offset(q);
        q->cur++;
    }

    return lbrIsAlive(l, lstate, q->streamState, sp);
}

char JOIN(ENGINE_EXEC_NAME, _Q)(const struct NFA *nfa, struct mq *q, s64a end) {
    DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
    return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, CALLBACK_OUTPUT);
}

char JOIN(ENGINE_EXEC_NAME, _Q2)(const struct NFA *nfa, struct mq *q, s64a end) {
    DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
    return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, STOP_AT_MATCH);
}

static really_inline
void JOIN(ENGINE_EXEC_NAME, _StreamSilent)(const struct NFA *nfa, struct mq *q,
                                           const u8 *buf, size_t length) {
    const struct lbr_common *l = getImplNfa(nfa);
    const struct RepeatInfo *info = getRepeatInfo(l);
    struct lbr_state *lstate = (struct lbr_state *)q->state;
    assert(ISALIGNED(lstate));

    assert(!repeatIsDead(info, lstate));

    // This call doesn't produce matches, so we elide the lbrMatchLoop call
    // entirely and just do escape scans to maintain the repeat.

    size_t eloc = 0;
    char escaped = FWDSCAN_FN(nfa, buf, 0, length, &eloc);
    if (escaped) {
        assert(eloc < length);
        DEBUG_PRINTF("escape found at %zu, clearing repeat\n", eloc);
        clearRepeat(info, lstate);
    }
}

// Rose infix path.
char JOIN(ENGINE_EXEC_NAME, _QR)(const struct NFA *nfa, struct mq *q,
                                 ReportID report) {
    assert(nfa && q);
    assert(isLbrType(nfa->type));

    if (q->cur == q->end) {
        return 1;
    }

    assert(q->cur + 1 < q->end); /* require at least two items */
    assert(q_cur_type(q) == MQE_START);
    u64a sp = q_cur_offset(q);
    q->cur++;
    DEBUG_PRINTF("sp=%llu\n", sp);

    const struct lbr_common *l = getImplNfa(nfa);
    const struct RepeatInfo *info = getRepeatInfo(l);
    struct lbr_state *lstate = (struct lbr_state *)q->state;
    assert(ISALIGNED(lstate));
    const s64a lastLoc = q_last_loc(q);

    while (q->cur < q->end) {
        DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
                     q_cur_offset(q));

        if (repeatIsDead(info, lstate)) {
            DEBUG_PRINTF("repeat is dead\n");
            goto scan_done;
        }

        u64a ep = q_cur_offset(q);

        if (sp < q->offset) {
            DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
            assert(q->offset - sp <= q->hlength);
            u64a local_ep = MIN(q->offset, ep);
            const u8 *ptr = q->history + q->hlength + sp - q->offset;
            JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, local_ep - sp);
            sp = local_ep;
        }

        if (repeatIsDead(info, lstate)) {
            DEBUG_PRINTF("repeat is dead\n");
            goto scan_done;
        }

        if (sp < ep) {
            DEBUG_PRINTF("MAIN BUFFER SCAN\n");
            assert(ep - q->offset <= q->length);
            const u8 *ptr = q->buffer + sp - q->offset;
            JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, ep - sp);
        }

        if (repeatIsDead(info, lstate)) {
scan_done:
            if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, lastLoc)) {
                assert(repeatIsDead(info, lstate));
                assert(q->cur == q->end);
                return 0;
            }
        } else {
            switch (q_cur_type(q)) {
            case MQE_TOP:
            case MQE_TOP_FIRST:
                lbrTop(l, lstate, q->streamState, q_cur_offset(q));
                break;
            case MQE_START:
            case MQE_END:
                break;
            default:
                DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
                assert(0);
                break;
            }
        }

        sp = q_cur_offset(q);
        q->cur++;
    }

    if (repeatIsDead(info, lstate)) {
        DEBUG_PRINTF("repeat is dead\n");
        return 0;
    }

    if (lbrInAccept(l, lstate, q->streamState, sp, report)) {
        return MO_MATCHES_PENDING;
    }

    return lbrIsActive(l, lstate, q->streamState, sp);
}

#undef ENGINE_EXEC_NAME
#undef EXEC_FN
#undef FWDSCAN_FN
#undef REVSCAN_FN
#undef ENGINE_ROOT_NAME