/*
 * Copyright (c) 2015-2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief NG and graph handling.
 */
#include "ng.h"

#include "grey.h"
#include "ng_anchored_acyclic.h"
#include "ng_anchored_dots.h"
#include "ng_asserts.h"
#include "ng_calc_components.h"
#include "ng_cyclic_redundancy.h"
#include "ng_dump.h"
#include "ng_edge_redundancy.h"
#include "ng_equivalence.h"
#include "ng_extparam.h"
#include "ng_fixed_width.h"
#include "ng_fuzzy.h"
#include "ng_haig.h"
#include "ng_literal_component.h"
#include "ng_literal_decorated.h"
#include "ng_misc_opt.h"
#include "ng_puff.h"
#include "ng_prefilter.h"
#include "ng_prune.h"
#include "ng_redundancy.h"
#include "ng_region.h"
#include "ng_region_redundancy.h"
#include "ng_reports.h"
#include "ng_sep.h"
#include "ng_small_literal_set.h"
#include "ng_som.h"
#include "ng_vacuous.h"
#include "ng_violet.h"
#include "ng_utf8.h"
#include "ng_util.h"
#include "ng_width.h"
#include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/goughcompile.h"
#include "rose/rose_build.h"
#include "smallwrite/smallwrite_build.h"
#include "util/compile_error.h"
#include "util/container.h"
#include "util/depth.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/ue2string.h"

using namespace std;

namespace ue2 {

NG::NG(const CompileContext &in_cc, size_t num_patterns,
       unsigned in_somPrecision)
    : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength),
      minWidth(depth::infinity()),
      rm(in_cc.grey),
      ssm(in_somPrecision),
      cc(in_cc),
      smwr(makeSmallWriteBuilder(num_patterns, rm, cc)),
      rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) {
}

NG::~NG() {
    // empty
}

/** \brief SOM handling code, called by \ref addComponent.
 *
 * \return true if the component was handled completely by something (e.g. a
 * Haig outfix), false if SOM could be established but implementation via an
 * engine will be required.
 *
 * \throw CompileError if SOM cannot be supported for the component.
 */
static
bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
                     const som_type som, const u32 comp_id) {
    DEBUG_PRINTF("doing som\n");
    dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
    assert(hasCorrectlyNumberedVertices(g));
    assert(allMatchStatesHaveReports(g));

    // First, we try the "SOM chain" support in ng_som.cpp.

    sombe_rv rv = doSom(ng, g, expr, comp_id, som);
    if (rv == SOMBE_HANDLED_INTERNAL) {
        return false;
    } else if (rv == SOMBE_HANDLED_ALL) {
        return true;
    }
    assert(rv == SOMBE_FAIL);

    /* Next, Sombe style approaches */
    rv = doSomWithHaig(ng, g, expr, comp_id, som);
    if (rv == SOMBE_HANDLED_INTERNAL) {
        return false;
    } else if (rv == SOMBE_HANDLED_ALL) {
        return true;
    }
    assert(rv == SOMBE_FAIL);

    // If the previous approach could not support this pattern, we try treating
    // it monolithically, as a Haig outfix.

    vector<vector<CharReach> > triggers; /* empty for outfix */

    assert(g.kind == NFA_OUTFIX);
    dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
    makeReportsSomPass(ng.rm, g);
    auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
                                   ng.cc.grey);
    if (haig) {
        DEBUG_PRINTF("built haig outfix\n");
        ng.rose->addOutfix(g, *haig);
        return true;
    }

    /* Our various strategies for supporting SOM for this pattern have failed.
     * Provide a generic pattern not supported/too large return value as it is
     * unclear what the meaning of a specific SOM error would be */
    throw CompileError(expr.index, "Pattern is too large.");

    assert(0); // unreachable
    return false;
}

void reduceGraph(NGHolder &g, som_type som, bool utf8,
                 const CompileContext &cc) {
    if (!cc.grey.performGraphSimplification) {
        return;
    }

    // We run reduction passes until either the graph stops changing or we hit
    // a (small) limit.

    if (!som) {
        mergeCyclicDotStars(g);
    }

    const unsigned MAX_PASSES = 3;
    for (unsigned pass = 1; pass <= MAX_PASSES; pass++) {
        bool changed = false;
        DEBUG_PRINTF("reduce pass %u/%u\n", pass, MAX_PASSES);
        changed |= removeEdgeRedundancy(g, som, cc);
        changed |= reduceGraphEquivalences(g, cc);
        changed |= removeRedundancy(g, som);
        changed |= removeCyclicPathRedundancy(g);
        if (!changed) {
            DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass);
            break;
        }
    }

    if (utf8) {
        utf8DotRestoration(g, som);
    }

    /* Minor non-redundancy improvements */
    if (improveGraph(g, som)) {
        /* may be some more edges to remove */
        removeEdgeRedundancy(g, som, cc);
    }

    removeCyclicDominated(g, som);

    if (!som) {
        mergeCyclicDotStars(g);
    }

    if (!som) {
        removeSiblingsOfStartDotStar(g);
    }
}

static
bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
                  const som_type som, const u32 comp_id) {
    const CompileContext &cc = ng.cc;
    assert(hasCorrectlyNumberedVertices(g));

    DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
                 expr.index, comp_id, num_vertices(g), num_edges(g));

    dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);

    assert(allMatchStatesHaveReports(g));

    reduceExtendedParams(g, ng.rm, som);
    reduceGraph(g, som, expr.utf8, cc);

    dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);

    // There may be redundant regions that we can remove
    if (cc.grey.performGraphSimplification) {
        removeRegionRedundancy(g, som);
    }

    // We might be done at this point: if we've run out of vertices, we can
    // stop processing.
    if (num_vertices(g) == N_SPECIALS) {
        DEBUG_PRINTF("all vertices claimed\n");
        return true;
    }

    // "Short Exhaustible Passthrough" patterns always become outfixes.
    if (!som && isSEP(g, ng.rm, cc.grey)) {
        DEBUG_PRINTF("graph is SEP\n");
        if (ng.rose->addOutfix(g)) {
            return true;
        }
    }

    // Start Of Match handling.
    if (som) {
        if (addComponentSom(ng, g, expr, som, comp_id)) {
            return true;
        }
    }

    assert(allMatchStatesHaveReports(g));

    if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
        return true;
    }

    if (handleSmallLiteralSets(*ng.rose, g, cc)
        || handleFixedWidth(*ng.rose, g, cc.grey)) {
        return true;
    }

    if (handleDecoratedLiterals(*ng.rose, g, cc)) {
        return true;
    }

    if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
        return true;
    }

    if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
        return true;
    }

    if (handleSmallLiteralSets(*ng.rose, g, cc)
        || handleFixedWidth(*ng.rose, g, cc.grey)) {
        return true;
    }

    if (handleDecoratedLiterals(*ng.rose, g, cc)) {
        return true;
    }

    if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
        return true;
    }

    DEBUG_PRINTF("testing for outfix\n");
    assert(allMatchStatesHaveReports(g));
    if (ng.rose->addOutfix(g)) {
        return true;
    }

    return false;
}

// Returns true if all components have been added.
static
bool processComponents(NG &ng, ExpressionInfo &expr,
                       deque<unique_ptr<NGHolder>> &g_comp,
                       const som_type som) {
    const u32 num_components = g_comp.size();

    u32 failed = 0;
    for (u32 i = 0; i < num_components; i++) {
        if (!g_comp[i]) {
            continue;
        }
        if (addComponent(ng, *g_comp[i], expr, som, i)) {
            g_comp[i].reset();
            continue;
        }

        if (som) { /* bail immediately */
            return false;
        }
        failed++;
    }

    if (!failed) {
        DEBUG_PRINTF("all components claimed\n");
        return true;
    }

    DEBUG_PRINTF("%u components still remain\n", failed);
    return false;
}

bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
    assert(g_ptr);
    NGHolder &g = *g_ptr;

    // remove reports that aren't on vertices connected to accept.
    clearReports(g);

    som_type som = expr.som;
    if (som && isVacuous(g)) {
        throw CompileError(expr.index, "Start of match is not "
                           "currently supported for patterns which match an "
                           "empty buffer.");
    }

    dumpDotWrapper(g, expr, "01_initial", cc.grey);
    assert(allMatchStatesHaveReports(g));

    /* ensure utf8 starts at cp boundary */
    ensureCodePointStart(rm, g, expr);

    if (can_never_match(g)) {
        throw CompileError(expr.index, "Pattern can never match.");
    }

    bool hamming = expr.hamm_distance > 0;
    u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance;

    DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? "true" : "false");

    // validate graph's suitability for fuzzing before resolving asserts
    validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey);

    resolveAsserts(rm, g, expr);
    dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
    assert(allMatchStatesHaveReports(g));

    make_fuzzy(g, e_dist, hamming, cc.grey);
    dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);

    pruneUseless(g);
    pruneEmptyVertices(g);

    if (can_never_match(g)) {
        throw CompileError(expr.index, "Pattern can never match.");
    }

    optimiseVirtualStarts(g); /* good for som */

    propagateExtendedParams(g, expr, rm);
    reduceExtendedParams(g, rm, som);

    // We may have removed all the edges to accept, in which case this
    // expression cannot match.
    if (can_never_match(g)) {
        throw CompileError(expr.index, "Extended parameter constraints can not "
                                       "be satisfied for any match from this "
                                       "expression.");
    }

    if (any_of_in(all_reports(g), [&](ReportID id) {
            return rm.getReport(id).minLength;
        })) {
        // We have at least one report with a minimum length constraint, which
        // we currently use SOM to satisfy.
        som = SOM_LEFT;
        ssm.somPrecision(8);
    }

    if (som) {
        rose->setSom();
    }

    // first, we can perform graph work that can be done on an individual
    // expression basis.

    if (expr.utf8) {
        relaxForbiddenUtf8(g, expr);
    }

    if (all_of_in(all_reports(g), [&](ReportID id) {
            const auto &report = rm.getReport(id);
            return report.ekey != INVALID_EKEY && !report.minLength &&
                   !report.minOffset;
        })) {
        // In highlander mode: if we don't have constraints on our reports that
        // may prevent us accepting our first match (i.e. extended params) we
        // can prune the other out-edges of all vertices connected to accept.
        // TODO: shift the report checking down into pruneHighlanderAccepts()
        // to allow us to handle the parts we can in mixed cases.
        pruneHighlanderAccepts(g, rm);
    }

    dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);

    // If we're a vacuous pattern, we can handle this early.
    if (splitOffVacuous(boundary, rm, g, expr)) {
        DEBUG_PRINTF("split off vacuous\n");
    }

    // We might be done at this point: if we've run out of vertices, we can
    // stop processing.
    if (num_vertices(g) == N_SPECIALS) {
        DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
        return true;
    }

    // Now that vacuous edges have been removed, update the min width exclusive
    // of boundary reports.
    minWidth = min(minWidth, findMinWidth(g));

    // Add the pattern to the small write builder.
    smwr->add(g, expr);

    if (!som) {
        removeSiblingsOfStartDotStar(g);
    }

    dumpDotWrapper(g, expr, "03_early", cc.grey);

    // Perform a reduction pass to merge sibling character classes together.
    if (cc.grey.performGraphSimplification) {
        removeRedundancy(g, som);
        prunePathsRedundantWithSuccessorOfCyclics(g, som);
    }

    dumpDotWrapper(g, expr, "04_reduced", cc.grey);

    // If we've got some literals that span the graph from start to accept, we
    // can split them off into Rose from here.
    if (!som) {
        if (splitOffLiterals(*this, g)) {
            DEBUG_PRINTF("some vertices claimed by literals\n");
        }
    }

    // We might be done at this point: if we've run out of vertices, we can
    // stop processing.
    if (num_vertices(g) == N_SPECIALS) {
        DEBUG_PRINTF("all vertices claimed before calc components\n");
        return true;
    }

    // Split the graph into a set of connected components and process those.
    // Note: this invalidates g_ptr.

    auto g_comp = calcComponents(std::move(g_ptr), cc.grey);
    assert(!g_comp.empty());

    if (!som) {
        for (auto &gc : g_comp) {
            assert(gc);
            reformLeadingDots(*gc);
        }

        recalcComponents(g_comp, cc.grey);
    }

    if (processComponents(*this, expr, g_comp, som)) {
        return true;
    }

    // If we're in prefiltering mode, we can run the prefilter reductions and
    // have another shot at accepting the graph.

    if (cc.grey.prefilterReductions && expr.prefilter) {
        for (auto &gc : g_comp) {
            if (!gc) {
                continue;
            }
            prefilterReductions(*gc, cc);
        }

        if (processComponents(*this, expr, g_comp, som)) {
            return true;
        }
    }

    // We must have components that could not be compiled.
    for (u32 i = 0; i < g_comp.size(); i++) {
        if (g_comp[i]) {
            DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
                         i, num_vertices(*g_comp[i]));
            throw CompileError(expr.index, "Pattern is too large.");
        }
    }

    assert(0); // should have thrown.
    return false;
}

/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
bool NG::addHolder(NGHolder &g) {
    DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
    assert(allMatchStatesHaveReports(g));
    assert(hasCorrectlyNumberedVertices(g));

    /* We don't update the global minWidth here as we care about the min width
     * of the whole pattern - not a just a prefix of it. */

    bool prefilter = false;
    //dumpDotComp(comp, g, *this, 20, "prefix_init");

    som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
                                themselves track som */
    bool utf8 = false; // handling done earlier
    reduceGraph(g, som, utf8, cc);

    // There may be redundant regions that we can remove
    if (cc.grey.performGraphSimplification) {
        removeRegionRedundancy(g, som);
    }

    // "Short Exhaustible Passthrough" patterns always become outfixes.
    if (isSEP(g, rm, cc.grey)) {
        DEBUG_PRINTF("graph is SEP\n");
        if (rose->addOutfix(g)) {
            return true;
        }
    }

    if (splitOffAnchoredAcyclic(*rose, g, cc)) {
        return true;
    }

    if (handleSmallLiteralSets(*rose, g, cc)
        || handleFixedWidth(*rose, g, cc.grey)) {
        return true;
    }

    if (handleDecoratedLiterals(*rose, g, cc)) {
        return true;
    }

    if (doViolet(*rose, g, prefilter, false, rm, cc)) {
        return true;
    }
    if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
        return true;
    }
    if (doViolet(*rose, g, prefilter, true, rm, cc)) {
        return true;
    }

    DEBUG_PRINTF("trying for outfix\n");
    if (rose->addOutfix(g)) {
        DEBUG_PRINTF("ok\n");
        return true;
    }
    DEBUG_PRINTF("trying for outfix - failed\n");
    DEBUG_PRINTF("nobody would take us\n");
    return false;
}

bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
                    u32 external_report, bool highlander, som_type som,
                    bool quiet) {
    assert(!literal.empty());

    if (!cc.grey.shortcutLiterals) {
        return false;
    }

    // We can't natively handle arbitrary literals with mixed case sensitivity
    // in Rose -- they require mechanisms like benefits masks, which have
    // length limits etc. Better to let those go through full graph processing.
    if (mixed_sensitivity(literal)) {
        DEBUG_PRINTF("mixed sensitivity\n");
        return false;
    }

    // Register external report and validate highlander constraints.
    rm.registerExtReport(external_report,
                         external_report_info(highlander, expr_index));

    ReportID id;
    if (som) {
        assert(!highlander); // not allowed, checked earlier.
        Report r = makeSomRelativeCallback(external_report, 0, literal.length());
        id = rm.getInternalId(r);
        rose->setSom();
    } else {
        u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
                              : INVALID_EKEY;
        Report r = makeECallback(external_report, 0, ekey, quiet);
        id = rm.getInternalId(r);
    }

    DEBUG_PRINTF("success: graph is literal '%s', report ID %u\n",
                 dumpString(literal).c_str(), id);

    rose->add(false, false, literal, {id});

    minWidth = min(minWidth, depth(literal.length()));

    /* inform small write handler about this literal */
    smwr->add(literal, id);

    return true;
}

} // namespace ue2