aboutsummaryrefslogblamecommitdiffstats
path: root/contrib/libs/hyperscan/src/grey.cpp
blob: 86a93d25aaecdbb53bd719d6754733ae6d37ce65 (plain) (tree)
1
2
  
                                             
































                                                                              
                               





                                               
                                        






                                                    
                                      
                                   
                                      




                                                                          
                                                  
                                       






                                                     







                                                                       
                                                                             
















                                                                               
                                       


























                                                                              
                                                                              





                                                                              



                                                          














                                                                
                                                    



































                                                                             







                                                                    











                                                                            
                                 






                                             
                               
                            
                               





                                        
                                           
                                






                                              







                                     
                                  
















                                              
                                  

























                                                
                                       

                                             



                                           














                                           
                                              


















                                             
                                    













                                             
                                    













                                             
                                    


























                                                                                
/*
 * Copyright (c) 2015-2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "grey.h"
#include "ue2common.h"

#include <algorithm>
#include <cstdlib> // exit
#include <string>
#include <vector>

#define DEFAULT_MAX_HISTORY 110

using namespace std;

namespace ue2 {

Grey::Grey(void) :
                   optimiseComponentTree(true),
                   calcComponents(true),
                   performGraphSimplification(true),
                   prefilterReductions(true),
                   removeEdgeRedundancy(true),
                   allowGough(true),
                   allowHaigLit(true),
                   allowLitHaig(true),
                   allowLbr(true),
                   allowMcClellan(true),
                   allowSheng(true),
                   allowMcSheng(true),
                   allowPuff(true),
                   allowLiteral(true),
                   allowViolet(true),
                   allowExtendedNFA(true), /* bounded repeats of course */
                   allowLimExNFA(true),
                   allowAnchoredAcyclic(true),
                   allowSmallLiteralSet(true),
                   allowCastle(true),
                   allowDecoratedLiteral(true),
                   allowApproximateMatching(true),
                   allowNoodle(true),
                   fdrAllowTeddy(true),
                   fdrAllowFlood(true),
                   violetAvoidSuffixes(true),
                   violetAvoidWeakInfixes(true),
                   violetDoubleCut(true),
                   violetExtractStrongLiterals(true),
                   violetLiteralChains(true),
                   violetDoubleCutLiteralLen(3),
                   violetEarlyCleanLiteralLen(6),
                   puffImproveHead(true),
                   castleExclusive(true),
                   mergeSEP(true), /* short exhaustible passthroughs */
                   mergeRose(true), // roses inside rose
                   mergeSuffixes(true), // suffix nfas inside rose
                   mergeOutfixes(true),
                   onlyOneOutfix(false),
                   allowShermanStates(true),
                   allowMcClellan8(true),
                   allowWideStates(true), // enable wide state for McClellan8
                   highlanderPruneDFA(true),
                   minimizeDFA(true),
                   accelerateDFA(true),
                   accelerateNFA(true),
                   reverseAccelerate(true),
                   squashNFA(true),
                   compressNFAState(true),
                   numberNFAStatesWrong(false), /* debugging only */
                   highlanderSquash(true),
                   allowZombies(true),
                   floodAsPuffette(false),
                   nfaForceSize(0),
                   maxHistoryAvailable(DEFAULT_MAX_HISTORY),
                   minHistoryAvailable(0), /* debugging only */
                   maxAnchoredRegion(63), /* for rose's atable to run over */
                   minRoseLiteralLength(3),
                   minRoseNetflowLiteralLength(2),
                   maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
                   maxEditDistance(16),
                   minExtBoundedRepeatSize(32),
                   goughCopyPropagate(true),
                   goughRegisterAllocate(true),
                   shortcutLiterals(true),
                   roseGraphReduction(true),
                   roseRoleAliasing(true),
                   roseMasks(true),
                   roseConvertFloodProneSuffixes(true),
                   roseMergeRosesDuringAliasing(true),
                   roseMultiTopRoses(true),
                   roseHamsterMasks(true),
                   roseLookaroundMasks(true),
                   roseMcClellanPrefix(1),
                   roseMcClellanSuffix(1),
                   roseMcClellanOutfix(2),
                   roseTransformDelay(true),
                   earlyMcClellanPrefix(true),
                   earlyMcClellanInfix(true),
                   earlyMcClellanSuffix(true),
                   allowCountingMiracles(true),
                   allowSomChain(true),
                   somMaxRevNfaLength(126),
                   hamsterAccelForward(true),
                   hamsterAccelReverse(false),
                   miracleHistoryBonus(16),
                   equivalenceEnable(true),

                   allowSmallWrite(true), // McClellan dfas for small patterns
                   allowSmallWriteSheng(false), // allow use of Sheng for SMWR

                   smallWriteLargestBuffer(70), // largest buffer that can be
                                                // considered a small write
                                                // all blocks larger than this
                                                // are given to rose &co
                   smallWriteLargestBufferBad(35),
                   limitSmallWriteOutfixSize(1048576), // 1 MB
                   smallWriteMaxPatterns(10000),
                   smallWriteMaxLiterals(10000),
                   smallWriteMergeBatchSize(20),
                   allowTamarama(true), // Tamarama engine
                   tamaChunkSize(100),
                   dumpFlags(0),
                   limitPatternCount(8000000), // 8M patterns
                   limitPatternLength(16000),  // 16K bytes
                   limitGraphVertices(500000), // 500K vertices
                   limitGraphEdges(1000000), // 1M edges
                   limitReportCount(4*8000000),
                   limitLiteralCount(8000000), // 8M literals
                   limitLiteralLength(16000),
                   limitLiteralMatcherChars(1073741824), // 1 GB
                   limitLiteralMatcherSize(1073741824), // 1 GB
                   limitRoseRoleCount(4*8000000),
                   limitRoseEngineCount(8000000), // 8M engines
                   limitRoseAnchoredSize(1073741824), // 1 GB
                   limitEngineSize(1073741824), // 1 GB
                   limitDFASize(1073741824), // 1 GB
                   limitNFASize(1048576), // 1 MB
                   limitLBRSize(1048576), // 1 MB
                   limitApproxMatchingVertices(5000)
{
    assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}

} // namespace ue2

#ifndef RELEASE_BUILD

#include <boost/lexical_cast.hpp>
using boost::lexical_cast;

namespace ue2 {

void applyGreyOverrides(Grey *g, const string &s) {
    string::const_iterator p = s.begin();
    string::const_iterator pe = s.end();
    string help = "help:0";
    bool invalid_key_seen = false;
    Grey defaultg;

    if (s == "help" || s == "help:") {
        printf("Valid grey overrides:\n");
        p = help.begin();
        pe = help.end();
    }

    while (p != pe) {
        string::const_iterator ke = find(p, pe, ':');

        if (ke == pe) {
            break;
        }

        string key(p, ke);

        string::const_iterator ve = find(ke, pe, ';');

        unsigned int value = 0;
        try {
            value = lexical_cast<unsigned int>(string(ke + 1, ve));
        } catch (boost::bad_lexical_cast &e) {
            printf("Invalid grey override key %s:%s\n", key.c_str(),
                   string(ke + 1, ve).c_str());
            invalid_key_seen = true;
            break;
        }
        bool done = false;

        /* surely there exists a nice template to go with this macro to make
         * all the boring code disappear */
#define G_UPDATE(k) do {                                                \
            if (key == ""#k) { g->k = value; done = 1;}                 \
            if (key == "help") {                                        \
                printf("\t%-30s\tdefault: %s\n", #k,                    \
                       lexical_cast<string>(defaultg.k).c_str());       \
            }                                                           \
        } while (0)

        G_UPDATE(optimiseComponentTree);
        G_UPDATE(calcComponents);
        G_UPDATE(performGraphSimplification);
        G_UPDATE(prefilterReductions);
        G_UPDATE(removeEdgeRedundancy);
        G_UPDATE(allowGough);
        G_UPDATE(allowHaigLit);
        G_UPDATE(allowLitHaig);
        G_UPDATE(allowLbr);
        G_UPDATE(allowMcClellan);
        G_UPDATE(allowSheng);
        G_UPDATE(allowMcSheng);
        G_UPDATE(allowPuff);
        G_UPDATE(allowLiteral);
        G_UPDATE(allowViolet);
        G_UPDATE(allowExtendedNFA);
        G_UPDATE(allowLimExNFA);
        G_UPDATE(allowAnchoredAcyclic);
        G_UPDATE(allowSmallLiteralSet);
        G_UPDATE(allowCastle);
        G_UPDATE(allowDecoratedLiteral);
        G_UPDATE(allowNoodle);
        G_UPDATE(allowApproximateMatching);
        G_UPDATE(fdrAllowTeddy);
        G_UPDATE(fdrAllowFlood);
        G_UPDATE(violetAvoidSuffixes);
        G_UPDATE(violetAvoidWeakInfixes);
        G_UPDATE(violetDoubleCut);
        G_UPDATE(violetExtractStrongLiterals);
        G_UPDATE(violetLiteralChains);
        G_UPDATE(violetDoubleCutLiteralLen);
        G_UPDATE(violetEarlyCleanLiteralLen);
        G_UPDATE(puffImproveHead);
        G_UPDATE(castleExclusive);
        G_UPDATE(mergeSEP);
        G_UPDATE(mergeRose);
        G_UPDATE(mergeSuffixes);
        G_UPDATE(mergeOutfixes);
        G_UPDATE(onlyOneOutfix);
        G_UPDATE(allowShermanStates);
        G_UPDATE(allowMcClellan8);
        G_UPDATE(allowWideStates);
        G_UPDATE(highlanderPruneDFA);
        G_UPDATE(minimizeDFA);
        G_UPDATE(accelerateDFA);
        G_UPDATE(accelerateNFA);
        G_UPDATE(reverseAccelerate);
        G_UPDATE(squashNFA);
        G_UPDATE(compressNFAState);
        G_UPDATE(numberNFAStatesWrong);
        G_UPDATE(allowZombies);
        G_UPDATE(floodAsPuffette);
        G_UPDATE(nfaForceSize);
        G_UPDATE(highlanderSquash);
        G_UPDATE(maxHistoryAvailable);
        G_UPDATE(minHistoryAvailable);
        G_UPDATE(maxAnchoredRegion);
        G_UPDATE(minRoseLiteralLength);
        G_UPDATE(minRoseNetflowLiteralLength);
        G_UPDATE(maxRoseNetflowEdges);
        G_UPDATE(maxEditDistance);
        G_UPDATE(minExtBoundedRepeatSize);
        G_UPDATE(goughCopyPropagate);
        G_UPDATE(goughRegisterAllocate);
        G_UPDATE(shortcutLiterals);
        G_UPDATE(roseGraphReduction);
        G_UPDATE(roseRoleAliasing);
        G_UPDATE(roseMasks);
        G_UPDATE(roseConvertFloodProneSuffixes);
        G_UPDATE(roseMergeRosesDuringAliasing);
        G_UPDATE(roseMultiTopRoses);
        G_UPDATE(roseHamsterMasks);
        G_UPDATE(roseLookaroundMasks);
        G_UPDATE(roseMcClellanPrefix);
        G_UPDATE(roseMcClellanSuffix);
        G_UPDATE(roseMcClellanOutfix);
        G_UPDATE(roseTransformDelay);
        G_UPDATE(earlyMcClellanPrefix);
        G_UPDATE(earlyMcClellanInfix);
        G_UPDATE(earlyMcClellanSuffix);
        G_UPDATE(allowSomChain);
        G_UPDATE(allowCountingMiracles);
        G_UPDATE(somMaxRevNfaLength);
        G_UPDATE(hamsterAccelForward);
        G_UPDATE(hamsterAccelReverse);
        G_UPDATE(miracleHistoryBonus);
        G_UPDATE(equivalenceEnable);
        G_UPDATE(allowSmallWrite);
        G_UPDATE(allowSmallWriteSheng);
        G_UPDATE(smallWriteLargestBuffer);
        G_UPDATE(smallWriteLargestBufferBad);
        G_UPDATE(limitSmallWriteOutfixSize);
        G_UPDATE(smallWriteMaxPatterns);
        G_UPDATE(smallWriteMaxLiterals);
        G_UPDATE(smallWriteMergeBatchSize);
        G_UPDATE(allowTamarama);
        G_UPDATE(tamaChunkSize);
        G_UPDATE(limitPatternCount);
        G_UPDATE(limitPatternLength);
        G_UPDATE(limitGraphVertices);
        G_UPDATE(limitGraphEdges);
        G_UPDATE(limitReportCount);
        G_UPDATE(limitLiteralCount);
        G_UPDATE(limitLiteralLength);
        G_UPDATE(limitLiteralMatcherChars);
        G_UPDATE(limitLiteralMatcherSize);
        G_UPDATE(limitRoseRoleCount);
        G_UPDATE(limitRoseEngineCount);
        G_UPDATE(limitRoseAnchoredSize);
        G_UPDATE(limitEngineSize);
        G_UPDATE(limitDFASize);
        G_UPDATE(limitNFASize);
        G_UPDATE(limitLBRSize);
        G_UPDATE(limitApproxMatchingVertices);

#undef G_UPDATE
        if (key == "simple_som") {
            g->allowHaigLit = false;
            g->allowLitHaig = false;
            g->allowSomChain = false;
            g->somMaxRevNfaLength = 0;
            done = true;
        }
        if (key == "forceOutfixesNFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = false;
            g->allowPuff = false;
            g->allowLiteral = false;
            g->allowViolet = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixesDFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = false;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowLiteral = false;
            g->allowViolet = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixes") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = true;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowLiteral = false;
            g->allowViolet = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }

        if (!done && key != "help") {
            printf("Invalid grey override key %s:%u\n", key.c_str(), value);
            invalid_key_seen = true;
        }

        p = ve;

        if (p != pe) {
            ++p;
        }
    }

    if (invalid_key_seen) {
        applyGreyOverrides(g, "help");
        exit(1);
    }

    assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}

} // namespace ue2

#endif