diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2023-05-26 18:02:46 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2023-05-26 18:02:46 +0300 |
commit | d85fbebd3ea97ba43da1f6d672dde2b18347f11a (patch) | |
tree | 592f624cde6617872108479e43b781b2e16b565c /contrib/libs/libfuzzer/FuzzerMerge.cpp | |
parent | d8341f8abab29d0fcfb7096a3d1b3c6fd399781b (diff) | |
download | ydb-d85fbebd3ea97ba43da1f6d672dde2b18347f11a.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/libs/libfuzzer/FuzzerMerge.cpp')
-rw-r--r-- | contrib/libs/libfuzzer/FuzzerMerge.cpp | 535 |
1 files changed, 0 insertions, 535 deletions
diff --git a/contrib/libs/libfuzzer/FuzzerMerge.cpp b/contrib/libs/libfuzzer/FuzzerMerge.cpp deleted file mode 100644 index 24bd11958e8..00000000000 --- a/contrib/libs/libfuzzer/FuzzerMerge.cpp +++ /dev/null @@ -1,535 +0,0 @@ -//===- FuzzerMerge.cpp - merging corpora ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Merging corpora. -//===----------------------------------------------------------------------===// - -#include "FuzzerCommand.h" -#include "FuzzerMerge.h" -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include "FuzzerTracePC.h" -#include "FuzzerUtil.h" - -#include <fstream> -#include <iterator> -#include <set> -#include <sstream> -#include <unordered_set> - -namespace fuzzer { - -bool Merger::Parse(const std::string &Str, bool ParseCoverage) { - std::istringstream SS(Str); - return Parse(SS, ParseCoverage); -} - -void Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) { - if (!Parse(IS, ParseCoverage)) { - Printf("MERGE: failed to parse the control file (unexpected error)\n"); - exit(1); - } -} - -// The control file example: -// -// 3 # The number of inputs -// 1 # The number of inputs in the first corpus, <= the previous number -// file0 -// file1 -// file2 # One file name per line. -// STARTED 0 123 # FileID, file size -// FT 0 1 4 6 8 # FileID COV1 COV2 ... -// COV 0 7 8 9 # FileID COV1 COV1 -// STARTED 1 456 # If FT is missing, the input crashed while processing. -// STARTED 2 567 -// FT 2 8 9 -// COV 2 11 12 -bool Merger::Parse(std::istream &IS, bool ParseCoverage) { - LastFailure.clear(); - std::string Line; - - // Parse NumFiles. - if (!std::getline(IS, Line, '\n')) return false; - std::istringstream L1(Line); - size_t NumFiles = 0; - L1 >> NumFiles; - if (NumFiles == 0 || NumFiles > 10000000) return false; - - // Parse NumFilesInFirstCorpus. - if (!std::getline(IS, Line, '\n')) return false; - std::istringstream L2(Line); - NumFilesInFirstCorpus = NumFiles + 1; - L2 >> NumFilesInFirstCorpus; - if (NumFilesInFirstCorpus > NumFiles) return false; - - // Parse file names. - Files.resize(NumFiles); - for (size_t i = 0; i < NumFiles; i++) - if (!std::getline(IS, Files[i].Name, '\n')) - return false; - - // Parse STARTED, FT, and COV lines. - size_t ExpectedStartMarker = 0; - const size_t kInvalidStartMarker = -1; - size_t LastSeenStartMarker = kInvalidStartMarker; - std::vector<uint32_t> TmpFeatures; - std::set<uint32_t> PCs; - while (std::getline(IS, Line, '\n')) { - std::istringstream ISS1(Line); - std::string Marker; - uint32_t N; - if (!(ISS1 >> Marker) || !(ISS1 >> N)) - return false; - if (Marker == "STARTED") { - // STARTED FILE_ID FILE_SIZE - if (ExpectedStartMarker != N) - return false; - ISS1 >> Files[ExpectedStartMarker].Size; - LastSeenStartMarker = ExpectedStartMarker; - assert(ExpectedStartMarker < Files.size()); - ExpectedStartMarker++; - } else if (Marker == "FT") { - // FT FILE_ID COV1 COV2 COV3 ... - size_t CurrentFileIdx = N; - if (CurrentFileIdx != LastSeenStartMarker) - return false; - LastSeenStartMarker = kInvalidStartMarker; - if (ParseCoverage) { - TmpFeatures.clear(); // use a vector from outer scope to avoid resizes. - while (ISS1 >> N) - TmpFeatures.push_back(N); - std::sort(TmpFeatures.begin(), TmpFeatures.end()); - Files[CurrentFileIdx].Features = TmpFeatures; - } - } else if (Marker == "COV") { - size_t CurrentFileIdx = N; - if (ParseCoverage) - while (ISS1 >> N) - if (PCs.insert(N).second) - Files[CurrentFileIdx].Cov.push_back(N); - } else { - return false; - } - } - if (LastSeenStartMarker != kInvalidStartMarker) - LastFailure = Files[LastSeenStartMarker].Name; - - FirstNotProcessedFile = ExpectedStartMarker; - return true; -} - -size_t Merger::ApproximateMemoryConsumption() const { - size_t Res = 0; - for (const auto &F: Files) - Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]); - return Res; -} - -// Decides which files need to be merged (add those to NewFiles). -// Returns the number of new features added. -size_t Merger::Merge(const std::set<uint32_t> &InitialFeatures, - std::set<uint32_t> *NewFeatures, - const std::set<uint32_t> &InitialCov, - std::set<uint32_t> *NewCov, - std::vector<std::string> *NewFiles) { - NewFiles->clear(); - NewFeatures->clear(); - NewCov->clear(); - assert(NumFilesInFirstCorpus <= Files.size()); - std::set<uint32_t> AllFeatures = InitialFeatures; - - // What features are in the initial corpus? - for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { - auto &Cur = Files[i].Features; - AllFeatures.insert(Cur.begin(), Cur.end()); - } - // Remove all features that we already know from all other inputs. - for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { - auto &Cur = Files[i].Features; - std::vector<uint32_t> Tmp; - std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(), - AllFeatures.end(), std::inserter(Tmp, Tmp.begin())); - Cur.swap(Tmp); - } - - // Sort. Give preference to - // * smaller files - // * files with more features. - std::sort(Files.begin() + NumFilesInFirstCorpus, Files.end(), - [&](const MergeFileInfo &a, const MergeFileInfo &b) -> bool { - if (a.Size != b.Size) - return a.Size < b.Size; - return a.Features.size() > b.Features.size(); - }); - - // One greedy pass: add the file's features to AllFeatures. - // If new features were added, add this file to NewFiles. - for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { - auto &Cur = Files[i].Features; - // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), - // Files[i].Size, Cur.size()); - bool FoundNewFeatures = false; - for (auto Fe: Cur) { - if (AllFeatures.insert(Fe).second) { - FoundNewFeatures = true; - NewFeatures->insert(Fe); - } - } - if (FoundNewFeatures) - NewFiles->push_back(Files[i].Name); - for (auto Cov : Files[i].Cov) - if (InitialCov.find(Cov) == InitialCov.end()) - NewCov->insert(Cov); - } - return NewFeatures->size(); -} - -std::set<uint32_t> Merger::AllFeatures() const { - std::set<uint32_t> S; - for (auto &File : Files) - S.insert(File.Features.begin(), File.Features.end()); - return S; -} - -// Inner process. May crash if the target crashes. -void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath, - bool IsSetCoverMerge) { - Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); - Merger M; - std::ifstream IF(CFPath); - M.ParseOrExit(IF, false); - IF.close(); - if (!M.LastFailure.empty()) - Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n", - M.LastFailure.c_str()); - - Printf("MERGE-INNER: %zd total files;" - " %zd processed earlier; will process %zd files now\n", - M.Files.size(), M.FirstNotProcessedFile, - M.Files.size() - M.FirstNotProcessedFile); - - std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app); - std::set<size_t> AllFeatures; - auto PrintStatsWrapper = [this, &AllFeatures](const char* Where) { - this->PrintStats(Where, "\n", 0, AllFeatures.size()); - }; - std::set<const TracePC::PCTableEntry *> AllPCs; - for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { - Fuzzer::MaybeExitGracefully(); - auto U = FileToVector(M.Files[i].Name); - if (U.size() > MaxInputLen) { - U.resize(MaxInputLen); - U.shrink_to_fit(); - } - - // Write the pre-run marker. - OF << "STARTED " << i << " " << U.size() << "\n"; - OF.flush(); // Flush is important since Command::Execute may crash. - // Run. - TPC.ResetMaps(); - ExecuteCallback(U.data(), U.size()); - // Collect coverage. We are iterating over the files in this order: - // * First, files in the initial corpus ordered by size, smallest first. - // * Then, all other files, smallest first. - std::set<size_t> Features; - if (IsSetCoverMerge) - TPC.CollectFeatures([&](size_t Feature) { Features.insert(Feature); }); - else - TPC.CollectFeatures([&](size_t Feature) { - if (AllFeatures.insert(Feature).second) - Features.insert(Feature); - }); - TPC.UpdateObservedPCs(); - // Show stats. - if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) - PrintStatsWrapper("pulse "); - if (TotalNumberOfRuns == M.NumFilesInFirstCorpus) - PrintStatsWrapper("LOADED"); - // Write the post-run marker and the coverage. - OF << "FT " << i; - for (size_t F : Features) - OF << " " << F; - OF << "\n"; - OF << "COV " << i; - TPC.ForEachObservedPC([&](const TracePC::PCTableEntry *TE) { - if (AllPCs.insert(TE).second) - OF << " " << TPC.PCTableEntryIdx(TE); - }); - OF << "\n"; - OF.flush(); - } - PrintStatsWrapper("DONE "); -} - -// Merges all corpora into the first corpus. A file is added into -// the first corpus only if it adds new features. Unlike `Merger::Merge`, -// this implementation calculates an approximation of the minimum set -// of corpora files, that cover all known features (set cover problem). -// Generally, this means that files with more features are preferred for -// merge into the first corpus. When two files have the same number of -// features, the smaller one is preferred. -size_t Merger::SetCoverMerge(const std::set<uint32_t> &InitialFeatures, - std::set<uint32_t> *NewFeatures, - const std::set<uint32_t> &InitialCov, - std::set<uint32_t> *NewCov, - std::vector<std::string> *NewFiles) { - assert(NumFilesInFirstCorpus <= Files.size()); - NewFiles->clear(); - NewFeatures->clear(); - NewCov->clear(); - std::set<uint32_t> AllFeatures; - // 1 << 21 - 1 is the maximum feature index. - // See 'kFeatureSetSize' in 'FuzzerCorpus.h'. - const uint32_t kFeatureSetSize = 1 << 21; - std::vector<bool> Covered(kFeatureSetSize, false); - size_t NumCovered = 0; - - std::set<uint32_t> ExistingFeatures = InitialFeatures; - for (size_t i = 0; i < NumFilesInFirstCorpus; ++i) - ExistingFeatures.insert(Files[i].Features.begin(), Files[i].Features.end()); - - // Mark the existing features as covered. - for (const auto &F : ExistingFeatures) { - if (!Covered[F % kFeatureSetSize]) { - ++NumCovered; - Covered[F % kFeatureSetSize] = true; - } - // Calculate an underestimation of the set of covered features - // since the `Covered` bitvector is smaller than the feature range. - AllFeatures.insert(F % kFeatureSetSize); - } - - std::set<size_t> RemainingFiles; - for (size_t i = NumFilesInFirstCorpus; i < Files.size(); ++i) { - // Construct an incremental sequence which represent the - // indices to all files (excluding those in the initial corpus). - // RemainingFiles = range(NumFilesInFirstCorpus..Files.size()). - RemainingFiles.insert(i); - // Insert this file's unique features to all features. - for (const auto &F : Files[i].Features) - AllFeatures.insert(F % kFeatureSetSize); - } - - // Integrate files into Covered until set is complete. - while (NumCovered != AllFeatures.size()) { - // Index to file with largest number of unique features. - size_t MaxFeaturesIndex = NumFilesInFirstCorpus; - // Indices to remove from RemainingFiles. - std::set<size_t> RemoveIndices; - // Running max unique feature count. - // Updated upon finding a file with more features. - size_t MaxNumFeatures = 0; - - // Iterate over all files not yet integrated into Covered, - // to find the file which has the largest number of - // features that are not already in Covered. - for (const auto &i : RemainingFiles) { - const auto &File = Files[i]; - size_t CurrentUnique = 0; - // Count number of features in this file - // which are not yet in Covered. - for (const auto &F : File.Features) - if (!Covered[F % kFeatureSetSize]) - ++CurrentUnique; - - if (CurrentUnique == 0) { - // All features in this file are already in Covered: skip next time. - RemoveIndices.insert(i); - } else if (CurrentUnique > MaxNumFeatures || - (CurrentUnique == MaxNumFeatures && - File.Size < Files[MaxFeaturesIndex].Size)) { - // Update the max features file based on unique features - // Break ties by selecting smaller files. - MaxNumFeatures = CurrentUnique; - MaxFeaturesIndex = i; - } - } - // Must be a valid index/ - assert(MaxFeaturesIndex < Files.size()); - // Remove any feature-less files found. - for (const auto &i : RemoveIndices) - RemainingFiles.erase(i); - if (MaxNumFeatures == 0) { - // Did not find a file that adds unique features. - // This means that we should have no remaining files. - assert(RemainingFiles.size() == 0); - assert(NumCovered == AllFeatures.size()); - break; - } - - // MaxFeaturesIndex must be an element of Remaining. - assert(RemainingFiles.find(MaxFeaturesIndex) != RemainingFiles.end()); - // Remove the file with the most features from Remaining. - RemainingFiles.erase(MaxFeaturesIndex); - const auto &MaxFeatureFile = Files[MaxFeaturesIndex]; - // Add the features of the max feature file to Covered. - for (const auto &F : MaxFeatureFile.Features) { - if (!Covered[F % kFeatureSetSize]) { - ++NumCovered; - Covered[F % kFeatureSetSize] = true; - NewFeatures->insert(F); - } - } - // Add the index to this file to the result. - NewFiles->push_back(MaxFeatureFile.Name); - // Update NewCov with the additional coverage - // that MaxFeatureFile provides. - for (const auto &C : MaxFeatureFile.Cov) - if (InitialCov.find(C) == InitialCov.end()) - NewCov->insert(C); - } - - return NewFeatures->size(); -} - -static size_t -WriteNewControlFile(const std::string &CFPath, - const std::vector<SizedFile> &OldCorpus, - const std::vector<SizedFile> &NewCorpus, - const std::vector<MergeFileInfo> &KnownFiles) { - std::unordered_set<std::string> FilesToSkip; - for (auto &SF: KnownFiles) - FilesToSkip.insert(SF.Name); - - std::vector<std::string> FilesToUse; - auto MaybeUseFile = [=, &FilesToUse](std::string Name) { - if (FilesToSkip.find(Name) == FilesToSkip.end()) - FilesToUse.push_back(Name); - }; - for (auto &SF: OldCorpus) - MaybeUseFile(SF.File); - auto FilesToUseFromOldCorpus = FilesToUse.size(); - for (auto &SF: NewCorpus) - MaybeUseFile(SF.File); - - RemoveFile(CFPath); - std::ofstream ControlFile(CFPath); - ControlFile << FilesToUse.size() << "\n"; - ControlFile << FilesToUseFromOldCorpus << "\n"; - for (auto &FN: FilesToUse) - ControlFile << FN << "\n"; - - if (!ControlFile) { - Printf("MERGE-OUTER: failed to write to the control file: %s\n", - CFPath.c_str()); - exit(1); - } - - return FilesToUse.size(); -} - -// Outer process. Does not call the target code and thus should not fail. -void CrashResistantMerge(const std::vector<std::string> &Args, - const std::vector<SizedFile> &OldCorpus, - const std::vector<SizedFile> &NewCorpus, - std::vector<std::string> *NewFiles, - const std::set<uint32_t> &InitialFeatures, - std::set<uint32_t> *NewFeatures, - const std::set<uint32_t> &InitialCov, - std::set<uint32_t> *NewCov, const std::string &CFPath, - bool V, /*Verbose*/ - bool IsSetCoverMerge) { - if (NewCorpus.empty() && OldCorpus.empty()) return; // Nothing to merge. - size_t NumAttempts = 0; - std::vector<MergeFileInfo> KnownFiles; - if (FileSize(CFPath)) { - VPrintf(V, "MERGE-OUTER: non-empty control file provided: '%s'\n", - CFPath.c_str()); - Merger M; - std::ifstream IF(CFPath); - if (M.Parse(IF, /*ParseCoverage=*/true)) { - VPrintf(V, "MERGE-OUTER: control file ok, %zd files total," - " first not processed file %zd\n", - M.Files.size(), M.FirstNotProcessedFile); - if (!M.LastFailure.empty()) - VPrintf(V, "MERGE-OUTER: '%s' will be skipped as unlucky " - "(merge has stumbled on it the last time)\n", - M.LastFailure.c_str()); - if (M.FirstNotProcessedFile >= M.Files.size()) { - // Merge has already been completed with the given merge control file. - if (M.Files.size() == OldCorpus.size() + NewCorpus.size()) { - VPrintf( - V, - "MERGE-OUTER: nothing to do, merge has been completed before\n"); - exit(0); - } - - // Number of input files likely changed, start merge from scratch, but - // reuse coverage information from the given merge control file. - VPrintf( - V, - "MERGE-OUTER: starting merge from scratch, but reusing coverage " - "information from the given control file\n"); - KnownFiles = M.Files; - } else { - // There is a merge in progress, continue. - NumAttempts = M.Files.size() - M.FirstNotProcessedFile; - } - } else { - VPrintf(V, "MERGE-OUTER: bad control file, will overwrite it\n"); - } - } - - if (!NumAttempts) { - // The supplied control file is empty or bad, create a fresh one. - VPrintf(V, "MERGE-OUTER: " - "%zd files, %zd in the initial corpus, %zd processed earlier\n", - OldCorpus.size() + NewCorpus.size(), OldCorpus.size(), - KnownFiles.size()); - NumAttempts = WriteNewControlFile(CFPath, OldCorpus, NewCorpus, KnownFiles); - } - - // Execute the inner process until it passes. - // Every inner process should execute at least one input. - Command BaseCmd(Args); - BaseCmd.removeFlag("merge"); - BaseCmd.removeFlag("set_cover_merge"); - BaseCmd.removeFlag("fork"); - BaseCmd.removeFlag("collect_data_flow"); - for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) { - Fuzzer::MaybeExitGracefully(); - VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt); - Command Cmd(BaseCmd); - Cmd.addFlag("merge_control_file", CFPath); - // If we are going to use the set cover implementation for - // minimization add the merge_inner=2 internal flag. - Cmd.addFlag("merge_inner", IsSetCoverMerge ? "2" : "1"); - if (!V) { - Cmd.setOutputFile(getDevNull()); - Cmd.combineOutAndErr(); - } - auto ExitCode = ExecuteCommand(Cmd); - if (!ExitCode) { - VPrintf(V, "MERGE-OUTER: successful in %zd attempt(s)\n", Attempt); - break; - } - } - // Read the control file and do the merge. - Merger M; - std::ifstream IF(CFPath); - IF.seekg(0, IF.end); - VPrintf(V, "MERGE-OUTER: the control file has %zd bytes\n", - (size_t)IF.tellg()); - IF.seekg(0, IF.beg); - M.ParseOrExit(IF, true); - IF.close(); - VPrintf(V, - "MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", - M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); - - M.Files.insert(M.Files.end(), KnownFiles.begin(), KnownFiles.end()); - if (IsSetCoverMerge) - M.SetCoverMerge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); - else - M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); - VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; " - "%zd new coverage edges\n", - NewFiles->size(), NewFeatures->size(), NewCov->size()); -} - -} // namespace fuzzer |