1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
|
//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines basic, non-domain-specific mechanisms for tracking tainted values.
//
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include <optional>
using namespace clang;
using namespace ento;
using namespace taint;
// Fully tainted symbols: a program-state map from a symbol to the taint tag
// that applies to the symbol as a whole.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
// Partially tainted symbols: TaintedSubRegions maps a sub-region to a taint
// tag, and DerivedSymTaint maps a parent symbol to the TaintedSubRegions map
// recorded for it (populated by addPartialTaint, consulted by isTainted).
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
TaintTagType)
REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
// Prints the TaintMap of State to Out. When the map is non-empty, a
// "Tainted symbols:" header is written followed by one "<symbol> : <tag>"
// entry per tainted symbol, each terminated by NL. Nothing is written for an
// empty map. Sep is part of the signature but is not used by this function.
void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
                       const char *Sep) {
  TaintMapTy Tainted = State->get<TaintMap>();

  // An empty map produces no output at all, not even the header.
  if (Tainted.isEmpty())
    return;

  Out << "Tainted symbols:" << NL;
  for (const auto &Entry : Tainted)
    Out << Entry.first << " : " << Entry.second << NL;
}
// Debugging helper: prints the taint information of State to llvm::errs(),
// leaving printTaint's remaining arguments at their declared defaults.
void taint::dumpTaint(ProgramStateRef State) {
  raw_ostream &Err = llvm::errs();
  printTaint(State, Err);
}
// Taints the value that statement S evaluates to in location context LCtx.
// The value is looked up in State and handed to the SVal overload.
ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
                                const LocationContext *LCtx,
                                TaintTagType Kind) {
  SVal StmtVal = State->getSVal(S, LCtx);
  return addTaint(State, StmtVal, Kind);
}
// Taints the value V with tag Kind and returns the resulting state.
//  - If V is a symbol, the symbol itself is tainted.
//  - If V is a lazy compound value whose store has a symbolic default
//    binding, that symbol is partially tainted for the value's region.
//  - Otherwise the request is forwarded to the MemRegion overload with
//    whatever region V denotes (possibly none).
ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
                                TaintTagType Kind) {
  if (SymbolRef DirectSym = V.getAsSymbol())
    return addTaint(State, DirectSym, Kind);

  // When the SVal stands for a structure, attempt to taint every value inside
  // it at once. At present this is only efficient for lazy compound values
  // conjured by a conservative function evaluation: either the return value
  // of a function returning a structure or array by value, or a structure or
  // array handed to the function by reference (directly or via pointer
  // aliasing). Such values have exactly one binding in their captured store
  // inside the parent region: a conjured symbol default-bound to the base
  // region of the parent region.
  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
    auto &StoreMgr = State->getStateManager().getStoreManager();
    std::optional<SVal> DefaultBinding = StoreMgr.getDefaultBinding(*LCV);
    if (DefaultBinding) {
      if (SymbolRef BoundSym = DefaultBinding->getAsSymbol())
        return addPartialTaint(State, BoundSym, LCV->getRegion(), Kind);
    }
  }

  // Fall back to region-based tainting; the overload tolerates a null region.
  return addTaint(State, V.getAsRegion(), Kind);
}
// Taints the symbol behind region R when R is a SymbolicRegion. For any other
// region, including a null pointer, State is returned unchanged.
ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
                                TaintTagType Kind) {
  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(R);
  if (!SymReg)
    return State;
  return addTaint(State, SymReg->getSymbol(), Kind);
}
// Records tag Kind for symbol Sym in the TaintMap and returns the new state.
ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
                                TaintTagType Kind) {
  // Taint is cast agnostic: peel off every enclosing symbol cast so that the
  // tag is attached to the innermost operand.
  for (const auto *Cast = dyn_cast<SymbolCast>(Sym); Cast;
       Cast = dyn_cast<SymbolCast>(Sym))
    Sym = Cast->getOperand();

  ProgramStateRef Updated = State->set<TaintMap>(Sym, Kind);
  assert(Updated);
  return Updated;
}
// Removes taint from the value V: from V's symbol when it has one, otherwise
// via the MemRegion overload using whatever region V denotes (possibly none).
ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
  if (SymbolRef DirectSym = V.getAsSymbol())
    return removeTaint(State, DirectSym);
  return removeTaint(State, V.getAsRegion());
}
// Removes taint from the symbol behind region R when R is a SymbolicRegion.
// For any other region, including a null pointer, State is returned unchanged.
ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(R);
  if (!SymReg)
    return State;
  return removeTaint(State, SymReg->getSymbol());
}
// Drops the TaintMap entry for symbol Sym and returns the new state.
ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
  // Taint is cast agnostic: peel off every enclosing symbol cast before
  // removing the taint, so the entry of the innermost operand is the one
  // that gets erased.
  for (const auto *Cast = dyn_cast<SymbolCast>(Sym); Cast;
       Cast = dyn_cast<SymbolCast>(Sym))
    Sym = Cast->getOperand();

  ProgramStateRef Updated = State->remove<TaintMap>(Sym);
  assert(Updated);
  return Updated;
}
// Marks the portion of ParentSym described by SubRegion as tainted with Kind.
// Returns State unchanged when ParentSym is already fully tainted with Kind,
// taints ParentSym as a whole when SubRegion is its own base region, and
// otherwise adds (SubRegion -> Kind) to the DerivedSymTaint entry of
// ParentSym.
ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
                                       SymbolRef ParentSym,
                                       const SubRegion *SubRegion,
                                       TaintTagType Kind) {
  // A full taint of the parent with the same tag already covers this request.
  const TaintTagType *ParentTag = State->get<TaintMap>(ParentSym);
  if (ParentTag && *ParentTag == Kind)
    return State;

  // Partial taint is only meaningful for a proper portion of the symbol; a
  // region that is its own base region is handled as a full taint instead.
  if (SubRegion->getBaseRegion() == SubRegion)
    return addTaint(State, ParentSym, Kind);

  // Extend the previously recorded sub-region map, or start from an empty one.
  TaintedSubRegions::Factory &MapFactory =
      State->get_context<TaintedSubRegions>();
  const TaintedSubRegions *Existing = State->get<DerivedSymTaint>(ParentSym);
  TaintedSubRegions Regions = Existing ? *Existing : MapFactory.getEmptyMap();
  Regions = MapFactory.add(Regions, SubRegion, Kind);

  ProgramStateRef Updated = State->set<DerivedSymTaint>(ParentSym, Regions);
  assert(Updated);
  return Updated;
}
// Returns whether the value that statement S evaluates to in location context
// LCtx is tainted with Kind; delegates to the SVal overload.
bool taint::isTainted(ProgramStateRef State, const Stmt *S,
                      const LocationContext *LCtx, TaintTagType Kind) {
  return isTainted(State, State->getSVal(S, LCtx), Kind);
}
// Returns whether value V is tainted with Kind. A symbol takes precedence
// over a region; a value that is neither is never tainted.
bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
  SymbolRef Sym = V.getAsSymbol();
  if (Sym)
    return isTainted(State, Sym, Kind);

  const MemRegion *Reg = V.getAsRegion();
  return Reg && isTainted(State, Reg, Kind);
}
// Returns whether region Reg is tainted with K. A null region is not tainted.
// The check is made on the region kind:
//  - ElementRegion: tainted if its super region or its index is tainted.
//  - SymbolicRegion: tainted if its symbol is tainted.
//  - any other SubRegion: tainted if its super region is tainted.
//  - anything else: not tainted.
bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
                      TaintTagType K) {
  if (!Reg)
    return false;

  // An array element inherits taint from either the array or the offset.
  if (const auto *Elem = dyn_cast<ElementRegion>(Reg)) {
    if (isTainted(State, Elem->getSuperRegion(), K))
      return true;
    return isTainted(State, Elem->getIndex(), K);
  }

  if (const auto *SymReg = dyn_cast<SymbolicRegion>(Reg))
    return isTainted(State, SymReg->getSymbol(), K);

  // Remaining sub-regions defer to the region that contains them.
  if (const auto *Sub = dyn_cast<SubRegion>(Reg))
    return isTainted(State, Sub->getSuperRegion(), K);

  return false;
}
// Returns whether symbol Sym is tainted with Kind. A null symbol is not
// tainted. Every symbol reachable through Sym's symbol iterator is examined;
// Sym counts as tainted as soon as one of them is found to be tainted.
bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
  if (!Sym)
    return false;

  // Walk over all the symbols this symbol depends on.
  for (auto It = Sym->symbol_begin(), End = Sym->symbol_end(); It != End;
       ++It) {
    SymbolRef SubSym = *It;

    // Only SymbolData sub-symbols are inspected; everything else is skipped.
    if (!isa<SymbolData>(SubSym))
      continue;

    // Directly recorded as fully tainted with the requested tag.
    const TaintTagType *Tag = State->get<TaintMap>(SubSym);
    if (Tag && *Tag == Kind)
      return true;

    if (const auto *Derived = dyn_cast<SymbolDerived>(SubSym)) {
      SymbolRef Parent = Derived->getParentSymbol();

      // A derived symbol whose parent is tainted is tainted as well.
      if (isTainted(State, Parent, Kind))
        return true;

      // Otherwise it is tainted when its region lies inside one of the
      // sub-regions recorded as partially tainted for the same parent.
      if (const TaintedSubRegions *Regs = State->get<DerivedSymTaint>(Parent)) {
        const TypedValueRegion *DerivedReg = Derived->getRegion();
        for (const auto &Entry : *Regs) {
          // FIXME: The logic to identify tainted regions could be more
          // complete. For example, this would not currently identify
          // overlapping fields in a union as tainted. To identify this we can
          // check for overlapping/nested byte offsets.
          if (Kind == Entry.second && DerivedReg->isSubRegionOf(Entry.first))
            return true;
        }
      }
    }

    // Data read from a tainted memory region is tainted too.
    if (const auto *RegionVal = dyn_cast<SymbolRegionValue>(SubSym))
      if (isTainted(State, RegionVal->getRegion(), Kind))
        return true;

    // A cast of a tainted value is tainted too.
    // NOTE(review): this check sits behind the SymbolData filter above;
    // confirm that a SymbolCast can actually reach it.
    if (const auto *Cast = dyn_cast<SymbolCast>(SubSym))
      if (isTainted(State, Cast->getOperand(), Kind))
        return true;
  }

  return false;
}
// Emits a "Taint originated here" event for the node at which the tracked
// value V first becomes tainted, i.e. V is tainted in N's state but not in
// the state of N's first predecessor. Returns nullptr for every other node,
// and also when no statement or no valid source location is available for N.
PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
                                                  BugReporterContext &BRC,
                                                  PathSensitiveBugReport &BR) {
  // Nodes where V is not tainted yet are of no interest.
  if (!isTainted(N->getState(), V))
    return nullptr;

  // If V was already tainted one step earlier, the taint originated
  // elsewhere.
  // NOTE(review): assumes N always has a predecessor here; confirm against
  // how the bug reporter drives visitors.
  if (isTainted(N->getFirstPred()->getState(), V))
    return nullptr;

  const Stmt *OriginStmt = N->getStmtForDiagnostics();
  if (!OriginStmt)
    return nullptr;

  const LocationContext *NodeCtx = N->getLocationContext();
  PathDiagnosticLocation Loc = PathDiagnosticLocation::createBegin(
      OriginStmt, BRC.getSourceManager(), NodeCtx);
  if (!(Loc.isValid() && Loc.asLocation().isValid()))
    return nullptr;

  return std::make_shared<PathDiagnosticEventPiece>(Loc,
                                                    "Taint originated here");
}
|