diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index c0d3fbd0eb961a..0d9566285f5d4e 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -34,6 +34,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include #include #include @@ -99,6 +100,8 @@ class MemRegion : public llvm::FoldingSetNode { #define REGION(Id, Parent) Id ## Kind, #define REGION_RANGE(Id, First, Last) BEGIN_##Id = First, END_##Id = Last, #include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def" +#undef REGION +#undef REGION_RANGE }; private: @@ -171,6 +174,8 @@ class MemRegion : public llvm::FoldingSetNode { Kind getKind() const { return kind; } + StringRef getKindStr() const; + template const RegionTy* getAs() const; template LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy *castAs() const; diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 238e87a712a43a..8dd08f14b2728b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InterCheckerAPI.h" +#include "clang/AST/OperationKinds.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" @@ -22,10 +23,13 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -304,6 +308,10 @@ class CStringChecker : public Checker< eval::Call, // Re-usable checks ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State, AnyArgExpr Arg, SVal l) const; + // Check whether the origin region behind \p Element (like the actual array + // region \p Element is from) is initialized. + ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state, + AnyArgExpr Buffer, SVal Element, SVal Size) const; ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state, AnyArgExpr Buffer, SVal Element, AccessKind Access, @@ -329,7 +337,7 @@ class CStringChecker : public Checker< eval::Call, const Stmt *S, StringRef WarningMsg) const; void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, - const Expr *E) const; + const Expr *E, StringRef Msg) const; ProgramStateRef checkAdditionOverflow(CheckerContext &C, ProgramStateRef state, NonLoc left, @@ -351,16 +359,16 @@ REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) // Individual checks and utility methods. //===----------------------------------------------------------------------===// -std::pair -CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, +std::pair +CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V, QualType Ty) { std::optional val = V.getAs(); if (!val) - return std::pair(state, state); + return std::pair(State, State); SValBuilder &svalBuilder = C.getSValBuilder(); DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); - return state->assume(svalBuilder.evalEQ(state, *val, zero)); + return State->assume(svalBuilder.evalEQ(State, *val, zero)); } ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, @@ -393,6 +401,149 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, return stateNonNull; } +static std::optional getIndex(ProgramStateRef State, + const ElementRegion *ER, CharKind CK) { + SValBuilder &SVB = State->getStateManager().getSValBuilder(); + ASTContext &Ctx = SVB.getContext(); + + if (CK == CharKind::Regular) { + if (ER->getValueType() != Ctx.CharTy) + return {}; + return ER->getIndex(); + } + + if (ER->getValueType() != Ctx.WideCharTy) + return {}; + + QualType SizeTy = Ctx.getSizeType(); + NonLoc WideSize = + SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(), + SizeTy) + .castAs(); + SVal Offset = + SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy); + if (Offset.isUnknown()) + return {}; + return Offset.castAs(); +} + +// Basically 1 -> 1st, 12 -> 12th, etc. +static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) { + Os << Idx << llvm::getOrdinalSuffix(Idx); +} + +ProgramStateRef CStringChecker::checkInit(CheckerContext &C, + ProgramStateRef State, + AnyArgExpr Buffer, SVal Element, + SVal Size) const { + + // If a previous check has failed, propagate the failure. + if (!State) + return nullptr; + + const MemRegion *R = Element.getAsRegion(); + const auto *ER = dyn_cast_or_null(R); + if (!ER) + return State; + + const auto *SuperR = ER->getSuperRegion()->getAs(); + if (!SuperR) + return State; + + // FIXME: We ought to able to check objects as well. Maybe + // UninitializedObjectChecker could help? + if (!SuperR->getValueType()->isArrayType()) + return State; + + SValBuilder &SVB = C.getSValBuilder(); + ASTContext &Ctx = SVB.getContext(); + + const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType()); + const NonLoc Zero = SVB.makeZeroArrayIndex(); + + std::optional FirstElementVal = + State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs(); + if (!FirstElementVal) + return State; + + // Ensure that we wouldn't read uninitialized value. + if (Filter.CheckCStringUninitializedRead && + State->getSVal(*FirstElementVal).isUndef()) { + llvm::SmallString<258> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "The first element of the "; + printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1); + OS << " argument is undefined"; + emitUninitializedReadBug(C, State, Buffer.Expression, OS.str()); + return nullptr; + } + + // We won't check whether the entire region is fully initialized -- lets just + // check that the first and the last element is. So, onto checking the last + // element: + const QualType IdxTy = SVB.getArrayIndexType(); + + NonLoc ElemSize = + SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy) + .castAs(); + + // FIXME: Check that the size arg to the cstring function is divisible by + // size of the actual element type? + + // The type of the argument to the cstring function is either char or wchar, + // but thats not the type of the original array (or memory region). + // Suppose the following: + // int t[5]; + // memcpy(dst, t, sizeof(t) / sizeof(t[0])); + // When checking whether t is fully initialized, we see it as char array of + // size sizeof(int)*5. If we check the last element as a character, we read + // the last byte of an integer, which will be undefined. But just because + // that value is undefined, it doesn't mean that the element is uninitialized! + // For this reason, we need to retrieve the actual last element with the + // correct type. + + // Divide the size argument to the cstring function by the actual element + // type. This value will be size of the array, or the index to the + // past-the-end element. + std::optional Offset = + SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs(), ElemSize, + IdxTy) + .getAs(); + + // Retrieve the index of the last element. + const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs(); + SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy); + + if (!Offset) + return State; + + SVal LastElementVal = + State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR)); + if (!isa(LastElementVal)) + return State; + + if (Filter.CheckCStringUninitializedRead && + State->getSVal(LastElementVal.castAs()).isUndef()) { + const llvm::APSInt *IdxInt = LastIdx.getAsInteger(); + // If we can't get emit a sensible last element index, just bail out -- + // prefer to emit nothing in favour of emitting garbage quality reports. + if (!IdxInt) { + C.addSink(); + return nullptr; + } + llvm::SmallString<258> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "The last accessed element (at index "; + OS << IdxInt->getExtValue(); + OS << ") in the "; + printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1); + OS << " argument is undefined"; + emitUninitializedReadBug(C, State, Buffer.Expression, OS.str()); + return nullptr; + } + return State; +} + // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, ProgramStateRef state, @@ -413,38 +564,17 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, if (!ER) return state; - SValBuilder &svalBuilder = C.getSValBuilder(); - ASTContext &Ctx = svalBuilder.getContext(); - // Get the index of the accessed element. - NonLoc Idx = ER->getIndex(); - - if (CK == CharKind::Regular) { - if (ER->getValueType() != Ctx.CharTy) - return state; - } else { - if (ER->getValueType() != Ctx.WideCharTy) - return state; - - QualType SizeTy = Ctx.getSizeType(); - NonLoc WideSize = - svalBuilder - .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(), - SizeTy) - .castAs(); - SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy); - if (Offset.isUnknown()) - return state; - Idx = Offset.castAs(); - } + std::optional Idx = getIndex(state, ER, CK); + if (!Idx) + return state; // Get the size of the array. const auto *superReg = cast(ER->getSuperRegion()); DefinedOrUnknownSVal Size = getDynamicExtent(state, superReg, C.getSValBuilder()); - ProgramStateRef StInBound, StOutBound; - std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size); + auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size); if (StOutBound && !StInBound) { // These checks are either enabled by the CString out-of-bounds checker // explicitly or implicitly by the Malloc checker. @@ -459,15 +589,6 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, return nullptr; } - // Ensure that we wouldn't read uninitialized value. - if (Access == AccessKind::read) { - if (Filter.CheckCStringUninitializedRead && - StInBound->getSVal(ER).isUndef()) { - emitUninitializedReadBug(C, StInBound, Buffer.Expression); - return nullptr; - } - } - // Array bound check succeeded. From this point forward the array bound // should always succeed. return StInBound; @@ -502,6 +623,7 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State, // Check if the first byte of the buffer is accessible. State = CheckLocation(C, State, Buffer, BufStart, Access, CK); + if (!State) return nullptr; @@ -526,6 +648,8 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State, SVal BufEnd = svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); State = CheckLocation(C, State, Buffer, BufEnd, Access, CK); + if (Access == AccessKind::read) + State = checkInit(C, State, Buffer, BufEnd, *Length); // If the buffer isn't large enough, abort. if (!State) @@ -694,16 +818,17 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State, void CStringChecker::emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, - const Expr *E) const { + const Expr *E, + StringRef Msg) const { if (ExplodedNode *N = C.generateErrorNode(State)) { - const char *Msg = - "Bytes string function accesses uninitialized/garbage values"; if (!BT_UninitRead) BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead, "Accessing unitialized/garbage values")); auto Report = std::make_unique(*BT_UninitRead, Msg, N); + Report->addNote("Other elements might also be undefined", + Report->getLocation()); Report->addRange(E->getSourceRange()); bugreporter::trackExpressionValue(N, E, *Report); C.emitReport(std::move(Report)); diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index 6fe929b1cb94a7..693791c3aee8b9 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -630,6 +630,17 @@ bool MemRegion::canPrintPrettyAsExpr() const { return false; } +StringRef MemRegion::getKindStr() const { + switch (getKind()) { +#define REGION(Id, Parent) \ + case Id##Kind: \ + return #Id; +#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def" +#undef REGION + } + llvm_unreachable("Unkown kind!"); +} + void MemRegion::printPretty(raw_ostream &os) const { assert(canPrintPretty() && "This region cannot be printed pretty."); os << "'"; diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c index c535e018e62c27..45e38dd3162988 100644 --- a/clang/test/Analysis/bstring_UninitRead.c +++ b/clang/test/Analysis/bstring_UninitRead.c @@ -1,12 +1,11 @@ // RUN: %clang_analyze_cc1 -verify %s \ // RUN: -analyzer-checker=core,alpha.unix.cstring - -// This file is generally for the alpha.unix.cstring.UninitializedRead Checker, the reason for putting it into -// the separate file because the checker is break the some existing test cases in bstring.c file , so we don't -// wanna mess up with some existing test case so it's better to create separate file for it, this file also include -// the broken test for the reference in future about the broken tests. - +//===----------------------------------------------------------------------===// +// mempcpy() using character array. This is the easiest case, as memcpy +// intepretrs the dst and src buffers as character arrays (regardless of their +// actual type). +//===----------------------------------------------------------------------===// typedef typeof(sizeof(int)) size_t; @@ -14,46 +13,120 @@ void clang_analyzer_eval(int); void *memcpy(void *restrict s1, const void *restrict s2, size_t n); -void top(char *dst) { +void memcpy_array_fully_uninit(char *dst) { + char buf[10]; + memcpy(dst, buf, 10); // expected-warning{{The first element of the 2nd argument is undefined}} + // expected-note@-1{{Other elements might also be undefined}} + (void)buf; +} + +void memcpy_array_partially_uninit(char *dst) { char buf[10]; - memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} + buf[0] = 'i'; + memcpy(dst, buf, 10); // expected-warning{{The last accessed element (at index 9) in the 2nd argument is undefined}} + // expected-note@-1{{Other elements might also be undefined}} + (void)buf; +} + +void memcpy_array_only_init_portion(char *dst) { + char buf[10]; + buf[0] = 'i'; + memcpy(dst, buf, 1); + (void)buf; +} + +void memcpy_array_partially_init_error(char *dst) { + char buf[10]; + buf[0] = 'i'; + memcpy(dst, buf, 2); // expected-warning{{The last accessed element (at index 1) in the 2nd argument is undefined}} + // expected-note@-1{{Other elements might also be undefined}} + (void)buf; +} + +// The interesting case here is that the portion we're copying is initialized, +// but not the whole matrix. We need to be careful to extract buf[1], and not +// buf when trying to peel region layers off from the source argument. +void memcpy_array_from_matrix(char *dst) { + char buf[2][2]; + buf[1][0] = 'i'; + buf[1][1] = 'j'; + // FIXME: This is a FP -- we mistakenly retrieve the first element of buf, + // instead of the first element of buf[1]. getLValueElement simply peels off + // another ElementRegion layer, when in this case it really shouldn't. + memcpy(dst, buf[1], 2); // expected-warning{{The first element of the 2nd argument is undefined}} + // expected-note@-1{{Other elements might also be undefined}} (void)buf; } -//===----------------------------------------------------------------------=== -// mempcpy() -//===----------------------------------------------------------------------=== +//===----------------------------------------------------------------------===// +// mempcpy() using non-character arrays. +//===----------------------------------------------------------------------===// void *mempcpy(void *restrict s1, const void *restrict s2, size_t n); -void mempcpy14() { +void memcpy_int_array_fully_init() { int src[] = {1, 2, 3, 4}; int dst[5] = {0}; int *p; - p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} - // FIXME: This behaviour is actually surprising and needs to be fixed, - // mempcpy seems to consider the very last byte of the src buffer uninitialized - // and returning undef unfortunately. It should have returned unknown or a conjured value instead. + p = mempcpy(dst, src, 4 * sizeof(int)); + clang_analyzer_eval(p == &dst[4]); +} - clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal) +void memcpy_int_array_fully_init2(int *dest) { + int t[] = {1, 2, 3}; + memcpy(dest, t, sizeof(t)); } +//===----------------------------------------------------------------------===// +// mempcpy() using nonarrays. +//===----------------------------------------------------------------------===// + struct st { int i; int j; }; - -void mempcpy15() { +void mempcpy_struct_partially_uninit() { struct st s1 = {0}; struct st s2; struct st *p1; struct st *p2; p1 = (&s2) + 1; - p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} - // FIXME: It seems same as mempcpy14() case. - - clang_analyzer_eval(p1 == p2); // no-warning (above is fatal) + + // FIXME: Maybe ask UninitializedObjectChecker whether s1 is fully + // initialized? + p2 = mempcpy(&s2, &s1, sizeof(struct st)); + + clang_analyzer_eval(p1 == p2); +} + +void mempcpy_struct_fully_uninit() { + struct st s1; + struct st s2; + + // FIXME: Maybe ask UninitializedObjectChecker whether s1 is fully + // initialized? + mempcpy(&s2, &s1, sizeof(struct st)); +} + +// Creduced crash. In this case, an symbolicregion is wrapped in an +// elementregion for the src argument. +void *ga_copy_strings_from_0; +void *memmove(); +void alloc(); +void ga_copy_strings() { + int i = 0; + for (;; ++i) + memmove(alloc, ((char **)ga_copy_strings_from_0)[i], 1); +} + +// Creduced crash. In this case, retrieving the Loc for the first element failed. +char mov_mdhd_language_map[][4] = {}; +int ff_mov_lang_to_iso639_code; +char *ff_mov_lang_to_iso639_to; +void ff_mov_lang_to_iso639() { + memcpy(ff_mov_lang_to_iso639_to, + mov_mdhd_language_map[ff_mov_lang_to_iso639_code], 4); }