diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 0d87df36ced0e9..eb8b58323da4d7 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -3033,13 +3033,6 @@ Further examples of injection vulnerabilities this checker can find. sprintf(buf, s); // warn: untrusted data used as a format string } - void test() { - size_t ts; - scanf("%zd", &ts); // 'ts' marked as tainted - int *p = (int *)malloc(ts * sizeof(int)); - // warn: untrusted data used as buffer size - } - There are built-in sources, propagations and sinks even if no external taint configuration is provided. @@ -3067,9 +3060,7 @@ Default propagations rules: Default sinks: ``printf``, ``setproctitle``, ``system``, ``popen``, ``execl``, ``execle``, - ``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen``, - ``memcpy``, ``memmove``, ``strncpy``, ``strndup``, ``malloc``, ``calloc``, - ``alloca``, ``memccpy``, ``realloc``, ``bcopy`` + ``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen`` Please note that there are no built-in filter functions. diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 63844563de44f1..f9548b5c3010bf 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -1338,6 +1338,9 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call, // If the size can be nonzero, we have to check the other arguments. if (stateNonZeroSize) { + // TODO: If Size is tainted and we cannot prove that it is smaller or equal + // to the size of the destination buffer, then emit a warning + // that an attacker may provoke a buffer overflow error. state = stateNonZeroSize; // Ensure the destination is not null. If it is NULL there will be a diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index 89054512d65ad6..d17f5ddf070553 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs = "Untrusted data is passed to a system call " "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; -/// Check if tainted data is used as a buffer size in strn.. functions, -/// and allocators. -constexpr llvm::StringLiteral MsgTaintedBufferSize = - "Untrusted data is used to specify the buffer size " - "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " - "for character data and the null terminator)"; - /// Check if tainted data is used as a custom sink's parameter. constexpr llvm::StringLiteral MsgCustomSink = "Untrusted data is passed to a user-defined sink"; @@ -298,14 +291,6 @@ class GenericTaintRule { return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; } - /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. - static GenericTaintRule - SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs, - std::optional Msg = std::nullopt) { - return { - std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg}; - } - /// Process a function which could either be a taint source, a taint sink, a /// taint filter or a taint propagator. void process(const GenericTaintChecker &Checker, const CallEvent &Call, @@ -733,12 +718,21 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{CDM::CLibraryMaybeHardened, {{"stpcpy"}}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, {{CDM::CLibraryMaybeHardened, {{"strcat"}}}, - TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})}, {{CDM::CLibraryMaybeHardened, {{"wcsncat"}}}, - TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})}, {{CDM::CLibrary, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{CDM::CLibrary, {{"strdupa"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{CDM::CLibrary, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, BI.getName(Builtin::BImemcpy)}, + TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {BI.getName(Builtin::BImemmove)}}, + TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {BI.getName(Builtin::BIstrncpy)}}, + TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}}, + TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})}, // Sinks {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, @@ -752,30 +746,15 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{CDM::CLibrary, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, - {{CDM::CLibrary, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, - {{CDM::CLibrary, {{"alloca"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, - {{CDM::CLibrary, {{"memccpy"}}}, TR::Sink({{3}}, MsgTaintedBufferSize)}, - {{CDM::CLibrary, {{"realloc"}}}, TR::Sink({{1}}, MsgTaintedBufferSize)}, + + // malloc, calloc, alloca, realloc, memccpy + // are intentionally not marked as taint sinks because unconditional + // reporting for these functions generates many false positives. + // These taint sinks should be implemented in other checkers with more + // sophisticated sanitation heuristics. {{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, {{{{"setproctitle_fast"}}}, - TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, - - // SinkProps - {{CDM::CLibraryMaybeHardened, BI.getName(Builtin::BImemcpy)}, - TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, - MsgTaintedBufferSize)}, - {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BImemmove)}}, - TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, - MsgTaintedBufferSize)}, - {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncpy)}}, - TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, - MsgTaintedBufferSize)}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}}, - TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}}, - MsgTaintedBufferSize)}, - {{CDM::CLibrary, {{"bcopy"}}}, - TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}}; + TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}}; // `getenv` returns taint only in untrusted environments. if (TR::UntrustedEnv(C)) { diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 11651fd491f743..dd204b62dcc040 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -1819,6 +1819,10 @@ ProgramStateRef MallocChecker::MallocMemAux(CheckerContext &C, if (Size.isUndef()) Size = UnknownVal(); + // TODO: If Size is tainted and we cannot prove that it is within + // reasonable bounds, emit a warning that an attacker may + // provoke a memory exhaustion error. + // Set the region's extent. State = setDynamicExtent(State, RetVal.getAsRegion(), Size.castAs(), SVB); diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c index 2ba7d9938fc3d8..b8b3710a7013e7 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor.c +++ b/clang/test/Analysis/taint-diagnostic-visitor.c @@ -10,7 +10,8 @@ int scanf(const char *restrict format, ...); int system(const char *command); char* getenv( const char* env_var ); size_t strlen( const char* str ); -int atoi( const char* str ); +char *strcat( char *dest, const char *src ); +char* strcpy( char* dest, const char* src ); void *malloc(size_t size ); void free( void *ptr ); char *fgets(char *str, int n, FILE *stream); @@ -53,34 +54,32 @@ void taintDiagnosticVLA(void) { // Tests if the originated note is correctly placed even if the path is // propagating through variables and expressions -char *taintDiagnosticPropagation(){ - char *pathbuf; - char *size=getenv("SIZE"); // expected-note {{Taint originated here}} - // expected-note@-1 {{Taint propagated to the return value}} - if (size){ // expected-note {{Assuming 'size' is non-null}} - // expected-note@-1 {{Taking true branch}} - pathbuf=(char*) malloc(atoi(size)); // expected-warning{{Untrusted data is used to specify the buffer size}} - // expected-note@-1{{Untrusted data is used to specify the buffer size}} - // expected-note@-2 {{Taint propagated to the return value}} - return pathbuf; +int taintDiagnosticPropagation(){ + int res; + char *cmd=getenv("CMD"); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to the return value}} + if (cmd){ // expected-note {{Assuming 'cmd' is non-null}} + // expected-note@-1 {{Taking true branch}} + res = system(cmd); // expected-warning{{Untrusted data is passed to a system call}} + // expected-note@-1{{Untrusted data is passed to a system call}} + return res; } - return 0; + return -1; } // Taint origin should be marked correctly even if there are multiple taint // sources in the function -char *taintDiagnosticPropagation2(){ - char *pathbuf; +int taintDiagnosticPropagation2(){ + int res; char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source - char *size=getenv("SIZE"); // expected-note {{Taint originated here}} - // expected-note@-1 {{Taint propagated to the return value}} + char *cmd=getenv("CMD"); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to the return value}} char *user_env=getenv("USER_ENV_VAR");//unrelated taint source - if (size){ // expected-note {{Assuming 'size' is non-null}} - // expected-note@-1 {{Taking true branch}} - pathbuf=(char*) malloc(atoi(size)+1); // expected-warning{{Untrusted data is used to specify the buffer size}} - // expected-note@-1{{Untrusted data is used to specify the buffer size}} - // expected-note@-2 {{Taint propagated to the return value}} - return pathbuf; + if (cmd){ // expected-note {{Assuming 'cmd' is non-null}} + // expected-note@-1 {{Taking true branch}} + res = system(cmd); // expected-warning{{Untrusted data is passed to a system call}} + // expected-note@-1{{Untrusted data is passed to a system call}} + return res; } return 0; } @@ -95,22 +94,24 @@ void testReadStdIn(){ } void multipleTaintSources(void) { - int x,y,z; - scanf("%d", &x); // expected-note {{Taint originated here}} + char cmd[2048], file[1024]; + scanf ("%1022[^\n] ", cmd); // expected-note {{Taint originated here}} // expected-note@-1 {{Taint propagated to the 2nd argument}} - scanf("%d", &y); // expected-note {{Taint originated here}} + scanf ("%1023[^\n]", file); // expected-note {{Taint originated here}} // expected-note@-1 {{Taint propagated to the 2nd argument}} - scanf("%d", &z); - int* ptr = (int*) malloc(y + x); // expected-warning {{Untrusted data is used to specify the buffer size}} - // expected-note@-1{{Untrusted data is used to specify the buffer size}} - free (ptr); + strcat(cmd, file); // expected-note {{Taint propagated to the 1st argument}} + strcat(cmd, " "); // expected-note {{Taint propagated to the 1st argument}} + system(cmd); // expected-warning {{Untrusted data is passed to a system call}} + // expected-note@-1{{Untrusted data is passed to a system call}} } void multipleTaintedArgs(void) { - int x,y; - scanf("%d %d", &x, &y); // expected-note {{Taint originated here}} + char cmd[1024], file[1024], buf[2048]; + scanf("%1022s %1023s", cmd, file); // expected-note {{Taint originated here}} // expected-note@-1 {{Taint propagated to the 2nd argument, 3rd argument}} - int* ptr = (int*) malloc(x + y); // expected-warning {{Untrusted data is used to specify the buffer size}} - // expected-note@-1{{Untrusted data is used to specify the buffer size}} - free (ptr); + strcpy(buf, cmd);// expected-note {{Taint propagated to the 1st argument}} + strcat(buf, " ");// expected-note {{Taint propagated to the 1st argument}} + strcat(buf, file);// expected-note {{Taint propagated to the 1st argument}} + system(buf); // expected-warning {{Untrusted data is passed to a system call}} + // expected-note@-1{{Untrusted data is passed to a system call}} } diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index e85b4106a5806b..b0df85f237298d 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -305,15 +305,21 @@ void testGets_s(void) { void testTaintedBufferSize(void) { size_t ts; + // The functions malloc, calloc, bcopy and memcpy are not taint sinks in the + // default config of GenericTaintChecker (because that would cause too many + // false positives). + // FIXME: We should generate warnings when a value passed to these functions + // is tainted and _can be very large_ (because that's exploitable). This + // functionality probably belongs to the checkers that do more detailed + // modeling of these functions (MallocChecker and CStringChecker). scanf("%zd", &ts); - - int *buf1 = (int*)malloc(ts*sizeof(int)); // expected-warning {{Untrusted data is used to specify the buffer size}} - char *dst = (char*)calloc(ts, sizeof(char)); //expected-warning {{Untrusted data is used to specify the buffer size}} - bcopy(buf1, dst, ts); // expected-warning {{Untrusted data is used to specify the buffer size}} - __builtin_memcpy(dst, buf1, (ts + 4)*sizeof(char)); // expected-warning {{Untrusted data is used to specify the buffer size}} + int *buf1 = (int*)malloc(ts*sizeof(int)); // warn here, ts is unbounded and tainted + char *dst = (char*)calloc(ts, sizeof(char)); // warn here, ts is unbounded tainted + bcopy(buf1, dst, ts); // no warning here, since the size of buf1, dst equals ts. Cannot overflow. + __builtin_memcpy(dst, buf1, (ts + 4)*sizeof(char)); // warn here, dst overflows (whatever the value of ts) // If both buffers are trusted, do not issue a warning. - char *dst2 = (char*)malloc(ts*sizeof(char)); // expected-warning {{Untrusted data is used to specify the buffer size}} + char *dst2 = (char*)malloc(ts*sizeof(char)); // warn here, ts in unbounded strncat(dst2, dst, ts); // no-warning } @@ -353,7 +359,7 @@ void testStruct(void) { sock = socket(AF_INET, SOCK_STREAM, 0); read(sock, &tainted, sizeof(tainted)); - __builtin_memcpy(buffer, tainted.buf, tainted.length); // expected-warning {{Untrusted data is used to specify the buffer size}} + clang_analyzer_isTainted_int(tainted.length); // expected-warning {{YES }} } void testStructArray(void) { @@ -368,17 +374,17 @@ void testStructArray(void) { __builtin_memset(srcbuf, 0, sizeof(srcbuf)); read(sock, &tainted[0], sizeof(tainted)); - __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}} + clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{YES}} __builtin_memset(&tainted, 0, sizeof(tainted)); read(sock, &tainted, sizeof(tainted)); - __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}} + clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{YES}} __builtin_memset(&tainted, 0, sizeof(tainted)); // If we taint element 1, we should not raise an alert on taint for element 0 or element 2 read(sock, &tainted[1], sizeof(tainted)); - __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // no-warning - __builtin_memcpy(dstbuf, srcbuf, tainted[2].length); // no-warning + clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{NO}} + clang_analyzer_isTainted_int(tainted[2].length); // expected-warning {{NO}} } void testUnion(void) {