diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f149684214567d..a3de4e60a830e3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -43,6 +43,10 @@ code bases. C/C++ Language Potentially Breaking Changes ------------------------------------------- +- Clang now supports raw string literals in ``-std=gnuXY`` mode as an extension in + C99 and later. This behaviour can also be overridden using ``-f[no-]raw-string-literals``. + Support of raw string literals in C++ is not affected. Fixes (#GH85703). + C++ Specific Potentially Breaking Changes ----------------------------------------- - Clang now diagnoses function/variable templates that shadow their own template parameters, e.g. ``template<class T> void T();``. diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index a62bdc21298eee..1c7f37eb9c415e 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -375,6 +375,9 @@ def err_drv_negative_columns : Error< "invalid value '%1' in '%0', value must be 'none' or a positive integer">; def err_drv_small_columns : Error< "invalid value '%1' in '%0', value must be '%2' or greater">; +def warn_drv_fraw_string_literals_in_cxx11 : Warning< + "ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C and C++ standards before C++11">, + InGroup<UnusedCommandLineArgument>; def err_drv_invalid_malign_branch_EQ : Error< "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 491759e2fcdbb9..d806b917432ada 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -465,6 +465,8 @@ LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type") LANGOPT(CXXAssumptions, 1, 1, "Enable or disable codegen and compile-time checks for C++23's 
[[assume]] attribute") +LANGOPT(RawStringLiterals, 1, 1, "Enable or disable raw string literals") + ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2, StrictFlexArraysLevelKind::Default, "Rely on strict definition of flexible arrays") diff --git a/clang/include/clang/Basic/LangStandard.h b/clang/include/clang/Basic/LangStandard.h index f79b4aafb0b262..56a0d2c95e2b19 100644 --- a/clang/include/clang/Basic/LangStandard.h +++ b/clang/include/clang/Basic/LangStandard.h @@ -134,6 +134,13 @@ struct LangStandard { /// hasDigraphs - Language supports digraphs. bool hasDigraphs() const { return Flags & Digraphs; } + /// hasRawStringLiterals - Language supports R"()" raw string literals. + bool hasRawStringLiterals() const { + // GCC supports raw string literals in C99 and later, but not in C++ + // before C++11. + return isCPlusPlus11() || (!isCPlusPlus() && isC99() && isGNUMode()); + } + /// isGNUMode - Language includes GNU extensions. bool isGNUMode() const { return Flags & GNUMode; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 58ca6f2bea9e44..c616bd15916e84 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4235,6 +4235,12 @@ def fenable_matrix : Flag<["-"], "fenable-matrix">, Group<f_Group>, HelpText<"Enable matrix data type and related builtin functions">, MarshallingInfoFlag<LangOpts<"MatrixTypes">>; +defm raw_string_literals : BoolFOption<"raw-string-literals", + LangOpts<"RawStringLiterals">, Default<std#".hasRawStringLiterals()">, + PosFlag<SetTrue, [], [], "Enable">, + NegFlag<SetFalse, [], [], "Disable">, + BothFlags<[], [ClangOption, CC1Option], " raw string literals">>; + def fzero_call_used_regs_EQ : Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>, Visibility<[ClangOption, CC1Option]>, diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 61072b7b81bffe..e5adc034f60c1f 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -125,6 +125,7 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language 
Lang, Opts.HexFloats = Std.hasHexFloats(); Opts.WChar = Std.isCPlusPlus(); Opts.Digraphs = Std.hasDigraphs(); + Opts.RawStringLiterals = Std.hasRawStringLiterals(); Opts.HLSL = Lang == Language::HLSL; if (Opts.HLSL && Opts.IncludeDefaultHeader) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index aa285c39f14b43..e89d9136b9920c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6516,6 +6516,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions); Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs); Args.AddLastArg(CmdArgs, options::OPT_fzero_call_used_regs_EQ); + Args.AddLastArg(CmdArgs, options::OPT_fraw_string_literals, + options::OPT_fno_raw_string_literals); if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls, Triple.hasDefaultEmulatedTLS())) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index f42e28ba7e6294..0082c15aac7e46 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -610,6 +610,19 @@ static bool FixupInvocation(CompilerInvocation &Invocation, LangOpts.NewAlignOverride = 0; } + // The -f[no-]raw-string-literals option is only valid in C and in C++ + // standards before C++11. + if (LangOpts.CPlusPlus11) { + if (Args.hasArg(OPT_fraw_string_literals, OPT_fno_raw_string_literals)) { + Args.claimAllArgs(OPT_fraw_string_literals, OPT_fno_raw_string_literals); + Diags.Report(diag::warn_drv_fraw_string_literals_in_cxx11) + << bool(LangOpts.RawStringLiterals); + } + + // Do not allow disabling raw string literals in C++11 or later. + LangOpts.RawStringLiterals = true; + } + // Prevent the user from specifying both -fsycl-is-device and -fsycl-is-host. 
if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsHost) Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fsycl-is-device" diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 0971daa1f36663..57652be8244b43 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -73,8 +73,8 @@ struct Scanner { // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. LangOpts.ObjC = true; LangOpts.LineComment = true; - // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and - // R"()" literals. + LangOpts.RawStringLiterals = true; + // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"". return LangOpts; } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index e59c7805b38623..ef1e1f4bd9aeb4 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3876,7 +3876,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { tok::utf16_char_constant); // UTF-16 raw string literal - if (Char == 'R' && LangOpts.CPlusPlus11 && + if (Char == 'R' && LangOpts.RawStringLiterals && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), @@ -3898,7 +3898,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { SizeTmp2, Result), tok::utf8_char_constant); - if (Char2 == 'R' && LangOpts.CPlusPlus11) { + if (Char2 == 'R' && LangOpts.RawStringLiterals) { unsigned SizeTmp3; char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); // UTF-8 raw string literal @@ -3934,7 +3934,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { tok::utf32_char_constant); // UTF-32 raw string literal - if (Char == 'R' && LangOpts.CPlusPlus11 && + if (Char == 'R' && LangOpts.RawStringLiterals && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return 
LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), @@ -3949,7 +3949,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - if (LangOpts.CPlusPlus11) { + if (LangOpts.RawStringLiterals) { Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '"') @@ -3972,7 +3972,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { tok::wide_string_literal); // Wide raw string literal. - if (LangOpts.CPlusPlus11 && Char == 'R' && + if (LangOpts.RawStringLiterals && Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), diff --git a/clang/test/Driver/fraw-string-literals-cxx.cpp b/clang/test/Driver/fraw-string-literals-cxx.cpp new file mode 100644 index 00000000000000..f831bf5344e63b --- /dev/null +++ b/clang/test/Driver/fraw-string-literals-cxx.cpp @@ -0,0 +1,17 @@ +// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s +// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s +// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s +// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s +// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++11 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s +// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++11 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s +// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++11 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s +// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++11 %s 2>&1 | FileCheck 
--check-prefix=CHECK-NEG %s +// RUN: %clang -fraw-string-literals -fsyntax-only -std=c++20 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s +// RUN: %clang -fraw-string-literals -fsyntax-only -std=gnu++20 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s +// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++20 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s +// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++20 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s + +// CHECK-PRE-CXX11-NOT: ignoring '-fraw-string-literals' +// CHECK-PRE-CXX11-NOT: ignoring '-fno-raw-string-literals' +// CHECK-POS: ignoring '-fraw-string-literals', which is only valid for C and C++ standards before C++11 +// CHECK-NEG: ignoring '-fno-raw-string-literals', which is only valid for C and C++ standards before C++11 diff --git a/clang/test/Lexer/raw-string-ext.c b/clang/test/Lexer/raw-string-ext.c new file mode 100644 index 00000000000000..de318b616df709 --- /dev/null +++ b/clang/test/Lexer/raw-string-ext.c @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -verify=supported %s +// RUN: %clang_cc1 -fsyntax-only -std=c11 -DUNICODE -fraw-string-literals -verify=supported %s +// RUN: %clang_cc1 -fsyntax-only -std=gnu89 -verify=unsupported %s +// RUN: %clang_cc1 -fsyntax-only -std=c11 -DUNICODE -verify=unsupported %s +// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -DUNICODE -fno-raw-string-literals -verify=unsupported %s + +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++03 -verify=unsupported,cxx-unsupported %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++03 -verify=unsupported,cxx-unsupported %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++03 -fraw-string-literals -verify=supported %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++03 -fraw-string-literals -verify=supported %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -verify=supported,cxx %s +// RUN: %clang_cc1 -x c++ 
-fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -verify=supported,cxx %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -fraw-string-literals -verify=supported,yes %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -fraw-string-literals -verify=supported,yes %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -fno-raw-string-literals -verify=supported,no %s +// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -fno-raw-string-literals -verify=supported,no %s + +// GCC supports raw string literals in C99 and later in '-std=gnuXY' mode; we +// additionally provide '-f[no-]raw-string-literals' to enable/disable them +// explicitly in C. +// +// We do not allow disabling raw string literals in C++ mode if they’re enabled +// by the language standard, i.e. in C++11 or later. + +// Driver warnings. +// yes-warning@* {{ignoring '-fraw-string-literals'}} +// no-warning@* {{ignoring '-fno-raw-string-literals'}} + +void f() { + (void) R"foo()foo"; // unsupported-error {{use of undeclared identifier 'R'}} cxx-unsupported-error {{expected ';' after expression}} + (void) LR"foo()foo"; // unsupported-error {{use of undeclared identifier 'LR'}} cxx-unsupported-error {{expected ';' after expression}} + +#ifdef UNICODE + (void) uR"foo()foo"; // unsupported-error {{use of undeclared identifier 'uR'}} cxx-unsupported-error {{expected ';' after expression}} + (void) u8R"foo()foo"; // unsupported-error {{use of undeclared identifier 'u8R'}} cxx-unsupported-error {{expected ';' after expression}} + (void) UR"foo()foo"; // unsupported-error {{use of undeclared identifier 'UR'}} cxx-unsupported-error {{expected ';' after expression}} +#endif +} + +// supported-error@* {{missing terminating delimiter}} +// supported-error@* {{expected expression}} +// supported-error@* {{expected ';' after top level declarator}} +#define R "bar" +const char* s = R"foo("; diff --git 
a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index 59fef9ecbb9c91..94af9688a96e24 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -583,10 +583,12 @@ TEST(MinimizeSourceToDependencyDirectivesTest, UnderscorePragma) { R"(_Pragma(u"clang module import"))", Out)); EXPECT_STREQ("\n", Out.data()); - // FIXME: R"()" strings depend on using C++11 language mode + // R"()" strings are enabled by default. ASSERT_FALSE(minimizeSourceToDependencyDirectives( R"(_Pragma(R"abc(clang module import)abc"))", Out)); - EXPECT_STREQ("\n", Out.data()); + EXPECT_STREQ(R"(_Pragma(R"abc(clang module import)abc"))" + "\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, Include) {