/* * goostring-format-checker.cc * * This file is licensed under the GPLv2 or later * * Clang++ compiler plugin that checks usage of GooString::format-like functions * * Copyright (C) 2014 Fabio D'Urso */ #include #include #include #include #include #include #include using namespace clang; namespace { class GooStringFormatCheckerVisitor : public RecursiveASTVisitor { public: explicit GooStringFormatCheckerVisitor(CompilerInstance *compInst); bool VisitFunctionDecl(FunctionDecl *funcDecl); bool VisitCallExpr(CallExpr *callExpr); private: /* Returns the index of the format argument, or -1 if the function must * not be checked */ int findFormatArgumentIndex(const FunctionDecl *funcDecl) const; /* Returns the SourceLocation of the n-th character */ SourceLocation getLocationOfCharacter(const StringLiteral *strLiteral, unsigned n); /* Validates usage of a placeholder and returns the corresponding * argument index, or -1 in case of errors */ int verifyPlaceholder(const CallExpr *callExpr, const SourceLocation &placeholderLocation, std::string &placeholderText, int baseArgIdx) const; CompilerInstance *compInst; DiagnosticsEngine *diag; unsigned diag_badFuncZeroArgs; unsigned diag_badFuncNonVariadic; unsigned diag_badFuncLastArgInvalidType; unsigned diag_notStringLiteral; unsigned diag_notPlainASCII; unsigned diag_wrongOrder; unsigned diag_unescapedBracket; unsigned diag_unterminatedPlaceholder; unsigned diag_unconsumedArgs; unsigned diag_missingColon; unsigned diag_missingArgNumber; unsigned diag_badArgNumber; unsigned diag_argumentNotPresent; unsigned diag_badPrecision; unsigned diag_badType; unsigned diag_wrongArgExprType; }; GooStringFormatCheckerVisitor::GooStringFormatCheckerVisitor(CompilerInstance *compInst) : compInst(compInst) { diag = &compInst->getDiagnostics(); diag_badFuncZeroArgs = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks on a function that takes no arguments"); diag_badFuncNonVariadic = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks on a non-variadic function"); diag_badFuncLastArgInvalidType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks if the last non-variadic argument is not const char *"); diag_notStringLiteral = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Format string is not a string literal. Skipping format checks"); diag_notPlainASCII = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Format string contains non-ASCII or NUL characters. Skipping format checks"); diag_wrongOrder = diag->getCustomDiagID(DiagnosticsEngine::Error, "Argument %0 must be consumed before argument %1"); diag_unescapedBracket = diag->getCustomDiagID(DiagnosticsEngine::Error, "Unescaped '}' character"); diag_unterminatedPlaceholder = diag->getCustomDiagID(DiagnosticsEngine::Error, "Unterminated placeholder"); diag_unconsumedArgs = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Unconsumed argument(s)"); diag_missingColon = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': missing colon character"); diag_missingArgNumber = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': missing number"); diag_badArgNumber = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad number"); diag_argumentNotPresent = diag->getCustomDiagID(DiagnosticsEngine::Error, "Argument for placeholder '{%0}' is not present"); diag_badPrecision = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad value"); diag_badType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad specifier"); diag_wrongArgExprType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Expected %0 for placeholder '{%1}', found %2"); } bool GooStringFormatCheckerVisitor::VisitFunctionDecl(FunctionDecl *funcDecl) { findFormatArgumentIndex(funcDecl); // Spot misuse of the "gooformat" annotation return true; } bool GooStringFormatCheckerVisitor::VisitCallExpr(CallExpr *callExpr) { /*** Locate format argument or skip calls that needn't be checked ***/ const int formatArgIdx = findFormatArgumentIndex(callExpr->getDirectCallee()); if (formatArgIdx == -1) return true; /*** Obtain format string value ***/ const Expr *formatArgExpr = callExpr->getArg(formatArgIdx); while (formatArgExpr->getStmtClass() == Stmt::ImplicitCastExprClass) { formatArgExpr = static_cast(formatArgExpr)->getSubExpr(); } if (formatArgExpr->getStmtClass() != Stmt::StringLiteralClass) { diag->Report(formatArgExpr->getExprLoc(), diag_notStringLiteral); return true; } const StringLiteral *formatArgStrLiteral = static_cast(formatArgExpr); if (formatArgStrLiteral->containsNonAsciiOrNull()) { diag->Report(formatArgExpr->getExprLoc(), diag_notPlainASCII); return true; } /*** Parse format string and verify arguments ***/ const std::string format = formatArgStrLiteral->getString().str(); /* Keeps track of whether we are currently parsing a character contained * within '{' ... '}'. If set, current_placeholder contains the contents * parsed so far (without brackets) */ bool in_placeholder = false; std::string current_placeholder; // Source location of the current placeholder's opening bracket SourceLocation placeholderLoc; /* Keeps track of the next expected argument number, to check that * arguments are first consumed in order (eg {0:d}{2:d}{1:d} is wrong). * Note that it's possible to "look back" at already consumed * arguments (eg {0:d}{1:d}{0:d} is OK) */ int nextExpectedArgNum = 0; for (unsigned i = 0; i < format.length(); i++) { if (in_placeholder) { // Have we reached the end of the placeholder? if (format[i] == '}') { in_placeholder = false; // Verifies the placeholder and returns the argument number const int foundArgNum = verifyPlaceholder(callExpr, placeholderLoc, current_placeholder, formatArgIdx + 1); // If the placeholder wasn't valid, disable argument order checks if (foundArgNum == -1) { nextExpectedArgNum = -1; } // If argument order checks are enabled, let's check! if (nextExpectedArgNum != -1) { if (foundArgNum == nextExpectedArgNum) { nextExpectedArgNum++; } else if (foundArgNum > nextExpectedArgNum) { diag->Report(placeholderLoc, diag_wrongOrder) << nextExpectedArgNum << foundArgNum; nextExpectedArgNum = -1; // disable further checks } } } else { current_placeholder += format[i]; } } else if (format[i] == '{') { // If we find a '{' then a placeholder is starting... in_placeholder = true; current_placeholder = ""; placeholderLoc = getLocationOfCharacter(formatArgStrLiteral, i); // ...unless it's followed by another '{' (escape sequence) if (i + 1 < format.length() && format[i + 1] == '{') { i++; // skip next '{' character in_placeholder = false; } } else if (format[i] == '}') { /* If we have found a '}' and we're not in a placeholder, * then it *MUST* be followed by another '}' (escape sequence) */ if (i + 1 >= format.length() || format[i + 1] != '}') { diag->Report(getLocationOfCharacter(formatArgStrLiteral, i), diag_unescapedBracket); } else { i++; // skip next '}' character } } } /* If we've reached the end of the format string and in_placeholder is * still set, then the last placeholder wasn't terminated properly */ if (in_placeholder) diag->Report(placeholderLoc, diag_unterminatedPlaceholder); int unconsumedArgs = callExpr->getNumArgs() - (formatArgIdx + 1 + nextExpectedArgNum); if (unconsumedArgs > 0) diag->Report(callExpr->getArg(callExpr->getNumArgs() - unconsumedArgs)->getExprLoc(), diag_unconsumedArgs); return true; } int GooStringFormatCheckerVisitor::findFormatArgumentIndex(const FunctionDecl *funcDecl) const { if (!funcDecl) return -1; AnnotateAttr *annotation = NULL; for (specific_attr_iterator it = funcDecl->specific_attr_begin(); it != funcDecl->specific_attr_end() && !annotation; ++it) { if (it->getAnnotation() == "gooformat") annotation = *it; } // If this function hasn't got the "gooformat" annotation on it if (!annotation) return -1; if (funcDecl->getNumParams() == 0) { diag->Report(annotation->getLocation(), diag_badFuncZeroArgs); return -1; } if (!funcDecl->isVariadic()) { diag->Report(annotation->getLocation(), diag_badFuncNonVariadic); return -1; } // Assume the last non-variadic argument is the format specifier const int formatArgIdx = funcDecl->getNumParams() - 1; const QualType formatArgType = funcDecl->getParamDecl(formatArgIdx)->getType(); if (formatArgType.getAsString() != "const char *") { diag->Report(annotation->getLocation(), diag_badFuncLastArgInvalidType); return -1; } return formatArgIdx; } SourceLocation GooStringFormatCheckerVisitor::getLocationOfCharacter(const StringLiteral *strLiteral, unsigned n) { return strLiteral->getLocationOfByte(n, compInst->getSourceManager(), compInst->getLangOpts(), compInst->getTarget()); } int GooStringFormatCheckerVisitor::verifyPlaceholder(const CallExpr *callExpr, const SourceLocation &placeholderLocation, std::string &placeholderText, int baseArgIdx) const { // Find the colon that separates the argument number and the format specifier const size_t delim = placeholderText.find(':'); if (delim == std::string::npos) { diag->Report(placeholderLocation, diag_missingColon) << placeholderText; return -1; } if (delim == 0) { diag->Report(placeholderLocation, diag_missingArgNumber) << placeholderText; return -1; } for (unsigned int i = 0; i < delim; i++) { if (!isdigit(placeholderText[i])) { diag->Report(placeholderLocation, diag_badArgNumber) << placeholderText; return -1; } } // Extract argument number and its actual position in the call's argument list const int argNum = atoi(placeholderText.substr(0, delim).c_str()); const int argIdx = baseArgIdx + argNum; if (argIdx >= callExpr->getNumArgs()) { diag->Report(placeholderLocation, diag_argumentNotPresent) << placeholderText; return argNum; } // Check and strip width/precision specifiers std::string format = placeholderText.substr(delim + 1); bool dot_found = false; while (isdigit(format[0]) || format[0] == '.') { if (format[0] == '.') { if (dot_found) { diag->Report(placeholderLocation, diag_badPrecision) << placeholderText; return argNum; } dot_found = true; } format = format.substr(1); } const Expr *argExpr = callExpr->getArg(argIdx); const QualType qualType = argExpr->getType(); const Type *valueType = qualType->getUnqualifiedDesugaredType(); if (format == "d" || format == "x" || format == "X" || format == "o" || format == "b" || format == "w") { if (!valueType->isSpecificBuiltinType(BuiltinType::Int)) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "int" << placeholderText << qualType.getAsString(); } } else if (format == "ud" || format == "ux" || format == "uX" || format == "uo" || format == "ub") { if (!valueType->isSpecificBuiltinType(BuiltinType::UInt)) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned int" << placeholderText << qualType.getAsString(); } } else if (format == "ld" || format == "lx" || format == "lX" || format == "lo" || format == "lb") { if (!valueType->isSpecificBuiltinType(BuiltinType::Long)) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "long" << placeholderText << qualType.getAsString(); } } else if (format == "uld" || format == "ulx" || format == "ulX" || format == "ulo" || format == "ulb") { if (!valueType->isSpecificBuiltinType(BuiltinType::ULong)) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned long" << placeholderText << qualType.getAsString(); } } else if (format == "lld" || format == "llx" || format == "llX" || format == "llo" || format == "llb") { if (!valueType->isSpecificBuiltinType(BuiltinType::LongLong)) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "long long" << placeholderText << qualType.getAsString(); } } else if (format == "ulld" || format == "ullx" || format == "ullX" || format == "ullo" || format == "ullb") { if (!valueType->isSpecificBuiltinType(BuiltinType::ULongLong)) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned long long" << placeholderText << qualType.getAsString(); } } else if (format == "f" || format == "g" || format == "gs") { if (!valueType->isSpecificBuiltinType(BuiltinType::Double)) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "float or double" << placeholderText << qualType.getAsString(); } } else if (format == "c") { if (!valueType->isSpecificBuiltinType(BuiltinType::UInt) && !valueType->isSpecificBuiltinType(BuiltinType::Int)) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "char, short or int" << placeholderText << qualType.getAsString(); } } else if (format == "s") { if (!valueType->isPointerType() || !valueType->getPointeeType()->getUnqualifiedDesugaredType()->isCharType()) { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "char *" << placeholderText << qualType.getAsString(); } } else if (format == "t") { const CXXRecordDecl *pointeeType = valueType->isPointerType() ? valueType->getPointeeType()->getAsCXXRecordDecl() : 0; if (pointeeType == 0 || pointeeType->getQualifiedNameAsString() != "GooString") { diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "GooString *" << placeholderText << qualType.getAsString(); } } else { diag->Report(placeholderLocation, diag_badType) << placeholderText; return argNum; } return argNum; } class GooStringFormatCheckerConsumer : public clang::ASTConsumer { public: GooStringFormatCheckerConsumer(CompilerInstance *compInst) : visitor(compInst) { } virtual void HandleTranslationUnit(clang::ASTContext &ctx) { visitor.TraverseDecl(ctx.getTranslationUnitDecl()); } private: GooStringFormatCheckerVisitor visitor; }; class GooStringFormatCheckerAction : public PluginASTAction { protected: ASTConsumer *CreateASTConsumer(CompilerInstance &compInst, llvm::StringRef inFile) { return new GooStringFormatCheckerConsumer(&compInst); } bool ParseArgs(const CompilerInstance &compInst, const std::vector &args) { if (args.size() != 0) { DiagnosticsEngine &D = compInst.getDiagnostics(); D.Report(D.getCustomDiagID(DiagnosticsEngine::Error, "goostring-format-checker takes no arguments")); return false; } else { return true; } } }; } static FrontendPluginRegistry::Add X("goostring-format-checker", "Checks usage of GooString::format-like functions");