/*****************************************************************************

Copyright (c) 2020, 2024, Oracle and/or its affiliates.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.

This program is designed to work with certain software (including but not
limited to OpenSSL) that is licensed under separate terms, as designated in a
particular file or component or in included license documentation. The authors
of MySQL hereby grant you an additional permission to link the program and
your derivative works with the separately licensed software that they have
either included with the program or referenced in the documentation.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA

*****************************************************************************/

// This program creates a def file with all exports from specified libraries.
// This work is based on a JS script that was doing this before this program was
// created, and tries to closely copy the way the script was working. In essence
// we create an input file for link /dump, call link on this input with stdout
// redirected to a big pipe that we read and process into a set of unique
// symbols that match our criteria. After link completes, we write out the
// gathered unique symbols. On Debug the link /dump generates ~280MB of output
// which is transformed into 8MB of the result def file, so it's quite important
// to have the processing fast.
Currently the create_def consumes the data provided // by the link two times faster than it takes to generate them, so the total // execution time is still bound exactly by the link execution time. // The original script removed the VS_UNICODE_OUTPUT environment variable used // by the VC++ tools to communicate a need to use special pipes inside VS. This // was used only sometime around VS2005-2008 and was removed. #ifndef __clang__ // Disable runtime checks, iterator checks and debugs, and turn on // optimizations. This is done to make debug version as fast as release one. // Without this the create_def works even slower than original JS script. We // can't enable this on CMake level (at least no way was found) because adding // /O2 requires absence of /RTC1, which in turn can't be removed with CMake, and // can't be overridden to be disabled by adding any compile options. Thus we // have to disable /RTC1 and enable /O2 (or any level of optimization) using // pragmas. #ifdef _ITERATOR_DEBUG_LEVEL #undef _ITERATOR_DEBUG_LEVEL #endif #define _ITERATOR_DEBUG_LEVEL 0 #pragma runtime_checks("", off) #pragma optimize("g", on) #endif #include #include #include #include #include #include #include #include #include #include #include /** Prints an error message supplied and attaches GetLastError with formatted * message. */ void error(std::string message) { const DWORD last_error = GetLastError(); std::cerr << "Error during generating .def file: " << message << "\n" << "Last OS error code: " << last_error << ", msg: " << std::system_category().message((int)last_error) << std::endl; exit(1); } /** Gathers and prints out the unique symbols. */ class Unique_symbol_map { public: Unique_symbol_map() { std::cout << "EXPORTS\n"; } /** Processes a new symbol candidate in form of a single line of link /dump * output. */ void insert(const std::string &symbol_line); private: /** List of symbols seen and printed out so far. 
*/ std::unordered_set m_symbols_seen; }; /** Buffers input and runs a specified callback on single complete lines found. */ template class Line_buffer { public: Line_buffer(TCallback &&line_handler) : m_line_handler(line_handler) {} /** Runs callback for last incomplete line if present. */ ~Line_buffer(); /** Adds a raw buffer bytes to our buffer and finds any new completed lines to * call callback on them. */ void insert(const char *buffer, size_t size); private: TCallback m_line_handler; /** Current incomplete line. */ std::string m_curr_buffer; }; template Line_buffer create_line_buffer(TCallback &&callback) { return Line_buffer(std::forward(callback)); } /** Runs a specified command line and calls a callback for all data that is * written by the child program to its standard output. The standard error is * redirected to parent standard error stream. There is no input stream * redirected. */ template class Process { public: Process(std::string cmd_line, TCallback &&input_handler, DWORD pipe_size); ~Process(); double get_cpu_time() const; private: /** Creates a big pipe that will receive and buffer data coming from the * child process. */ void create_pipe(DWORD pipe_size); /** Runs the actual child process */ void create_process(std::string cmd_line); /** Reads the child results until the pipe is not closed. Runs callback for * all received data. 
*/ void read_output(TCallback &&input_handler); HANDLE m_stdout_read_pipe = INVALID_HANDLE_VALUE; HANDLE m_stdout_write_pipe = INVALID_HANDLE_VALUE; HANDLE m_process_handle = INVALID_HANDLE_VALUE; }; template Process create_process(std::string cmd_line, TCallback &&input_handler, DWORD pipe_size) { return Process(cmd_line, std::forward(input_handler), pipe_size); } template Process::Process(std::string cmd_line, TCallback &&input_handler, DWORD pipe_size) { create_pipe(pipe_size); create_process(cmd_line); read_output(std::forward(input_handler)); } template Process::~Process() { if (m_stdout_read_pipe != INVALID_HANDLE_VALUE) { CloseHandle(m_stdout_read_pipe); } if (m_stdout_write_pipe != INVALID_HANDLE_VALUE) { CloseHandle(m_stdout_write_pipe); } if (m_process_handle != INVALID_HANDLE_VALUE) { CloseHandle(m_process_handle); } } template void Process::create_pipe(DWORD pipe_size) { SECURITY_ATTRIBUTES sec_attributes; // The write side of the pipe needs to be inheritable. sec_attributes.nLength = sizeof(SECURITY_ATTRIBUTES); sec_attributes.bInheritHandle = TRUE; sec_attributes.lpSecurityDescriptor = nullptr; // We create buffer big enough to not make child process stall while we are // processing its older output. if (!CreatePipe(&m_stdout_read_pipe, &m_stdout_write_pipe, &sec_attributes, pipe_size)) { error("CreatePipe failed"); } // The read side does not have to be inheritable. 
if (!SetHandleInformation(m_stdout_read_pipe, HANDLE_FLAG_INHERIT, 0)) { error("SetHandleInformation failed on read pipe"); } } template void Process::create_process(std::string cmd_line) { PROCESS_INFORMATION proc_info; STARTUPINFO start_info; ZeroMemory(&proc_info, sizeof(PROCESS_INFORMATION)); ZeroMemory(&start_info, sizeof(STARTUPINFO)); start_info.cb = sizeof(STARTUPINFO); start_info.hStdError = GetStdHandle(STD_ERROR_HANDLE); start_info.hStdOutput = m_stdout_write_pipe; start_info.hStdInput = INVALID_HANDLE_VALUE; start_info.dwFlags |= STARTF_USESTDHANDLES; // We cast away the constness as CreateProcess expects the non-const argument. // However, only the UNICODE CreateProcessW variant actually modifies the // memory supplied. Since this program assumes it is compiled with ANSI // support only (no use of TCHAR) we can just cast the const away. if (!CreateProcess(nullptr, const_cast(cmd_line.c_str()), nullptr, nullptr, TRUE, 0, nullptr, nullptr, &start_info, &proc_info)) { error("CreateProcess failed"); } else { m_process_handle = proc_info.hProcess; CloseHandle(proc_info.hThread); CloseHandle(m_stdout_write_pipe); m_stdout_write_pipe = INVALID_HANDLE_VALUE; } } // A quite small buffer for reading incoming data. It doesn't have to be big as // the pipe itself does the buffering for more incoming data. constexpr DWORD buf_size = 16 * 1024; char raw_buffer[buf_size]; template void Process::read_output(TCallback &&input_handler) { DWORD bytes_read; DWORD bytes_available; for (;;) { // Check if there is any data to be read. if (!PeekNamedPipe(m_stdout_read_pipe, nullptr, 0, nullptr, &bytes_available, nullptr)) { // This error is reported when the pipe is closed. if (GetLastError() != 109) { error("PeekNamedPipe failed"); } return; } if (bytes_available) { // Read actual data from pipe, but no more than our small buffer. 
if (!ReadFile(m_stdout_read_pipe, raw_buffer, std::min(bytes_available, buf_size), &bytes_read, nullptr)) { error("ReadFile on child process output pipe failed"); } input_handler(raw_buffer, bytes_read); } else { Sleep(1); } } } double filetime_to_sec(const FILETIME &f) { return (f.dwLowDateTime + (((uint64_t)f.dwHighDateTime) << 32)) / 10.0 / 1000 / 1000; } double get_cpu_time(HANDLE process) { FILETIME kernel_time; FILETIME user_time; FILETIME creation_time; FILETIME exit_time; GetProcessTimes(process, &creation_time, &exit_time, &kernel_time, &user_time); return filetime_to_sec(kernel_time) + filetime_to_sec(user_time); } template double Process::get_cpu_time() const { return ::get_cpu_time(m_process_handle); } double our_cpu_time() { return get_cpu_time(GetCurrentProcess()); } template Line_buffer::~Line_buffer() { if (!m_curr_buffer.empty()) { m_line_handler(m_curr_buffer); } } template void Line_buffer::insert(const char *buffer, size_t size) { size_t last_index_added = 0; for (size_t i = 0; i < size; ++i) { if (buffer[i] == '\r' || buffer[i] == '\n') { m_curr_buffer.append(buffer + last_index_added, i - last_index_added); if (!m_curr_buffer.empty()) { m_line_handler(m_curr_buffer); m_curr_buffer.clear(); } last_index_added = i + 1; } } m_curr_buffer.append(buffer + last_index_added, size - last_index_added); } void Unique_symbol_map::insert(const std::string &symbol_line) { // Some magic list of symbols we don't want to be exported. 
static const char *compiler_symbols[] = { "__real@", // "__xmm@", // SSE instruction set constants "_CTA2?", // std::bad_alloc "_CTA3?", // std::length_error "_CTA4?", // std::ios_base::failure "_CTA5?", // std::ios_base::failure "_CTA6?", // boost::exception_detail::clone_impl> "_CTA7?", // boost::exception_detail::clone_impl> "_CTA8?AV?", // bad_rational "_TI2?", // std::bad_alloc "_TI3?", // std::length_error "_TI4?", // std::ios_base::failure "_TI5?", // std::ios_base::failure "_TI6?", // boost::exception_detail::clone_impl> "_TI7?", // boost::exception_detail::clone_impl> "_TI8?AV?", // bad_rational "_RTC_", // "??_C@_", // "??_R", // "??_7", // "?_G", // scalar deleting destructor "_VInfreq_?", // special label (exception handler?) for Intel compiler "?_E", // vector deleting destructor " columns; size_t current_pos = 0; for (;;) { auto pos = symbol_line.find(' ', current_pos); if (pos == std::string::npos) break; if (pos != 0) { columns.push_back(symbol_line.substr(current_pos, pos - current_pos)); } current_pos = pos + 1; } if (current_pos != symbol_line.size()) { columns.push_back(symbol_line.substr(current_pos)); } if (columns.size() < 3) { return; } // Magic copied from JS script: // If the third column of link /dump /symbols output contains SECTx, the // symbol is defined in that section of the object file. If UNDEF appears, it // is not defined in that object and must be resolved elsewhere. BSS symbols // (like uninitialized arrays) appear to have non-zero second column. if (columns[2].substr(0, 4) != "SECT") { if (columns[2] == "UNDEF" && atol(columns[1].c_str()) == 0) { return; } } // Extract undecorated symbol names between "|" and next whitespace after it. size_t index = 0; while (index < columns.size() && columns[index] != "|") { index++; } if (index + 1 >= columns.size()) { error("Unexpected symbol line format: " + symbol_line); } // Extract the actual symbol name we care about and check it's not on list of // compiler's symbols. 
auto &symbol = columns[index + 1]; for (auto &compiler_symbol : compiler_symbols) { if (symbol.find(compiler_symbol) != std::string::npos) { return; } } // Check if we have function or data. if (symbol_line.find("notype () ") == std::string::npos) { symbol.append(" DATA"); } // Check if this is a function inside the std namespace if (symbol_line.find(" __cdecl std::") != std::string::npos) { return; } // Check if this symbol was seen before. auto res = m_symbols_seen.emplace(symbol); if (res.second) { std::cout << symbol << "\n"; } } template double measure_execution_time(TCallback &&callback) { auto start_time = std::chrono::high_resolution_clock::now(); callback(); // Print info about the time used. auto end_time = std::chrono::high_resolution_clock::now(); return std::chrono::duration_cast(end_time - start_time) .count() / 1000.0; } class Resp_file { public: Resp_file(int arguments_count, const char **arguments) { std::ofstream rspFile(get_name().c_str()); rspFile << "/symbols \n"; for (int i = 0; i < arguments_count; ++i) { const std::string input(arguments[i]); if (input.size() > 4 && (input.substr(input.size() - 4) == ".lib" || input.substr(input.size() - 4) == ".obj")) { rspFile << "\"" << input << "\"\n"; } } } ~Resp_file() { // Cleanup. _unlink(get_name().c_str()); } std::string get_name() { return "dumpsymbols.rsp"; } }; int main(int argc, const char *argv[]) { double link_cpu_time; auto time_in_sec = measure_execution_time([argc, argv, &link_cpu_time] { // Prepare the input file for the link /dump. std::cerr << "Creating def file..." << std::endl; Resp_file resp_file(argc - 1, argv + 1); // This should speed-up printing the result a little. std::iostream::sync_with_stdio(false); // Call the actual link /dump and process the data. 
Unique_symbol_map symbol_map; auto buffer = create_line_buffer( [&symbol_map](std::string &line) { symbol_map.insert(line); }); auto process = create_process( "link /dump @" + resp_file.get_name(), [&buffer](const char *buf, size_t bytes_count) { buffer.insert(buf, bytes_count); }, // use bigger pipe buffer to let link /dump to buffer data in case we // have some lag. The data comes from linker in rates of 10s MB/s. 16 * 1024 * 1024); link_cpu_time = process.get_cpu_time(); }); std::cerr << std::setprecision(3) << "Creating def file finished in " << time_in_sec << "s. (We used " << our_cpu_time() << "s, link used " << link_cpu_time << "s CPU time)" << std::endl; return 0; }