// Reviewer notes for this patch. They sit ahead of the first "diff --git" header, so they are
// outside every hunk and do not alter any context/added/removed line below.
//
// NOTE(review): this copy of the patch has lost its line breaks and every angle-bracketed token:
// the `#include` lines name no header, and `static_cast(...)`, `std::vector workers`,
// `template struct AppResult` carry no template arguments. That looks like an extraction
// artifact -- regenerate the patch from the repository before applying or compiling it.
//
// .gitignore
//   * Adds the ignore pattern `*.cache` under the heading
//     "Coretrace StackAnalyzer cache and temporary files".
//
// src/app/AnalyzerApp.cpp
//   * When __APPLE__ or __unix__ is defined, adds one include (header name lost in this copy;
//     presumably <pthread.h> -- confirm) and defines CTRACE_STACK_ANALYZER_HAS_PTHREAD as 1.
//   * resolveParallelWorkerStackBytes() exists only when CTRACE_STACK_ANALYZER_HAS_PTHREAD is
//     defined. It returns 8u * 1024u * 1024u bytes, or the larger of that value and
//     PTHREAD_STACK_MIN when PTHREAD_STACK_MIN is defined.
//   * runParallelWork(): the per-worker loop moves into the `workerBody` lambda. A worker takes
//     the next index from the shared atomic `nextIndex` (fetch_add with memory_order_relaxed),
//     stops at the first index >= workItemCount, otherwise calls workFn(index) and increments
//     the processedCount of its own WorkerState.
//   * pthread path: initialises a pthread_attr_t, sets its stack size from
//     resolveParallelWorkerStackBytes(), then calls pthread_create once per worker, handing each
//     thread a PthreadWorkerContext {pointer to workerBody, pointer to that worker's WorkerState}.
//     The attribute object is destroyed after the creation loop and every thread is joined.
//     A non-zero result from pthread_attr_init, pthread_attr_setstacksize or pthread_create ends
//     in llvm::report_fatal_error; on the latter two the attribute object is destroyed first.
//   * Non-pthread path: keeps the `workers` vector and the join() loop; each worker now just
//     calls workerBody(workerState).
//
// NOTE(review): if pthread_create fails after some workers have already started, those workers
// still hold pointers to `workerBody`, `contexts` and `workerStates` (all locals of this
// function) while report_fatal_error runs. That is only safe if the call never returns or
// unwinds out of this frame -- confirm against the LLVM version in use.
// NOTE(review): the return value of pthread_join is ignored; a failed join would go unnoticed
// before the processedCount totals are read.
// NOTE(review): the patch does not say why 8 MiB was chosen -- presumably the work items need
// more stack than the platform's default for secondary threads. A one-line why-comment next to
// kDefaultWorkerStackBytes would help the next reader.
diff --git a/.gitignore b/.gitignore index fa8a84e..c8dcd2e 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,6 @@ __pycache__/ *.py[cod] *.tmp + +# Coretrace StackAnalyzer cache and temporary files +*.cache diff --git a/src/app/AnalyzerApp.cpp b/src/app/AnalyzerApp.cpp index a9621d2..c6ffac2 100644 --- a/src/app/AnalyzerApp.cpp +++ b/src/app/AnalyzerApp.cpp @@ -19,12 +19,19 @@ #include #include #include +#include #include #include #include #include #include #include + +#if defined(__APPLE__) || defined(__unix__) +#include +#define CTRACE_STACK_ANALYZER_HAS_PTHREAD 1 +#endif + #include #include #include @@ -87,6 +94,18 @@ static unsigned resolveConfiguredJobs(const AnalysisConfig& cfg) return hw == 0 ? 1u : hw; } +#if defined(CTRACE_STACK_ANALYZER_HAS_PTHREAD) +static std::size_t resolveParallelWorkerStackBytes() +{ + constexpr std::size_t kDefaultWorkerStackBytes = 8u * 1024u * 1024u; +#if defined(PTHREAD_STACK_MIN) + return std::max(kDefaultWorkerStackBytes, static_cast(PTHREAD_STACK_MIN)); +#else + return kDefaultWorkerStackBytes; +#endif +} +#endif + template struct AppResult { std::optional value; @@ -310,29 +329,76 @@ static void runParallelWork(std::size_t workItemCount, unsigned maxJobs, WorkFn& std::vector workerStates(workerCount); std::atomic_size_t nextIndex{0}; + + auto workerBody = [&](WorkerState* workerState) + { + WorkerState& state = *workerState; + while (true) + { + const std::size_t index = nextIndex.fetch_add(1, std::memory_order_relaxed); + if (index >= workItemCount) + break; + workFn(index); + ++state.processedCount; + } + }; + +#if defined(CTRACE_STACK_ANALYZER_HAS_PTHREAD) + struct PthreadWorkerContext + { + decltype(workerBody)* body = nullptr; + WorkerState* state = nullptr; + }; + + pthread_attr_t attr; + if (pthread_attr_init(&attr) != 0) + { + llvm::report_fatal_error("failed to initialize parallel worker thread attributes"); + } + const int stackErr = pthread_attr_setstacksize(&attr, resolveParallelWorkerStackBytes()); + if 
(stackErr != 0) + { + pthread_attr_destroy(&attr); + llvm::report_fatal_error("failed to configure parallel worker stack size"); + } + + std::vector contexts(workerCount); + std::vector workers(workerCount); + for (unsigned workerId = 0; workerId < workerCount; ++workerId) + { + contexts[workerId] = {&workerBody, &workerStates[workerId]}; + const int createErr = pthread_create( + &workers[workerId], &attr, + [](void* rawContext) -> void* + { + auto* context = static_cast(rawContext); + (*context->body)(context->state); + return nullptr; + }, + &contexts[workerId]); + if (createErr != 0) + { + pthread_attr_destroy(&attr); + llvm::report_fatal_error("failed to create parallel worker thread"); + } + } + pthread_attr_destroy(&attr); + + for (pthread_t worker : workers) + pthread_join(worker, nullptr); +#else std::vector workers; workers.reserve(workerCount); for (unsigned workerId = 0; workerId < workerCount; ++workerId) { WorkerState* const workerState = &workerStates[workerId]; - workers.emplace_back( - [&, workerState]() - { - WorkerState& state = *workerState; - while (true) - { - const std::size_t index = nextIndex.fetch_add(1, std::memory_order_relaxed); - if (index >= workItemCount) - break; - workFn(index); - ++state.processedCount; - } - }); + workers.emplace_back([&, workerState]() { workerBody(workerState); }); } for (auto& worker : workers) worker.join(); +#endif std::uint64_t processedTotal = 0; for (const WorkerState& state : workerStates)