Skip to content

Commit

Permalink
DPL: read metadata from parent files
Browse files Browse the repository at this point in the history
If the metadata is not found in the main file and if there is a
list of parent files, try those as well.
  • Loading branch information
ktf committed Nov 7, 2024
1 parent 2d015ad commit ed328b5
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 38 deletions.
20 changes: 11 additions & 9 deletions Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,17 @@ static inline auto extractOriginalsTuple(framework::pack<Os...>, ProcessingConte

AlgorithmSpec AODJAlienReaderHelpers::rootFileReaderCallback(ConfigContext const& config)
{
auto callback = AlgorithmSpec{adaptStateful([](ConfigParamRegistry const& options,
DeviceSpec const& spec,
Monitoring& monitoring,
DataProcessingStats& stats) {
// aod-parent-base-path-replacement is now a workflow option, so it needs to be
// retrieved from the ConfigContext. This is because we do not allow workflow options
// to change over start-stop-start because they can affect the topology generation.
std::string parentFileReplacement;
if (config.options().isSet("aod-parent-base-path-replacement")) {
parentFileReplacement = config.options().get<std::string>("aod-parent-base-path-replacement");
}
auto callback = AlgorithmSpec{adaptStateful([parentFileReplacement](ConfigParamRegistry const& options,
DeviceSpec const& spec,
Monitoring& monitoring,
DataProcessingStats& stats) {
// FIXME: not actually needed, since data processing stats can specify that we should
// send the initial value.
stats.updateStats({static_cast<short>(ProcessingStatsId::ARROW_BYTES_CREATED), DataProcessingStats::Op::Set, 0});
Expand All @@ -141,11 +148,6 @@ AlgorithmSpec AODJAlienReaderHelpers::rootFileReaderCallback(ConfigContext const

auto maxRate = options.get<float>("aod-max-io-rate");

std::string parentFileReplacement;
if (options.isSet("aod-parent-base-path-replacement")) {
parentFileReplacement = options.get<std::string>("aod-parent-base-path-replacement");
}

int parentAccessLevel = 0;
if (options.isSet("aod-parent-access-level")) {
parentAccessLevel = options.get<int>("aod-parent-access-level");
Expand Down
102 changes: 76 additions & 26 deletions Framework/AnalysisSupport/src/Plugin.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <TObjString.h>
#include <TString.h>
#include <fmt/format.h>
#include <memory>

O2_DECLARE_DYNAMIC_LOG(analysis_support);

Expand Down Expand Up @@ -65,7 +66,7 @@ struct RunSummary : o2::framework::ServicePlugin {
}
};

std::vector<std::string> getListOfTables(TFile* f)
std::vector<std::string> getListOfTables(std::unique_ptr<TFile>& f)
{
std::vector<std::string> r;
TList* keyList = f->GetListOfKeys();
Expand All @@ -83,6 +84,32 @@ std::vector<std::string> getListOfTables(TFile* f)
}
return r;
}
/// Read the "metaData" map stored in an AOD file and turn every entry into a
/// string option named "aod-metadata-<key>".
///
/// @param currentFile an already opened file. It is dereferenced without a
///        null check, so the caller must have verified that opening succeeded.
/// @return one ConfigParamSpec per metadata entry, or an empty vector when the
///         file holds no TMap called "metaData".
auto readMetadata(std::unique_ptr<TFile>& currentFile) -> std::vector<ConfigParamSpec>
{
  // Get the metadata, if any. dynamic_cast (rather than a C-style cast) makes
  // an object of the wrong type under that key behave like "no metadata"
  // instead of being reinterpreted as a TMap.
  auto* m = dynamic_cast<TMap*>(currentFile->Get("metaData"));
  if (!m) {
    return {};
  }
  std::vector<ConfigParamSpec> results;
  // MakeIterator() returns a heap-allocated iterator owned by the caller;
  // hold it in a unique_ptr so it is released on every exit path.
  std::unique_ptr<TIterator> it{m->MakeIterator()};

  // Emit one ConfigParamSpec per (key, value) pair; the header line is logged
  // only once, and only if the map has at least one entry.
  bool first = true;
  while (auto* obj = it->Next()) {
    if (first) {
      LOGP(info, "Metadata for file \"{}\":", currentFile->GetName());
      first = false;
    }
    // Only string values can be exported as String options; skip anything else
    // rather than dereferencing a mis-typed or null pointer.
    auto* objString = dynamic_cast<TObjString*>(m->GetValue(obj));
    if (!objString) {
      LOGP(warn, "Ignoring metadata entry \"{}\" in file \"{}\": value is not a string", obj->GetName(), currentFile->GetName());
      continue;
    }
    LOGP(info, "- {}: {}", obj->GetName(), objString->String().Data());
    std::string key = "aod-metadata-" + std::string(obj->GetName());
    // NOTE(review): the duplicated buffer is never freed here; whether
    // ConfigParamSpec takes ownership or makes its own copy is not visible
    // from this file - confirm before removing the strdup.
    char const* value = strdup(objString->String());
    results.push_back(ConfigParamSpec{key, VariantType::String, value, {"Metadata in AOD"}});
  }

  return results;
}

struct DiscoverMetadataInAOD : o2::framework::ConfigDiscoveryPlugin {
ConfigDiscovery* create() override
Expand All @@ -94,8 +121,6 @@ struct DiscoverMetadataInAOD : o2::framework::ConfigDiscoveryPlugin {
if (filename.empty()) {
return {};
}
std::vector<ConfigParamSpec> results;
TFile* currentFile = nullptr;
if (filename.at(0) == '@') {
filename.erase(0, 1);
// read the text file and set filename to the contents of the first line
Expand All @@ -110,39 +135,64 @@ struct DiscoverMetadataInAOD : o2::framework::ConfigDiscoveryPlugin {
TGrid::Connect("alien://");
}
LOGP(info, "Loading metadata from file {} in PID {}", filename, getpid());
currentFile = TFile::Open(filename.c_str());
if (!currentFile) {
std::unique_ptr<TFile> currentFile{TFile::Open(filename.c_str())};
if (currentFile.get() == nullptr) {
LOGP(fatal, "Couldn't open file \"{}\"!", filename);
}
std::vector<ConfigParamSpec> results = readMetadata(currentFile);
// Found metadata already in the main file.
if (!results.empty()) {
auto tables = getListOfTables(currentFile);
if (tables.empty() == false) {
results.push_back(ConfigParamSpec{"aod-metadata-tables", VariantType::ArrayString, tables, {"Tables in first AOD"}});
}
results.push_back(ConfigParamSpec{"aod-metadata-source", VariantType::String, filename, {"File from which the metadata was extracted."}});
return results;
}

// Get the metadata, if any
auto m = (TMap*)currentFile->Get("metaData");
if (!m) {
// Let's try the parent files
auto parentFiles = (TMap*)currentFile->Get("parentFiles");
if (!parentFiles) {
LOGP(info, "No metadata found in file \"{}\"", filename);
results.push_back(ConfigParamSpec{"aod-metadata-disable", VariantType::String, "1", {"Metadata not found in AOD"}});
return results;
}
auto it = m->MakeIterator();

// Serialise metadata into a ; separated string with : separating key and value
bool first = true;
while (auto obj = it->Next()) {
if (first) {
LOGP(info, "Metadata for file \"{}\":", filename);
first = false;
for (auto* p : *parentFiles) {
std::string parentFilename = ((TPair*)p)->Value()->GetName();
// Do the replacement. Notice this will require changing aod-parent-base-path-replacement to be
// a workflow option (because the metadata itself is potentially changing the topology).
if (registry.isSet("aod-parent-base-path-replacement")) {
auto parentFileReplacement = registry.get<std::string>("aod-parent-base-path-replacement");
auto pos = parentFileReplacement.find(';');
if (pos == std::string::npos) {
throw std::runtime_error(fmt::format("Invalid syntax in aod-parent-base-path-replacement: \"{}\"", parentFileReplacement.c_str()));
}
auto from = parentFileReplacement.substr(0, pos);
auto to = parentFileReplacement.substr(pos + 1);
pos = parentFilename.find(from);
if (pos != std::string::npos) {
parentFilename.replace(pos, from.length(), to);
}
}
auto objString = (TObjString*)m->GetValue(obj);
LOGP(info, "- {}: {}", obj->GetName(), objString->String().Data());
std::string key = "aod-metadata-" + std::string(obj->GetName());
char const* value = strdup(objString->String());
results.push_back(ConfigParamSpec{key, VariantType::String, value, {"Metadata in AOD"}});
}

auto tables = getListOfTables(currentFile);
if (tables.empty() == false) {
results.push_back(ConfigParamSpec{"aod-metadata-tables", VariantType::ArrayString, tables, {"Tables in first AOD"}});
std::unique_ptr<TFile> parentFile{TFile::Open(parentFilename.c_str())};
if (parentFile.get() == nullptr) {
LOGP(fatal, "Couldn't open derived file \"{}\"!", parentFilename);
}
results = readMetadata(parentFile);
// Found metadata in the parent file.
if (!results.empty()) {
auto tables = getListOfTables(parentFile);
if (tables.empty() == false) {
results.push_back(ConfigParamSpec{"aod-metadata-tables", VariantType::ArrayString, tables, {"Tables in first AOD"}});
}
results.push_back(ConfigParamSpec{"aod-metadata-source", VariantType::String, filename, {"File from which the metadata was extracted."}});
return results;
}
LOGP(info, "No metadata found in file \"{}\" nor in its parent file \"{}\"", filename, parentFilename);
break;
}
currentFile->Close();
results.push_back(ConfigParamSpec{"aod-metadata-disable", VariantType::String, "1", {"Metadata not found in AOD"}});
return results;
}};
}
Expand Down
2 changes: 1 addition & 1 deletion Framework/Core/src/ConfigParamDiscovery.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ namespace o2::framework
std::vector<ConfigParamSpec> ConfigParamDiscovery::discover(ConfigParamRegistry& registry, int argc, char** argv)
{
std::vector<char const*> capabilitiesSpecs = {
"O2Framework:DiscoverAODOptionsInCommandLineCapability",
"O2Framework:DiscoverMetadataInAODCapability",
"O2Framework:DiscoverMetadataInCommandLineCapability",
"O2Framework:DiscoverAODOptionsInCommandLineCapability",
};

// Load all the requested plugins and discover what we can do.
Expand Down
9 changes: 8 additions & 1 deletion Framework/Core/src/Plugin.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ auto lookForCommandLineAODOptions = [](ConfigParamRegistry& registry, int argc,
O2_SIGNPOST_EVENT_EMIT(capabilities, sid, "DiscoverAODOptionsInCommandLineCapability", "AOD options found in arguments. Populating from them.");
return true;
}
if (arg.starts_with("--aod-parent-base-path-replacement")) {
O2_SIGNPOST_EVENT_EMIT(capabilities, sid, "DiscoverAODOptionsInCommandLineCapability", "AOD options found in arguments. Populating from them.");
return true;
}
}
return false;
};
Expand Down Expand Up @@ -137,7 +141,7 @@ struct DiscoverAODOptionsInCommandLine : o2::framework::ConfigDiscoveryPlugin {
bool injectOption = true;
for (size_t i = 0; i < argc; i++) {
std::string_view arg = argv[i];
if (!arg.starts_with("--aod-writer-")) {
if (!arg.starts_with("--aod-writer-") && arg != "--aod-parent-base-path-replacement") {
continue;
}
std::string key = arg.data() + 2;
Expand All @@ -149,6 +153,9 @@ struct DiscoverAODOptionsInCommandLine : o2::framework::ConfigDiscoveryPlugin {
results.push_back(ConfigParamSpec{"aod-writer-compression", VariantType::Int, numericValue, {"AOD Compression options"}});
injectOption = false;
}
if (key == "aod-parent-base-path-replacement") {
results.push_back(ConfigParamSpec{"aod-parent-base-path-replacement", VariantType::String, value, {R"(Replace base path of parent files. Syntax: FROM;TO. E.g. "alien:///path/in/alien;/local/path". Enclose in "" on the command line.)"}});
}
}
if (injectOption) {
results.push_back(ConfigParamSpec{"aod-writer-compression", VariantType::Int, 505, {"AOD Compression options"}});
Expand Down
1 change: 0 additions & 1 deletion Framework/Core/src/WorkflowHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext
ConfigParamSpec{"aod-max-io-rate", VariantType::Float, 0.f, {"Maximum I/O rate in MB/s"}},
ConfigParamSpec{"aod-reader-json", VariantType::String, {"json configuration file"}},
ConfigParamSpec{"aod-parent-access-level", VariantType::String, {"Allow parent file access up to specified level. Default: no (0)"}},
ConfigParamSpec{"aod-parent-base-path-replacement", VariantType::String, {R"(Replace base path of parent files. Syntax: FROM;TO. E.g. "alien:///path/in/alien;/local/path". Enclose in "" on the command line.)"}},
ConfigParamSpec{"time-limit", VariantType::Int64, 0ll, {"Maximum run time limit in seconds"}},
ConfigParamSpec{"orbit-offset-enumeration", VariantType::Int64, 0ll, {"initial value for the orbit"}},
ConfigParamSpec{"orbit-multiplier-enumeration", VariantType::Int64, 0ll, {"multiplier to get the orbit from the counter"}},
Expand Down

0 comments on commit ed328b5

Please sign in to comment.