From d8b247b3bb7d67177a7ad395c795e17a1580858d Mon Sep 17 00:00:00 2001 From: Daniel Magliola Date: Mon, 14 Oct 2019 12:33:12 +0100 Subject: [PATCH 1/2] WIP: Allow having all metrics in one file, #143 This was a quick experiment on having all metrics for each process on the same file, because it seemed like it would be easy. (just do these things in this commit) However, even though tests pass, this doesn't actually work. See next commit on why. If the change were just this, i'd say we should do this. However, as you'll see in the next commit, it's more involved than that, and i'm not sure it's worth doing, at least not with this approach... I'm just pushing this up so it doesn't get lost. --- .../client/data_stores/direct_file_store.rb | 27 +++++++-- spec/benchmarks/data_stores.rb | 6 ++ .../data_stores/direct_file_store_spec.rb | 56 +++++++++++++++++++ 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/lib/prometheus/client/data_stores/direct_file_store.rb b/lib/prometheus/client/data_stores/direct_file_store.rb index 9dbab76e..e683cedb 100644 --- a/lib/prometheus/client/data_stores/direct_file_store.rb +++ b/lib/prometheus/client/data_stores/direct_file_store.rb @@ -29,8 +29,9 @@ class InvalidStoreSettingsError < StandardError; end DEFAULT_METRIC_SETTINGS = { aggregation: SUM } DEFAULT_GAUGE_SETTINGS = { aggregation: ALL } - def initialize(dir:) - @store_settings = { dir: dir } + def initialize(dir:, separate_files_per_metric: true) + @store_settings = { dir: dir, + separate_files_per_metric: separate_files_per_metric } FileUtils.mkdir_p(dir) end @@ -125,6 +126,12 @@ def all_values [k.to_sym, vs.first] end.to_h + unless @store_settings[:separate_files_per_metric] + # All metrics are in the same file. Ignore entries for other metrics + next unless label_set[:__metric_name] == metric_name.to_s + label_set.delete(:__metric_name) + end + stores_data[label_set] << v end ensure @@ -149,6 +156,9 @@ def store_key(labels) if @values_aggregation_mode == ALL labels[:pid] = process_id end + unless @store_settings[:separate_files_per_metric] + labels[:__metric_name] = metric_name.to_s + end labels.map{|k,v| "#{CGI::escape(k.to_s)}=#{CGI::escape(v.to_s)}"}.join('&') end @@ -164,12 +174,21 @@ def internal_store # Filename for this metric's PStore (one per process) def filemap_filename - filename = "metric_#{ metric_name }___#{ process_id }.bin" + filename = if @store_settings[:separate_files_per_metric] + "metric_#{ metric_name }___#{ process_id }.bin" + else + "metric___all_metrics___#{ process_id }.bin" + end File.join(@store_settings[:dir], filename) end def stores_for_metric - Dir.glob(File.join(@store_settings[:dir], "metric_#{ metric_name }___*")) + base_filename = if @store_settings[:separate_files_per_metric] + "metric_#{ metric_name }___*" + else + "metric___all_metrics___*" + end + Dir.glob(File.join(@store_settings[:dir], base_filename)) end def process_id diff --git a/spec/benchmarks/data_stores.rb b/spec/benchmarks/data_stores.rb index 773604df..bcb00a3a 100644 --- a/spec/benchmarks/data_stores.rb +++ b/spec/benchmarks/data_stores.rb @@ -81,6 +81,12 @@ def all_values; {}; end { store: Prometheus::Client::DataStores::DirectFileStore.new(dir: TMP_DIR), before: -> () { cleanup_dir(TMP_DIR) }, + }, + { + store: Prometheus::Client::DataStores::DirectFileStore.new(dir: TMP_DIR, + separate_files_per_metric: false), + before: -> () { cleanup_dir(TMP_DIR) }, + name: "DirectFileStore Single File" } ] diff --git a/spec/prometheus/client/data_stores/direct_file_store_spec.rb b/spec/prometheus/client/data_stores/direct_file_store_spec.rb index ba9d7d15..aea45773 100644 --- a/spec/prometheus/client/data_stores/direct_file_store_spec.rb +++ b/spec/prometheus/client/data_stores/direct_file_store_spec.rb @@ -250,4 +250,60 @@ expect(truncate_calls_count).to be >= 3 end + + context "with all metrics in one single file" do + subject { described_class.new(dir: "/tmp/prometheus_test", separate_files_per_metric: false) } + + let(:metric_store1) { subject.for_metric(:metric_name, metric_type: :counter) } + let(:metric_store2) { subject.for_metric(:metric_name2, metric_type: :counter) } + + it "stores all metrics into one file" do + metric_store1.increment(labels: {a: "x", b: "x"}, by: 1) + metric_store1.increment(labels: {a: "x", b: "zzz"}, by: 2) + metric_store2.increment(labels: {a: "x", b: "x"}, by: 3) + + expect(Dir.glob('/tmp/prometheus_test/*').length).to eq(1) + end + + it "exports values correctly" do + metric_store1.increment(labels: {a: "x", b: "x"}, by: 1) + metric_store1.increment(labels: {a: "x", b: "zzz"}, by: 2) + metric_store2.increment(labels: {a: "x", b: "x"}, by: 3) + + expect(metric_store1.all_values).to eq( + {a: "x", b: "x"} => 1.0, + {a: "x", b: "zzz"} => 2.0, + ) + + expect(metric_store2.all_values).to eq( + {a: "x", b: "x"} => 3.0, + ) + end + + it "exports values correctly from multiple processes" do + metric_store1.increment(labels: {a: "x", b: "x"}, by: 1) + metric_store1.increment(labels: {a: "x", b: "zzz"}, by: 2) + metric_store2.increment(labels: {a: "x", b: "x"}, by: 3) + + # 2nd process + allow(Process).to receive(:pid).and_return(23456) + metric_store1.increment(labels: {a: "x", b: "x"}, by: 4) + metric_store1.increment(labels: {a: "x", b: "yyy"}, by: 5) + metric_store2.increment(labels: {a: "x", b: "x"}, by: 6) + + # Make sure we actually have 2 files to sup together + expect(Dir.glob('/tmp/prometheus_test/metric___all_metrics___*').length).to eq(2) + + expect(metric_store1.all_values).to eq( + {a: "x", b: "x"} => 5.0, + {a: "x", b: "zzz"} => 2.0, + {a: "x", b: "yyy"} => 5.0, + ) + + expect(metric_store2.all_values).to eq( + {a: "x", b: "x"} => 9.0, + ) + end + + end end From 149fc3ce62aa3971105186507d9a0b24a13aaa1f Mon Sep 17 00:00:00 2001 From: Daniel Magliola Date: Mon, 14 Oct 2019 12:38:18 +0100 Subject: [PATCH 2/2] Fix the "subtle" bug introduced in the previous commit So, again, I don't propose we actually do this, this is just illustrative of what we need to do, and didn't want it to get lost. The problem with the previous commit is that for each metric, we open a new FileMappedDict. All of them sharing the same file is "fine", actually. The problem comes from the fact that each instance of FileMappedDict has their own version of `@used`. When one metric adds a new entry to the file, it moves its own `@used` pointed, but the other instances don't notice this. As soon as a second instance that had already been loaded adds a new labelset, it corrupts the file. This only shows up when running the benchmarks, with histograms. Counters (in our benchmark) don't reproduce it, because each metric in our benchmark only has one labelset. They all get added initially to the file, and an already loded Dict doesn't add new ones. The reason Histograms reproduce it is that new observations may report a bucket that wasn't seen before, which is effectively a new labelset. The solution is for all MetricStore instances to share one single FileMappedDict. This is not particularly hard, but the current MetricStore is not written in a way that allows that without doing this crap. We should find a better way. I'm just leaving this here as documentation of this problem, for the next brave soul attempting this --- .../client/data_stores/direct_file_store.rb | 22 +++++++++++++++++++ .../data_stores/direct_file_store_spec.rb | 2 ++ 2 files changed, 24 insertions(+) diff --git a/lib/prometheus/client/data_stores/direct_file_store.rb b/lib/prometheus/client/data_stores/direct_file_store.rb index e683cedb..5d7bb240 100644 --- a/lib/prometheus/client/data_stores/direct_file_store.rb +++ b/lib/prometheus/client/data_stores/direct_file_store.rb @@ -67,6 +67,11 @@ def validate_metric_settings(metric_settings) class MetricStore attr_reader :metric_name, :store_settings + class << self + attr_accessor :shared_store_opened_by_pid + attr_accessor :shared_internal_store + end + def initialize(metric_name:, store_settings:, metric_settings:) @metric_name = metric_name @store_settings = store_settings @@ -164,6 +169,14 @@ def store_key(labels) end def internal_store + if @store_settings[:separate_files_per_metric] + individual_metric_internal_store + else + all_metrics_shared_internal_store + end + end + + def individual_metric_internal_store if @store_opened_by_pid != process_id @store_opened_by_pid = process_id @internal_store = FileMappedDict.new(filemap_filename) @@ -172,6 +185,15 @@ def internal_store end end + def all_metrics_shared_internal_store + if self.class.shared_store_opened_by_pid != process_id + self.class.shared_store_opened_by_pid = process_id + self.class.shared_internal_store = FileMappedDict.new(filemap_filename) + else + self.class.shared_internal_store + end + end + # Filename for this metric's PStore (one per process) def filemap_filename filename = if @store_settings[:separate_files_per_metric] diff --git a/spec/prometheus/client/data_stores/direct_file_store_spec.rb b/spec/prometheus/client/data_stores/direct_file_store_spec.rb index aea45773..35f29f0b 100644 --- a/spec/prometheus/client/data_stores/direct_file_store_spec.rb +++ b/spec/prometheus/client/data_stores/direct_file_store_spec.rb @@ -10,6 +10,8 @@ # Reset the PStores before do Dir.glob('/tmp/prometheus_test/*').each { |file| File.delete(file) } + # This doesn't actually work, btw, but it's what would have to be done. + Prometheus::Client::DataStores::DirectFileStore::MetricStore.shared_store_opened_by_pid = nil end it_behaves_like Prometheus::Client::DataStores