BigW Consortium Gitlab

Commit 5aea2e6e by Grzegorz Bizon

Merge branch…

Merge branch '31459-fix-transient-error-caused-by-small-timeout-also-adapt-tests-to-work-locally-correctly' into 'master' Fix transient CI errors by increasing command execution timeouts from 1s to 30s Closes #31459 See merge request !11420
parents 7911f1c0 a0497a7b
......@@ -2,6 +2,9 @@ module Gitlab
module HealthChecks
class FsShardsCheck
extend BaseAbstractCheck
RANDOM_STRING = SecureRandom.hex(1000).freeze
COMMAND_TIMEOUT = '1'.freeze
TIMEOUT_EXECUTABLE = 'timeout'.freeze
class << self
def readiness
......@@ -41,8 +44,6 @@ module Gitlab
private
RANDOM_STRING = SecureRandom.hex(1000).freeze
def operation_metrics(ok_metric, latency_metric, operation, **labels)
with_timing operation do |result, elapsed|
[
......@@ -63,8 +64,8 @@ module Gitlab
@storage_paths ||= Gitlab.config.repositories.storages
end
def with_timeout(args)
%w{timeout 1}.concat(args)
def exec_with_timeout(cmd_args, *args, &block)
Gitlab::Popen.popen([TIMEOUT_EXECUTABLE, COMMAND_TIMEOUT].concat(cmd_args), *args, &block)
end
def tmp_file_path(storage_name)
......@@ -78,7 +79,7 @@ module Gitlab
def storage_stat_test(storage_name)
stat_path = File.join(path(storage_name), '.')
begin
_, status = Gitlab::Popen.popen(with_timeout(%W{ stat #{stat_path} }))
_, status = exec_with_timeout(%W{ stat #{stat_path} })
status == 0
rescue Errno::ENOENT
File.exist?(stat_path) && File::Stat.new(stat_path).readable?
......@@ -86,7 +87,7 @@ module Gitlab
end
def storage_write_test(tmp_path)
_, status = Gitlab::Popen.popen(with_timeout(%W{ tee #{tmp_path} })) do |stdin|
_, status = exec_with_timeout(%W{ tee #{tmp_path} }) do |stdin|
stdin.write(RANDOM_STRING)
end
status == 0
......@@ -96,7 +97,7 @@ module Gitlab
end
def storage_read_test(tmp_path)
_, status = Gitlab::Popen.popen(with_timeout(%W{ diff #{tmp_path} - })) do |stdin|
_, status = exec_with_timeout(%W{ diff #{tmp_path} - }) do |stdin|
stdin.write(RANDOM_STRING)
end
status == 0
......@@ -106,7 +107,7 @@ module Gitlab
end
def delete_test_file(tmp_path)
_, status = Gitlab::Popen.popen(with_timeout(%W{ rm -f #{tmp_path} }))
_, status = exec_with_timeout(%W{ rm -f #{tmp_path} })
status == 0
rescue Errno::ENOENT
File.delete(tmp_path) rescue Errno::ENOENT
......
require 'spec_helper'
describe Gitlab::HealthChecks::FsShardsCheck do
def command_exists?(command)
_, status = Gitlab::Popen.popen(%W{ #{command} 1 echo })
status == 0
rescue Errno::ENOENT
false
end
def timeout_command
@timeout_command ||=
if command_exists?('timeout')
'timeout'
elsif command_exists?('gtimeout')
'gtimeout'
else
''
end
end
let(:metric_class) { Gitlab::HealthChecks::Metric }
let(:result_class) { Gitlab::HealthChecks::Result }
let(:repository_storages) { [:default] }
......@@ -15,6 +33,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do
before do
allow(described_class).to receive(:repository_storages) { repository_storages }
allow(described_class).to receive(:storages_paths) { storages_paths }
stub_const('Gitlab::HealthChecks::FsShardsCheck::TIMEOUT_EXECUTABLE', timeout_command)
end
after do
......@@ -78,40 +97,76 @@ describe Gitlab::HealthChecks::FsShardsCheck do
}.with_indifferent_access
end
it { is_expected.to include(metric_class.new(:filesystem_accessible, 0, shard: :default)) }
it { is_expected.to include(metric_class.new(:filesystem_readable, 0, shard: :default)) }
it { is_expected.to include(metric_class.new(:filesystem_writable, 0, shard: :default)) }
it { is_expected.to all(have_attributes(labels: { shard: :default })) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_accessible, value: 0)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_readable, value: 0)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_writable, value: 0)) }
it { is_expected.to include(have_attributes(name: :filesystem_access_latency, value: be >= 0, labels: { shard: :default })) }
it { is_expected.to include(have_attributes(name: :filesystem_read_latency, value: be >= 0, labels: { shard: :default })) }
it { is_expected.to include(have_attributes(name: :filesystem_write_latency, value: be >= 0, labels: { shard: :default })) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) }
end
context 'storage points to directory that has both read and write rights' do
before do
FileUtils.chmod_R(0755, tmp_dir)
end
it { is_expected.to all(have_attributes(labels: { shard: :default })) }
it { is_expected.to include(metric_class.new(:filesystem_accessible, 1, shard: :default)) }
it { is_expected.to include(metric_class.new(:filesystem_readable, 1, shard: :default)) }
it { is_expected.to include(metric_class.new(:filesystem_writable, 1, shard: :default)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_accessible, value: 1)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_readable, value: 1)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_writable, value: 1)) }
it { is_expected.to include(have_attributes(name: :filesystem_access_latency, value: be >= 0, labels: { shard: :default })) }
it { is_expected.to include(have_attributes(name: :filesystem_read_latency, value: be >= 0, labels: { shard: :default })) }
it { is_expected.to include(have_attributes(name: :filesystem_write_latency, value: be >= 0, labels: { shard: :default })) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) }
end
end
end
context 'when timeout kills fs checks' do
before do
stub_const('Gitlab::HealthChecks::FsShardsCheck::COMMAND_TIMEOUT', '1')
allow(described_class).to receive(:exec_with_timeout).and_wrap_original { |m| m.call(%w(sleep 60)) }
FileUtils.chmod_R(0755, tmp_dir)
end
describe '#readiness' do
subject { described_class.readiness }
it { is_expected.to include(result_class.new(false, 'cannot stat storage', shard: :default)) }
end
describe '#metrics' do
subject { described_class.metrics }
it 'provides metrics' do
expect(subject).to all(have_attributes(labels: { shard: :default }))
expect(subject).to include(an_object_having_attributes(name: :filesystem_accessible, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0))
end
end
end
context 'when popen always finds required binaries' do
before do
allow(Gitlab::Popen).to receive(:popen).and_wrap_original do |method, *args, &block|
allow(described_class).to receive(:exec_with_timeout).and_wrap_original do |method, *args, &block|
begin
method.call(*args, &block)
rescue RuntimeError
rescue RuntimeError, Errno::ENOENT
raise 'expected not to happen'
end
end
stub_const('Gitlab::HealthChecks::FsShardsCheck::COMMAND_TIMEOUT', '10')
end
it_behaves_like 'filesystem checks'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment