BigW Consortium Gitlab

Commit 5aea2e6e by Grzegorz Bizon

Merge branch…

Merge branch '31459-fix-transient-error-caused-by-small-timeout-also-adapt-tests-to-work-locally-correctly' into 'master' Fix transient CI errors by increasing command execution timeouts from 1s to 30s Closes #31459 See merge request !11420
parents 7911f1c0 a0497a7b
...@@ -2,6 +2,9 @@ module Gitlab ...@@ -2,6 +2,9 @@ module Gitlab
module HealthChecks module HealthChecks
class FsShardsCheck class FsShardsCheck
extend BaseAbstractCheck extend BaseAbstractCheck
RANDOM_STRING = SecureRandom.hex(1000).freeze
COMMAND_TIMEOUT = '1'.freeze
TIMEOUT_EXECUTABLE = 'timeout'.freeze
class << self class << self
def readiness def readiness
...@@ -41,8 +44,6 @@ module Gitlab ...@@ -41,8 +44,6 @@ module Gitlab
private private
RANDOM_STRING = SecureRandom.hex(1000).freeze
def operation_metrics(ok_metric, latency_metric, operation, **labels) def operation_metrics(ok_metric, latency_metric, operation, **labels)
with_timing operation do |result, elapsed| with_timing operation do |result, elapsed|
[ [
...@@ -63,8 +64,8 @@ module Gitlab ...@@ -63,8 +64,8 @@ module Gitlab
@storage_paths ||= Gitlab.config.repositories.storages @storage_paths ||= Gitlab.config.repositories.storages
end end
def with_timeout(args) def exec_with_timeout(cmd_args, *args, &block)
%w{timeout 1}.concat(args) Gitlab::Popen.popen([TIMEOUT_EXECUTABLE, COMMAND_TIMEOUT].concat(cmd_args), *args, &block)
end end
def tmp_file_path(storage_name) def tmp_file_path(storage_name)
...@@ -78,7 +79,7 @@ module Gitlab ...@@ -78,7 +79,7 @@ module Gitlab
def storage_stat_test(storage_name) def storage_stat_test(storage_name)
stat_path = File.join(path(storage_name), '.') stat_path = File.join(path(storage_name), '.')
begin begin
_, status = Gitlab::Popen.popen(with_timeout(%W{ stat #{stat_path} })) _, status = exec_with_timeout(%W{ stat #{stat_path} })
status == 0 status == 0
rescue Errno::ENOENT rescue Errno::ENOENT
File.exist?(stat_path) && File::Stat.new(stat_path).readable? File.exist?(stat_path) && File::Stat.new(stat_path).readable?
...@@ -86,7 +87,7 @@ module Gitlab ...@@ -86,7 +87,7 @@ module Gitlab
end end
def storage_write_test(tmp_path) def storage_write_test(tmp_path)
_, status = Gitlab::Popen.popen(with_timeout(%W{ tee #{tmp_path} })) do |stdin| _, status = exec_with_timeout(%W{ tee #{tmp_path} }) do |stdin|
stdin.write(RANDOM_STRING) stdin.write(RANDOM_STRING)
end end
status == 0 status == 0
...@@ -96,7 +97,7 @@ module Gitlab ...@@ -96,7 +97,7 @@ module Gitlab
end end
def storage_read_test(tmp_path) def storage_read_test(tmp_path)
_, status = Gitlab::Popen.popen(with_timeout(%W{ diff #{tmp_path} - })) do |stdin| _, status = exec_with_timeout(%W{ diff #{tmp_path} - }) do |stdin|
stdin.write(RANDOM_STRING) stdin.write(RANDOM_STRING)
end end
status == 0 status == 0
...@@ -106,7 +107,7 @@ module Gitlab ...@@ -106,7 +107,7 @@ module Gitlab
end end
def delete_test_file(tmp_path) def delete_test_file(tmp_path)
_, status = Gitlab::Popen.popen(with_timeout(%W{ rm -f #{tmp_path} })) _, status = exec_with_timeout(%W{ rm -f #{tmp_path} })
status == 0 status == 0
rescue Errno::ENOENT rescue Errno::ENOENT
File.delete(tmp_path) rescue Errno::ENOENT File.delete(tmp_path) rescue Errno::ENOENT
......
require 'spec_helper' require 'spec_helper'
describe Gitlab::HealthChecks::FsShardsCheck do describe Gitlab::HealthChecks::FsShardsCheck do
def command_exists?(command)
_, status = Gitlab::Popen.popen(%W{ #{command} 1 echo })
status == 0
rescue Errno::ENOENT
false
end
def timeout_command
@timeout_command ||=
if command_exists?('timeout')
'timeout'
elsif command_exists?('gtimeout')
'gtimeout'
else
''
end
end
let(:metric_class) { Gitlab::HealthChecks::Metric } let(:metric_class) { Gitlab::HealthChecks::Metric }
let(:result_class) { Gitlab::HealthChecks::Result } let(:result_class) { Gitlab::HealthChecks::Result }
let(:repository_storages) { [:default] } let(:repository_storages) { [:default] }
...@@ -15,6 +33,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do ...@@ -15,6 +33,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do
before do before do
allow(described_class).to receive(:repository_storages) { repository_storages } allow(described_class).to receive(:repository_storages) { repository_storages }
allow(described_class).to receive(:storages_paths) { storages_paths } allow(described_class).to receive(:storages_paths) { storages_paths }
stub_const('Gitlab::HealthChecks::FsShardsCheck::TIMEOUT_EXECUTABLE', timeout_command)
end end
after do after do
...@@ -78,40 +97,76 @@ describe Gitlab::HealthChecks::FsShardsCheck do ...@@ -78,40 +97,76 @@ describe Gitlab::HealthChecks::FsShardsCheck do
}.with_indifferent_access }.with_indifferent_access
end end
it { is_expected.to include(metric_class.new(:filesystem_accessible, 0, shard: :default)) } it { is_expected.to all(have_attributes(labels: { shard: :default })) }
it { is_expected.to include(metric_class.new(:filesystem_readable, 0, shard: :default)) }
it { is_expected.to include(metric_class.new(:filesystem_writable, 0, shard: :default)) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_accessible, value: 0)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_readable, value: 0)) }
it { is_expected.to include(an_object_having_attributes(name: :filesystem_writable, value: 0)) }
it { is_expected.to include(have_attributes(name: :filesystem_access_latency, value: be >= 0, labels: { shard: :default })) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) }
it { is_expected.to include(have_attributes(name: :filesystem_read_latency, value: be >= 0, labels: { shard: :default })) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) }
it { is_expected.to include(have_attributes(name: :filesystem_write_latency, value: be >= 0, labels: { shard: :default })) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) }
end end
context 'storage points to directory that has both read and write rights' do context 'storage points to directory that has both read and write rights' do
before do before do
FileUtils.chmod_R(0755, tmp_dir) FileUtils.chmod_R(0755, tmp_dir)
end end
it { is_expected.to all(have_attributes(labels: { shard: :default })) }
it { is_expected.to include(metric_class.new(:filesystem_accessible, 1, shard: :default)) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_accessible, value: 1)) }
it { is_expected.to include(metric_class.new(:filesystem_readable, 1, shard: :default)) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_readable, value: 1)) }
it { is_expected.to include(metric_class.new(:filesystem_writable, 1, shard: :default)) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_writable, value: 1)) }
it { is_expected.to include(have_attributes(name: :filesystem_access_latency, value: be >= 0, labels: { shard: :default })) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) }
it { is_expected.to include(have_attributes(name: :filesystem_read_latency, value: be >= 0, labels: { shard: :default })) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) }
it { is_expected.to include(have_attributes(name: :filesystem_write_latency, value: be >= 0, labels: { shard: :default })) } it { is_expected.to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) }
end
end
end
context 'when timeout kills fs checks' do
before do
stub_const('Gitlab::HealthChecks::FsShardsCheck::COMMAND_TIMEOUT', '1')
allow(described_class).to receive(:exec_with_timeout).and_wrap_original { |m| m.call(%w(sleep 60)) }
FileUtils.chmod_R(0755, tmp_dir)
end
describe '#readiness' do
subject { described_class.readiness }
it { is_expected.to include(result_class.new(false, 'cannot stat storage', shard: :default)) }
end
describe '#metrics' do
subject { described_class.metrics }
it 'provides metrics' do
expect(subject).to all(have_attributes(labels: { shard: :default }))
expect(subject).to include(an_object_having_attributes(name: :filesystem_accessible, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0))
expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0))
end end
end end
end end
context 'when popen always finds required binaries' do context 'when popen always finds required binaries' do
before do before do
allow(Gitlab::Popen).to receive(:popen).and_wrap_original do |method, *args, &block| allow(described_class).to receive(:exec_with_timeout).and_wrap_original do |method, *args, &block|
begin begin
method.call(*args, &block) method.call(*args, &block)
rescue RuntimeError rescue RuntimeError, Errno::ENOENT
raise 'expected not to happen' raise 'expected not to happen'
end end
end end
stub_const('Gitlab::HealthChecks::FsShardsCheck::COMMAND_TIMEOUT', '10')
end end
it_behaves_like 'filesystem checks' it_behaves_like 'filesystem checks'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment