BigW Consortium Gitlab

Commit e1018e02 by Stan Hu Committed by Tiago

Merge branch…

Merge branch '41677-branch-name-omitted-due-to-bad-utf-8-conversion-by-gitaly-ref-handler' into 'master' Resolve "Branch name omitted due to bad UTF-8 conversion by Gitaly ref handler" Closes #41677 See merge request gitlab-org/gitlab-ce!16243 (cherry picked from commit f7afb14c) 5152cc3b Fix a bug where charlock_holmes was used needlessly to encode strings
parent 873f6320
...@@ -14,14 +14,7 @@ module Gitlab ...@@ -14,14 +14,7 @@ module Gitlab
ENCODING_CONFIDENCE_THRESHOLD = 50 ENCODING_CONFIDENCE_THRESHOLD = 50
def encode!(message) def encode!(message)
return nil unless message.respond_to?(:force_encoding) message = force_encode_utf8(message)
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
if message.respond_to?(:frozen?) && message.frozen?
message = message.dup
end
message.force_encoding("UTF-8")
return message if message.valid_encoding? return message if message.valid_encoding?
# return message if message type is binary # return message if message type is binary
...@@ -35,6 +28,8 @@ module Gitlab ...@@ -35,6 +28,8 @@ module Gitlab
# encode and clean the bad chars # encode and clean the bad chars
message.replace clean(message) message.replace clean(message)
rescue ArgumentError
return nil
rescue rescue
encoding = detect ? detect[:encoding] : "unknown" encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}" "--broken encoding: #{encoding}"
...@@ -54,8 +49,8 @@ module Gitlab ...@@ -54,8 +49,8 @@ module Gitlab
end end
def encode_utf8(message) def encode_utf8(message)
return nil unless message.is_a?(String) message = force_encode_utf8(message)
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding? return message if message.valid_encoding?
detect = CharlockHolmes::EncodingDetector.detect(message) detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding] if detect && detect[:encoding]
...@@ -69,10 +64,21 @@ module Gitlab ...@@ -69,10 +64,21 @@ module Gitlab
else else
clean(message) clean(message)
end end
rescue ArgumentError
return nil
end end
private private
# Re-tags +message+ as UTF-8 without transcoding its bytes.
#
# - Raises ArgumentError when +message+ does not respond to
#   +force_encoding+.
# - A message that is already tagged UTF-8 and has a valid encoding is
#   returned as-is (same object).
# - A frozen message is duplicated first, so the frozen original is left
#   untouched; a non-frozen message is re-tagged in place.
#
# The result is NOT guaranteed to be valid UTF-8; callers are expected to
# check +valid_encoding?+ on the returned string.
def force_encode_utf8(message)
  unless message.respond_to?(:force_encoding)
    raise ArgumentError
  end

  already_utf8 = message.encoding == Encoding::UTF_8 && message.valid_encoding?
  return message if already_utf8

  # force_encoding mutates its receiver, which a frozen string forbids.
  target = message
  target = message.dup if message.respond_to?(:frozen?) && message.frozen?
  target.force_encoding("UTF-8")
end
def clean(message) def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "") message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8") .encode("UTF-8")
......
...@@ -11,7 +11,7 @@ module Gitlab ...@@ -11,7 +11,7 @@ module Gitlab
include Gitlab::EncodingHelper include Gitlab::EncodingHelper
def ref_name(ref) def ref_name(ref)
encode_utf8(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '') encode!(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
end end
def branch_name(ref) def branch_name(ref)
......
...@@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do ...@@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do
it 'returns empty string on conversion errors' do it 'returns empty string on conversion errors' do
expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError) expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError)
end end
# Regression spec: input whose bytes can simply be re-tagged as UTF-8
# should come back as UTF-8 without invoking the encoding detector.
context 'with strings that can be forcefully encoded into utf8' do
# Plain ASCII ref name, tagged as binary (ASCII-8BIT).
let(:test_string) do
"refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT")
end
# The same characters, tagged as UTF-8.
let(:expected_string) do
"refs/heads/FixSymbolsTitleDropdown".encode("UTF-8")
end
subject { ext_class.encode_utf8(test_string) }
it "doesn't use CharlockHolmes if the encoding can be forced into utf_8" do
# The expectation is set before `subject` is first evaluated below, so any
# call to the detector made by encode_utf8 would fail this example.
expect(CharlockHolmes::EncodingDetector).not_to receive(:detect)
expect(subject).to eq(expected_string)
expect(subject.encoding.name).to eq('UTF-8')
end
end
end end
describe '#clean' do describe '#clean' do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment