BigW Consortium Gitlab

Commit 5152cc3b by Alejandro Rodríguez

Fix a bug where charlock_holmes was used needlessly to encode strings

parent 6f1b4dc7
......@@ -14,14 +14,7 @@ module Gitlab
ENCODING_CONFIDENCE_THRESHOLD = 50
def encode!(message)
return nil unless message.respond_to?(:force_encoding)
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
if message.respond_to?(:frozen?) && message.frozen?
message = message.dup
end
message.force_encoding("UTF-8")
message = force_encode_utf8(message)
return message if message.valid_encoding?
# return message if message type is binary
......@@ -35,6 +28,8 @@ module Gitlab
# encode and clean the bad chars
message.replace clean(message)
rescue ArgumentError
return nil
rescue
encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}"
......@@ -54,8 +49,8 @@ module Gitlab
end
def encode_utf8(message)
return nil unless message.is_a?(String)
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
message = force_encode_utf8(message)
return message if message.valid_encoding?
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
......@@ -69,6 +64,8 @@ module Gitlab
else
clean(message)
end
rescue ArgumentError
return nil
end
def encode_binary(s)
......@@ -83,6 +80,15 @@ module Gitlab
private
def force_encode_utf8(message)
raise ArgumentError unless message.respond_to?(:force_encoding)
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
message = message.dup if message.respond_to?(:frozen?) && message.frozen?
message.force_encoding("UTF-8")
end
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8")
......
......@@ -11,7 +11,7 @@ module Gitlab
include Gitlab::EncodingHelper
def ref_name(ref)
encode_utf8(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
encode!(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
end
def branch_name(ref)
......
......@@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do
it 'returns empty string on conversion errors' do
expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError)
end
context 'with strings that can be forcefully encoded into utf8' do
let(:test_string) do
"refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT")
end
let(:expected_string) do
"refs/heads/FixSymbolsTitleDropdown".encode("UTF-8")
end
subject { ext_class.encode_utf8(test_string) }
it "doesn't use CharlockHolmes if the encoding can be forced into utf_8" do
expect(CharlockHolmes::EncodingDetector).not_to receive(:detect)
expect(subject).to eq(expected_string)
expect(subject.encoding.name).to eq('UTF-8')
end
end
end
describe '#clean' do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment