BigW Consortium Gitlab

Commit a6af5522 by micael.bergeron

renames ambiguous methods and adds spec

parent dbaed90c
...@@ -24,7 +24,7 @@ module Gitlab ...@@ -24,7 +24,7 @@ module Gitlab
# return message if message type is binary # return message if message type is binary
detect = CharlockHolmes::EncodingDetector.detect(message) detect = CharlockHolmes::EncodingDetector.detect(message)
return message.force_encoding("BINARY") if all_binary?(message, detect) return message.force_encoding("BINARY") if detect_binary?(message, detect)
if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
# force detected encoding if we have sufficient confidence. # force detected encoding if we have sufficient confidence.
...@@ -38,17 +38,17 @@ module Gitlab ...@@ -38,17 +38,17 @@ module Gitlab
"--broken encoding: #{encoding}" "--broken encoding: #{encoding}"
end end
def all_binary?(data, detect = nil) def detect_binary?(data, detect = nil)
detect ||= CharlockHolmes::EncodingDetector.detect(data) detect ||= CharlockHolmes::EncodingDetector.detect(data)
detect && detect[:type] == :binary detect && detect[:type] == :binary && detect[:confidence] == 100
end end
def libgit2_binary?(data) def detect_libgit2_binary?(data)
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15), # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
# which is what we use below to keep a consistent behavior. # which is what we use below to keep a consistent behavior.
detect = CharlockHolmes::EncodingDetector.new(8000).detect(data) detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
all_binary?(data, detect) detect && detect[:type] == :binary
end end
def encode_utf8(message) def encode_utf8(message)
......
...@@ -58,7 +58,7 @@ module Gitlab ...@@ -58,7 +58,7 @@ module Gitlab
end end
def binary?(data) def binary?(data)
EncodingHelper.libgit2_binary?(data) EncodingHelper.detect_libgit2_binary?(data)
end end
private private
......
...@@ -120,6 +120,8 @@ module Gitlab ...@@ -120,6 +120,8 @@ module Gitlab
# Return a binary diff message like: # Return a binary diff message like:
# #
# "Binary files a/file/path and b/file/path differ\n" # "Binary files a/file/path and b/file/path differ\n"
# This is used when we detect that a diff is binary
# using CharlockHolmes when Rugged treats it as text.
def binary_message(old_path, new_path) def binary_message(old_path, new_path)
"Binary files #{old_path} and #{new_path} differ\n" "Binary files #{old_path} and #{new_path} differ\n"
end end
...@@ -198,7 +200,7 @@ module Gitlab ...@@ -198,7 +200,7 @@ module Gitlab
end end
def json_safe_diff def json_safe_diff
return @diff unless all_binary?(@diff) return @diff unless detect_binary?(@diff)
# the diff is binary, let's make a message for it # the diff is binary, let's make a message for it
Diff.binary_message(@old_path, @new_path) Diff.binary_message(@old_path, @new_path)
......
...@@ -673,6 +673,12 @@ describe API::Commits do ...@@ -673,6 +673,12 @@ describe API::Commits do
it_behaves_like 'ref diff' it_behaves_like 'ref diff'
end end
end end
context 'when binary diff are treated as text' do
let(:commit_id) { TestEnv::BRANCH_SHA['add-pdf-text-binary'] }
it_behaves_like 'ref diff'
end
end end
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment