BigW Consortium Gitlab
Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
G
gitlab-ce
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Forest Godfrey
gitlab-ce
Commits
ba39b26c
Commit
ba39b26c
authored
Sep 06, 2017
by
Sean McGivern
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch '35942_api_binary_encoding' into 'master'
API fix for non UTF-8 data Closes #35942 See merge request !14038
parents
93e1d4dd
46f6092a
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
66 additions
and
12 deletions
+66
-12
35942-api-binary-encoding.yaml
changelogs/unreleased/35942-api-binary-encoding.yaml
+3
-0
commits.rb
lib/api/commits.rb
+1
-1
entities.rb
lib/api/entities.rb
+2
-1
encoding_helper.rb
lib/gitlab/encoding_helper.rb
+15
-2
blob.rb
lib/gitlab/git/blob.rb
+4
-8
diff.rb
lib/gitlab/git/diff.rb
+16
-0
diff_spec.rb
spec/lib/gitlab/git/diff_spec.rb
+19
-0
commits_spec.rb
spec/requests/api/commits_spec.rb
+6
-0
No files found.
changelogs/unreleased/35942-api-binary-encoding.yaml
0 → 100644
View file @
ba39b26c
---
title
:
"
Fix
API
to
serve
binary
diffs
that
are
treated
as
text."
merge_request
:
14038
lib/api/commits.rb
View file @
ba39b26c
...
...
@@ -104,7 +104,7 @@ module API
not_found!
'Commit'
unless
commit
commit
.
raw_diffs
.
to_a
present
commit
.
raw_diffs
.
to_a
,
with:
Entities
::
RepoDiff
end
desc
"Get a commit's comments"
do
...
...
lib/api/entities.rb
View file @
ba39b26c
...
...
@@ -291,10 +291,11 @@ module API
end
class
RepoDiff
<
Grape
::
Entity
expose
:old_path
,
:new_path
,
:a_mode
,
:b_mode
,
:diff
expose
:old_path
,
:new_path
,
:a_mode
,
:b_mode
expose
:new_file?
,
as: :new_file
expose
:renamed_file?
,
as: :renamed_file
expose
:deleted_file?
,
as: :deleted_file
expose
:json_safe_diff
,
as: :diff
end
class
ProtectedRefAccess
<
Grape
::
Entity
...
...
lib/gitlab/encoding_helper.rb
View file @
ba39b26c
...
...
@@ -22,10 +22,10 @@ module Gitlab
# return message if message type is binary
detect
=
CharlockHolmes
::
EncodingDetector
.
detect
(
message
)
return
message
.
force_encoding
(
"BINARY"
)
if
detect
&&
detect
[
:type
]
==
:binary
return
message
.
force_encoding
(
"BINARY"
)
if
detect
_binary?
(
message
,
detect
)
# force detected encoding if we have sufficient confidence.
if
detect
&&
detect
[
:encoding
]
&&
detect
[
:confidence
]
>
ENCODING_CONFIDENCE_THRESHOLD
# force detected encoding if we have sufficient confidence.
message
.
force_encoding
(
detect
[
:encoding
])
end
...
...
@@ -36,6 +36,19 @@ module Gitlab
"--broken encoding:
#{
encoding
}
"
end
def
detect_binary?
(
data
,
detect
=
nil
)
detect
||=
CharlockHolmes
::
EncodingDetector
.
detect
(
data
)
detect
&&
detect
[
:type
]
==
:binary
&&
detect
[
:confidence
]
==
100
end
def
detect_libgit2_binary?
(
data
)
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
# which is what we use below to keep a consistent behavior.
detect
=
CharlockHolmes
::
EncodingDetector
.
new
(
8000
).
detect
(
data
)
detect
&&
detect
[
:type
]
==
:binary
end
def
encode_utf8
(
message
)
detect
=
CharlockHolmes
::
EncodingDetector
.
detect
(
message
)
if
detect
&&
detect
[
:encoding
]
...
...
lib/gitlab/git/blob.rb
View file @
ba39b26c
...
...
@@ -42,14 +42,6 @@ module Gitlab
end
end
def
binary?
(
data
)
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
# which is what we use below to keep a consistent behavior.
detect
=
CharlockHolmes
::
EncodingDetector
.
new
(
8000
).
detect
(
data
)
detect
&&
detect
[
:type
]
==
:binary
end
# Returns an array of Blob instances, specified in blob_references as
# [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
# full blob contents are returned. If blob_size_limit >= 0 then each blob will
...
...
@@ -65,6 +57,10 @@ module Gitlab
end
end
def
binary?
(
data
)
EncodingHelper
.
detect_libgit2_binary?
(
data
)
end
private
# Recursive search of blob id by path
...
...
lib/gitlab/git/diff.rb
View file @
ba39b26c
...
...
@@ -116,6 +116,15 @@ module Gitlab
filtered_opts
end
# Return a binary diff message like:
#
# "Binary files a/file/path and b/file/path differ\n"
# This is used when we detect that a diff is binary
# using CharlockHolmes when Rugged treats it as text.
def
binary_message
(
old_path
,
new_path
)
"Binary files
#{
old_path
}
and
#{
new_path
}
differ
\n
"
end
end
def
initialize
(
raw_diff
,
expanded:
true
)
...
...
@@ -190,6 +199,13 @@ module Gitlab
@collapsed
=
true
end
def
json_safe_diff
return
@diff
unless
detect_binary?
(
@diff
)
# the diff is binary, let's make a message for it
Diff
.
binary_message
(
@old_path
,
@new_path
)
end
private
def
init_from_rugged
(
rugged
)
...
...
spec/lib/gitlab/git/diff_spec.rb
View file @
ba39b26c
...
...
@@ -273,6 +273,25 @@ EOT
end
end
describe
'#json_safe_diff'
do
let
(
:project
)
{
create
(
:project
,
:repository
)
}
it
'fake binary message when it detects binary'
do
# Rugged will not detect this as binary, but we can fake it
diff_message
=
"Binary files files/images/icn-time-tracking.pdf and files/images/icn-time-tracking.pdf differ
\n
"
binary_diff
=
described_class
.
between
(
project
.
repository
,
'add-pdf-text-binary'
,
'add-pdf-text-binary^'
).
first
expect
(
binary_diff
.
diff
).
not_to
be_empty
expect
(
binary_diff
.
json_safe_diff
).
to
eq
(
diff_message
)
end
it
'leave non-binary diffs as-is'
do
diff
=
described_class
.
new
(
@rugged_diff
)
expect
(
diff
.
json_safe_diff
).
to
eq
(
diff
.
diff
)
end
end
describe
'#submodule?'
do
before
do
commit
=
repository
.
lookup
(
'5937ac0a7beb003549fc5fd26fc247adbce4a52e'
)
...
...
spec/requests/api/commits_spec.rb
View file @
ba39b26c
...
...
@@ -673,6 +673,12 @@ describe API::Commits do
it_behaves_like
'ref diff'
end
end
context
'when binary diff are treated as text'
do
let
(
:commit_id
)
{
TestEnv
::
BRANCH_SHA
[
'add-pdf-text-binary'
]
}
it_behaves_like
'ref diff'
end
end
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment