BigW Consortium Gitlab

Commit 0f1d348d by Micaël Bergeron

port the object storage to CE

parent a2f375e8
class Appearance < ActiveRecord::Base
include CacheMarkdownField
include AfterCommitQueue
include ObjectStorage::BackgroundMove
cache_markdown_field :description
cache_markdown_field :new_project_guidelines
......
module Ci
class JobArtifact < ActiveRecord::Base
include AfterCommitQueue
include ObjectStorage::BackgroundMove
extend Gitlab::Ci::Model
belongs_to :project
......@@ -8,15 +9,11 @@ module Ci
before_save :set_size, if: :file_changed?
mount_uploader :file, JobArtifactUploader
scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) }
after_save if: :file_changed?, on: [:create, :update] do
run_after_commit do
file.schedule_migration_to_object_storage
end
end
mount_uploader :file, JobArtifactUploader
delegate :open, :exists?, to: :file
delegate :exists?, :open, to: :file
enum file_type: {
archive: 1,
......@@ -28,6 +25,10 @@ module Ci
self.where(project: project).sum(:size)
end
# True when the artifact file is kept on local disk: `file_store` is either
# unset or equal to JobArtifactUploader::Store::LOCAL.
def local_store?
  current_store = self.file_store
  current_store.nil? || ::JobArtifactUploader::Store::LOCAL == current_store
end
# Copies the size reported by the mounted `file` into the `size` attribute.
# Registered as a `before_save` hook that runs when the file has changed.
def set_size
self.size = file.size
end
......
......@@ -3,6 +3,7 @@ module Avatarable
included do
prepend ShadowMethods
include ObjectStorage::BackgroundMove
validate :avatar_type, if: ->(user) { user.avatar.present? && user.avatar_changed? }
validates :avatar, file_size: { maximum: 200.kilobytes.to_i }
......
class LfsObject < ActiveRecord::Base
prepend EE::LfsObject
include AfterCommitQueue
include ObjectStorage::BackgroundMove
has_many :lfs_objects_projects, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
has_many :projects, through: :lfs_objects_projects
scope :with_files_stored_locally, -> { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) }
validates :oid, presence: true, uniqueness: true
scope :with_files_stored_locally, -> { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) }
......@@ -21,6 +23,10 @@ class LfsObject < ActiveRecord::Base
projects.exists?(project.lfs_storage_project.id)
end
# True when the LFS object is kept on local disk: `file_store` is either
# unset or equal to LfsObjectUploader::Store::LOCAL.
def local_store?
  current_store = self.file_store
  current_store.nil? || LfsObjectUploader::Store::LOCAL == current_store
end
def self.destroy_unreferenced
joins("LEFT JOIN lfs_objects_projects ON lfs_objects_projects.lfs_object_id = #{table_name}.id")
.where(lfs_objects_projects: { id: nil })
......
......@@ -9,6 +9,8 @@ class Upload < ActiveRecord::Base
validates :model, presence: true
validates :uploader, presence: true
scope :with_files_stored_locally, -> { where(store: [nil, ObjectStorage::Store::LOCAL]) }
before_save :calculate_checksum!, if: :foreground_checksummable?
after_commit :schedule_checksum, if: :checksummable?
......@@ -34,8 +36,8 @@ class Upload < ActiveRecord::Base
self.checksum = Digest::SHA256.file(absolute_path).hexdigest
end
def build_uploader
uploader_class.new(model, mount_point, **uploader_context).tap do |uploader|
def build_uploader(mounted_as = nil)
uploader_class.new(model, mounted_as || mount_point).tap do |uploader|
uploader.upload = self
uploader.retrieve_from_store!(identifier)
end
......@@ -52,6 +54,12 @@ class Upload < ActiveRecord::Base
}.compact
end
# True when the upload lives in local storage; a missing `store` value is
# treated as local.
def local?
  store.nil? || store == ObjectStorage::Store::LOCAL
end
private
def delete_file!
......@@ -62,12 +70,6 @@ class Upload < ActiveRecord::Base
checksum.nil? && local? && exist?
end
# True when the upload lives in local storage; a missing `store` value is
# treated as local.
def local?
  store.nil? || store == ObjectStorage::Store::LOCAL
end
# True when the upload is checksummable and its size does not exceed
# CHECKSUM_THRESHOLD. Used as the condition of the
# `before_save :calculate_checksum!` hook.
def foreground_checksummable?
checksummable? && size <= CHECKSUM_THRESHOLD
end
......
......@@ -2,7 +2,6 @@ class AttachmentUploader < GitlabUploader
include RecordsUploads::Concern
include ObjectStorage::Concern
prepend ObjectStorage::Extension::RecordsUploads
include UploaderHelper
private
......
......@@ -15,10 +15,12 @@ class FileUploader < GitlabUploader
MARKDOWN_PATTERN = %r{\!?\[.*?\]\(/uploads/(?<secret>[0-9a-f]{32})/(?<file>.*?)\)}
DYNAMIC_PATH_PATTERN = %r{(?<secret>\h{32})/(?<identifier>.*)}
attr_accessor :model
after :remove, :prune_store_dir
# FileUploader does not run in a model transaction, so we can simply
# enqueue a job after the :store hook.
after :store, :schedule_background_upload
# Root directory of this uploader: the 'uploads' directory joined onto the
# configured `storage_path`.
def self.root
File.join(options.storage_path, 'uploads')
end
......
require 'fog/aws'
require 'carrierwave/storage/fog'
#
# This concern should add object storage support
# to the GitlabUploader class
#
module ObjectStorage
RemoteStoreError = Class.new(StandardError)
UnknownStoreError = Class.new(StandardError)
ObjectStorageUnavailable = Class.new(StandardError)
# Identifiers of the supported storage backends. `storage_for` maps LOCAL to
# CarrierWave's file storage and REMOTE to its Fog storage.
module Store
LOCAL = 1
REMOTE = 2
end
module Extension
# this extension is the glue between the ObjectStorage::Concern and RecordsUploads::Concern
module RecordsUploads
extend ActiveSupport::Concern
prepended do |base|
raise "#{base} must include ObjectStorage::Concern to use extensions." unless base < Concern
base.include(::RecordsUploads::Concern)
end
# Makes sure the uploader points at the right upload record for `identifier`
# before delegating to the parent implementation.
#
# Candidate paths are built by joining `identifier` onto the directory of
# every known store. When the currently assigned upload does not match one of
# those paths and the model, the record is looked up again through `uploads`.
def retrieve_from_store!(identifier)
paths = store_dirs.map { |store, path| File.join(path, identifier) }
unless current_upload_satisfies?(paths, model)
# the upload we already have isn't right, find the correct one
self.upload = uploads.find_by(model: model, path: paths)
end
super
end
# Builds the upload through the parent implementation and stamps it with the
# uploader's current object store.
def build_upload
super.tap do |upload|
upload.store = object_store
end
end
# Assigns the upload record and switches the uploader to the store recorded on
# it. A nil upload is ignored: neither the store nor the parent setter is
# touched.
def upload=(upload)
return unless upload
self.object_store = upload.store
super
end
# Enqueues an ObjectStorage::BackgroundMoveWorker job for the upload record
# behind this uploader. Does nothing (returns nil) unless
# `schedule_background_upload?` holds. Arguments are accepted and ignored so
# the method can be used as a callback.
def schedule_background_upload(*args)
  if schedule_background_upload?
    worker = ObjectStorage::BackgroundMoveWorker
    worker.perform_async(self.class.name, upload.class.to_s, mounted_as, upload.id)
  end
end
private
# True when the currently assigned upload belongs to `model` and its path is
# one of `paths`. False when either the upload or the model is missing.
def current_upload_satisfies?(paths, model)
  return false unless upload && model

  path_matches = paths.include?(upload.path)
  path_matches &&
    upload.model_id == model.id &&
    upload.model_type == model.class.base_class.sti_name
end
end
end
# Add support for automatic background uploading after the file is stored.
#
module BackgroundMove
extend ActiveSupport::Concern
# Schedules a background upload for each of the given mount points once the
# surrounding transaction has committed. Does nothing when no mount point is
# given.
def background_upload(mount_points = [])
  return if mount_points.none?

  run_after_commit do
    mount_points.each do |mount_point|
      send(mount_point).schedule_background_upload # rubocop:disable GitlabSecurity/PublicSend
    end
  end
end
# Names of the mounted uploaders whose serialization column has changed on
# this record.
def changed_mounts
  dirty = self.class.uploaders.select do |mount, uploader_class|
    column = uploader_class.serialization_column(self.class, mount)
    send(:"#{column}_changed?") # rubocop:disable GitlabSecurity/PublicSend
  end

  dirty.keys
end
included do
after_save on: [:create, :update] do
background_upload(changed_mounts)
end
end
end
module Concern
extend ActiveSupport::Concern
included do |base|
base.include(ObjectStorage)
before :store, :verify_license!
after :migrate, :delete_migrated_file
end
# Class-level readers for the uploader's `object_store` settings.
class_methods do
# The `object_store` section of this uploader's options.
def object_store_options
options.object_store
end
# Value of the `enabled` setting of the object store.
def object_store_enabled?
object_store_options.enabled
end
# Value of the `background_upload` setting of the object store.
def background_upload_enabled?
object_store_options.background_upload
end
# Connection settings as a hash with deeply symbolized keys.
def object_store_credentials
object_store_options.connection.to_hash.deep_symbolize_keys
end
# Configured `remote_directory` of the object store.
def remote_store_path
object_store_options.remote_directory
end
# Whether License reports the :object_storage feature as available.
# NOTE(review): `License` is not defined in the visible source - confirm it
# exists in the edition this code ships in.
def licensed?
License.feature_available?(:object_storage)
end
# Name of the column that serializes `mount_point` on `model_class`: the
# `:mount_on` uploader option when set, otherwise the mount point itself.
def serialization_column(model_class, mount_point)
model_class.uploader_options.dig(mount_point, :mount_on) || mount_point
end
end
# True when the current storage engine is CarrierWave's file storage.
def file_storage?
storage.is_a?(CarrierWave::Storage::File)
end
# True when the cache storage engine is CarrierWave's file storage.
def file_cache_storage?
cache_storage.is_a?(CarrierWave::Storage::File)
end
# Store the file currently lives in. Memoized; read from the model's store
# column when the model exposes one, otherwise Store::LOCAL.
def object_store
@object_store ||= model.try(store_serialization_column) || Store::LOCAL
end
# rubocop:disable Gitlab/ModuleWithInstanceVariables
# Sets the current store (nil falls back to Store::LOCAL) and replaces the
# storage engine with the one matching the new store.
def object_store=(value)
@object_store = value || Store::LOCAL
@storage = storage_for(object_store)
end
# rubocop:enable Gitlab/ModuleWithInstanceVariables
# Return true if the current file is part of the model (i.e. is mounted in the model).
# Concretely: the model responds to the setter of the store column.
#
def persist_object_store?
model.respond_to?(:"#{store_serialization_column}=")
end
# Save the current @object_store to the model <mounted_as>_store column.
#
# Does nothing (returns nil) when the model has no such column. The value is
# written with `update_column`.
#
# Raises ActiveRecord::ActiveRecordError when the column could not be updated.
def persist_object_store!
  return unless persist_object_store?

  updated = model.update_column(store_serialization_column, object_store)
  # The error class has to be fully qualified: there is no `ActiveRecordError`
  # constant reachable from inside ObjectStorage, so an unqualified reference
  # would surface as a NameError instead of the intended failure.
  raise ActiveRecord::ActiveRecordError, 'Failed to persist the object store' unless updated
end
# Yields a local filesystem path for the file and returns the block's result.
#
# With file storage the stored path is yielded directly. Otherwise the file is
# first copied into the cache, the cached path is yielded, and the cache
# directory is removed afterwards, even when the block raises.
def use_file
  return yield(path) if file_storage?

  begin
    cache_stored_file!
    yield(cache_path)
  ensure
    cache_storage.delete_dir!(cache_path(nil))
  end
end
# Filename from the parent implementation, falling back to the filename of the
# current file (nil when there is no file either).
def filename
super || file&.filename
end
#
# Move the file to another store
#
# new_store: Enum (Store::LOCAL, Store::REMOTE)
#
# Returns the file in its new store, or nil when the file is already in
# `new_store` or there is no file to move.
#
# On any error the newly stored file (if one was created) is deleted, the
# previous store and file are restored, and the error is re-raised.
#
def migrate!(new_store)
return unless object_store != new_store
return unless file
new_file = nil
# remember the pre-migration state so it can be restored on failure
file_to_delete = file
from_object_store = object_store
self.object_store = new_store # changes the storage and file
# file_storage? now reflects the new store
cache_stored_file! if file_storage?
# both callback chains receive the file being migrated away from
with_callbacks(:migrate, file_to_delete) do
with_callbacks(:store, file_to_delete) do # for #store_versions!
new_file = storage.store!(file)
persist_object_store!
self.file = new_file
end
end
file
rescue => e
# in case of failure delete new file
new_file.delete unless new_file.nil?
# revert back to the old file
self.object_store = from_object_store
self.file = file_to_delete
raise e
end
# Enqueues an ObjectStorage::BackgroundMoveWorker job for the model this
# uploader is mounted on. Does nothing (returns nil) unless
# `schedule_background_upload?` holds. Arguments are accepted and ignored so
# the method can be used as a callback.
def schedule_background_upload(*args)
  if schedule_background_upload?
    worker = ObjectStorage::BackgroundMoveWorker
    worker.perform_async(self.class.name, model.class.name, mounted_as, model.id)
  end
end
# Remote directory taken from the class-level `remote_store_path` setting.
def fog_directory
self.class.remote_store_path
end
# Connection credentials taken from the class-level object store settings.
def fog_credentials
self.class.object_store_credentials
end
# Always false.
# NOTE(review): presumably read by CarrierWave's Fog storage to decide whether
# remote objects are publicly readable - confirm against CarrierWave.
def fog_public
false
end
# `after :migrate` callback: deletes the file that was migrated away from,
# provided the uploader currently has a file.
# NOTE(review): `exists?` looks at the uploader's current file, not at
# `migrated_file` - confirm that this is the intended guard.
def delete_migrated_file(migrated_file)
migrated_file.delete if exists?
end
# `before :store` callback. Storing to file storage is always allowed; any
# other storage requires the object storage feature to be licensed.
#
# Raises ObjectStorageUnavailable when the feature is missing.
def verify_license!(_file)
  return if file_storage? || self.class.licensed?

  raise ObjectStorageUnavailable, 'Object Storage feature is missing'
end
# True when the uploader currently has a file (`file.present?`).
def exists?
file.present?
end
# Directory of the file in `store`, defaulting to the current object store.
def store_dir(store = nil)
store_dirs[store || object_store]
end
# Directory of the file in every known store, keyed by store identifier.
# Local directories are prefixed with `base_dir`; remote ones consist of the
# dynamic segment only.
def store_dirs
{
Store::LOCAL => File.join(base_dir, dynamic_segment),
Store::REMOTE => File.join(dynamic_segment)
}
end
private
# A background upload is scheduled only when object storage is enabled,
# background upload is enabled, the feature is licensed, and the file is
# currently held in file storage. The checks short-circuit in that order.
def schedule_background_upload?
self.class.object_store_enabled? &&
self.class.background_upload_enabled? &&
self.class.licensed? &&
self.file_storage?
end
# This is a hack around CarrierWave. The #migrate! method needs to be
# able to force the current file to the migrated file upon success (and
# back to the previous file upon failure).
def file=(file)
@file = file # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
# Column serializing this uploader on its model, resolved through the
# class-level `serialization_column` helper.
def serialization_column
self.class.serialization_column(model.class, mounted_as)
end
# Returns the column where the 'store' is saved: "<serialization column>_store",
# or plain 'store' when there is no serialization column. Always a Symbol.
def store_serialization_column
  prefix = serialization_column
  column_name = prefix.nil? ? 'store' : "#{prefix}_store"
  column_name.to_sym
end
# Storage engine for the current object store, memoized in @storage.
def storage
@storage ||= storage_for(object_store)
end
# Builds the CarrierWave storage engine for `store`.
#
# Store::REMOTE yields Fog storage and requires object storage to be enabled
# (RuntimeError otherwise); Store::LOCAL yields file storage; anything else
# raises UnknownStoreError.
def storage_for(store)
  if Store::REMOTE == store
    raise 'Object Storage is not enabled' unless self.class.object_store_enabled?

    return CarrierWave::Storage::Fog.new(self)
  end

  raise UnknownStoreError unless Store::LOCAL == store

  CarrierWave::Storage::File.new(self)
end
end
end
......@@ -38,6 +38,9 @@
- github_importer:github_import_stage_import_pull_requests
- github_importer:github_import_stage_import_repository
- object_storage:object_storage_background_move
- object_storage:object_storage_migrate_uploads
- pipeline_cache:expire_job_cache
- pipeline_cache:expire_pipeline_cache
- pipeline_creation:create_pipeline
......@@ -102,3 +105,5 @@
- update_user_activity
- upload_checksum
- web_hook
......@@ -305,6 +305,12 @@ Settings.artifacts['storage_path'] = Settings.absolute(Settings.artifacts.values
# Settings.artifact['path'] is deprecated, use `storage_path` instead
Settings.artifacts['path'] = Settings.artifacts['storage_path']
Settings.artifacts['max_size'] ||= 100 # in megabytes
Settings.artifacts['object_store'] ||= Settingslogic.new({})
Settings.artifacts['object_store']['enabled'] = false if Settings.artifacts['object_store']['enabled'].nil?
Settings.artifacts['object_store']['remote_directory'] ||= nil
Settings.artifacts['object_store']['background_upload'] = true if Settings.artifacts['object_store']['background_upload'].nil?
# Convert upload connection settings to use string keys, to make Fog happy
Settings.artifacts['object_store']['connection']&.deep_stringify_keys!
Settings.artifacts['object_store'] ||= Settingslogic.new({})
Settings.artifacts['object_store']['enabled'] = false if Settings.artifacts['object_store']['enabled'].nil?
......
......@@ -69,3 +69,4 @@
- [project_migrate_hashed_storage, 1]
- [storage_migrator, 1]
- [pages_domain_verification, 1]
- [object_storage, 1]
......@@ -336,6 +336,7 @@ ActiveRecord::Schema.define(version: 20180216121030) do
t.datetime_with_timezone "updated_at", null: false
t.datetime_with_timezone "expire_at"
t.string "file"
t.integer "file_store"
end
add_index "ci_job_artifacts", ["expire_at", "job_id"], name: "index_ci_job_artifacts_on_expire_at_and_job_id", using: :btree
......
# Geo with Object storage
Geo can be used in combination with Object Storage (AWS S3, or
other compatible object storage).
## Configuration
At this time it is required that if object storage is enabled on the
primary, it must also be enabled on the secondary.
The secondary nodes can use the same storage bucket as the primary, or
they can use a replicated storage bucket. At this time GitLab does not
take care of content replication in object storage.
For LFS, follow the documentation to
[set up LFS object storage](../../../workflow/lfs/lfs_administration.md#setting-up-s3-compatible-object-storage).
For CI job artifacts, there is similar documentation to configure
[jobs artifact object storage](../../job_artifacts.md#using-object-storage)
Complete these steps on all nodes, primary **and** secondary.
## Replication
When using Amazon S3, you can use
[CRR](https://docs.aws.amazon.com/AmazonS3/latest/dev/crr.html) to
have automatic replication between the bucket used by the primary and
the bucket used by the secondary.
If you are using Google Cloud Storage, consider using
[Multi-Regional Storage](https://cloud.google.com/storage/docs/storage-classes#multi-regional).
Or you can use the [Storage Transfer Service](https://cloud.google.com/storage/transfer/),
although this only supports daily synchronization.
For manual synchronization, or scheduled by `cron`, please have a look at:
- [`s3cmd sync`](http://s3tools.org/s3cmd-sync)
- [`gsutil rsync`](https://cloud.google.com/storage/docs/gsutil/commands/rsync)
......@@ -85,41 +85,100 @@ _The artifacts are stored by default in
1. Save the file and [restart GitLab][] for the changes to take effect.
### Using object storage
>**Notes:**
- [Introduced][ee-1762] in [GitLab Premium][eep] 9.4.
- Since version 9.5, artifacts are [browsable], when object storage is enabled.
9.4 lacks this feature.
> Available in [GitLab Premium](https://about.gitlab.com/products/) and
[GitLab.com Silver](https://about.gitlab.com/gitlab-com/).
> Since version 10.6, available in [GitLab CE](https://about.gitlab.com/products/)
If you don't want to use the local disk where GitLab is installed to store the
artifacts, you can use an object storage like AWS S3 instead.
This configuration relies on valid AWS credentials to be configured already.
Use an [Object storage option][os] like AWS S3 to store job artifacts.
**In Omnibus installations:**
_The artifacts are stored by default in
`/var/opt/gitlab/gitlab-rails/shared/artifacts`._
1. Edit `/etc/gitlab/gitlab.rb` and add the following lines by replacing with
the values you want:
```ruby
gitlab_rails['artifacts_enabled'] = true
gitlab_rails['artifacts_object_store_enabled'] = true
gitlab_rails['artifacts_object_store_remote_directory'] = "artifacts"
gitlab_rails['artifacts_object_store_connection'] = {
'provider' => 'AWS',
'region' => 'eu-central-1',
'aws_access_key_id' => 'AWS_ACCESS_KEY_ID',
'aws_secret_access_key' => 'AWS_SECRET_ACCESS_KEY'
}
```
NOTE: For GitLab 9.4+, if you are using AWS IAM profiles, be sure to omit the
AWS access key and secret access key/value pairs. For example:
```ruby
gitlab_rails['artifacts_object_store_connection'] = {
'provider' => 'AWS',
'region' => 'eu-central-1',
'use_iam_profile' => true
}
```
1. Save the file and [reconfigure GitLab][] for the changes to take effect.
1. Migrate any existing local artifacts to the object storage:
```bash
gitlab-rake gitlab:artifacts:migrate
```
Currently this has to be executed manually and it will allow you to
migrate the existing artifacts to the object storage, but all new
artifacts will still be stored on the local disk. In the future
you will be given an option to define a default storage for all
new artifacts.
---
**Using Object Store**
**In installations from source:**
_The artifacts are stored by default in
`/home/git/gitlab/shared/artifacts`._
The previously mentioned methods use the local disk to store artifacts. However,
there is the option to use object stores like AWS' S3. To do this, set the
`object_store` in your `gitlab.yml`. This relies on valid AWS
credentials to be configured already.
1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following
lines:
```yaml
artifacts:
enabled: true
object_store:
enabled: true
path: /mnt/storage/artifacts
object_store:
enabled: true
remote_directory: my-bucket-name
connection:
provider: AWS
aws_access_key_id: S3_KEY_ID
aws_secret_key_id: S3_SECRET_KEY_ID
region: eu-central-1
remote_directory: "artifacts" # The bucket name
connection:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
```
This will allow you to migrate existing artifacts to object store,
but all new artifacts will still be stored on the local disk.
In the future you will be given an option to define a default storage artifacts
for all new files. Currently the artifacts migration has to be executed manually:
1. Save the file and [restart GitLab][] for the changes to take effect.
1. Migrate any existing local artifacts to the object storage:
```bash
gitlab-rake gitlab:artifacts:migrate
```
```bash
sudo -u git -H bundle exec rake gitlab:artifacts:migrate RAILS_ENV=production
```
Please note, that enabling this feature
will have the effect that artifacts are _not_ browsable anymore through the web
interface. This limitation will be removed in one of the upcoming releases.
Currently this has to be executed manually and it will allow you to
migrate the existing artifacts to the object storage, but all new
artifacts will still be stored on the local disk. In the future
you will be given an option to define a default storage for all
new artifacts.
## Expiring artifacts
......@@ -223,7 +282,7 @@ When clicking on a specific file, [GitLab Workhorse] extracts it
from the archive and the download begins. This implementation saves space,
memory and disk I/O.
[reconfigure gitlab]: restart_gitlab.md "How to restart GitLab"
[restart gitlab]: restart_gitlab.md "How to restart GitLab"
[reconfigure gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure "How to reconfigure Omnibus GitLab"
[restart gitlab]: restart_gitlab.md#installations-from-source "How to restart GitLab"
[gitlab workhorse]: https://gitlab.com/gitlab-org/gitlab-workhorse "GitLab Workhorse repository"
[ee-os]: https://docs.gitlab.com/ee/administration/job_artifacts.html#using-object-storage
[os]: https://docs.gitlab.com/administration/job_artifacts.html#using-object-storage
# Uploads Migrate Rake Task
## Migrate to Object Storage
After [configuring the object storage](../../uploads.md#using-object-storage) for GitLab's uploads, you may use this task to migrate existing uploads from the local storage to the remote storage.
>**Note:**
All of the processing will be done in a background worker and requires **no downtime**.
This task uses 3 parameters to find uploads to migrate.
>**Note:**
These parameters are mainly internal to GitLab's structure; you may want to refer to the task list below instead.
Parameter | Type | Description
--------- | ---- | -----------
`uploader_class` | string | Type of the uploader to migrate from
`model_class` | string | Type of the model to migrate from
`mount_point` | string/symbol | Name of the model's column on which the uploader is mounted.
This task also accepts some environment variables which you can use to override
certain values:
Variable | Type | Description
-------- | ---- | -----------
`BATCH` | integer | Specifies the size of the batch. Defaults to 200.
**Omnibus Installation**
```bash
# gitlab-rake gitlab:uploads:migrate[uploader_class, model_class, mount_point]
# Avatars
gitlab-rake "gitlab:uploads:migrate[AvatarUploader, Project, :avatar]"
gitlab-rake "gitlab:uploads:migrate[AvatarUploader, Group, :avatar]"
gitlab-rake "gitlab:uploads:migrate[AvatarUploader, User, :avatar]"
# Attachments
gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Note, :attachment]"
gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :logo]"
gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :header_logo]"
# Markdown
gitlab-rake "gitlab:uploads:migrate[FileUploader, Project]"
gitlab-rake "gitlab:uploads:migrate[PersonalFileUploader, Snippet]"
gitlab-rake "gitlab:uploads:migrate[NamespaceFileUploader, Snippet]"
gitlab-rake "gitlab:uploads:migrate[FileUploader, MergeRequest]"
```
**Source Installation**
>**Note:**
Use `RAILS_ENV=production` for every task.
```bash
# sudo -u git -H bundle exec rake gitlab:uploads:migrate
# Avatars
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, Project, :avatar]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, Group, :avatar]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, User, :avatar]"
# Attachments
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Note, :attachment]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :logo]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :header_logo]"
# Markdown
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, Project]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[PersonalFileUploader, Snippet]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[NamespaceFileUploader, Snippet]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, MergeRequest]"
```
# Uploads administration
>**Notes:**
Uploads represent all user data that may be sent to GitLab as a single file. As an example, avatars and notes' attachments are uploads. Uploads are integral to GitLab functionality, and therefore cannot be disabled.
### Using local storage
>**Notes:**
This is the default configuration
To change the location where the uploads are stored locally, follow the steps
below.
---
**In Omnibus installations:**
>**Notes:**
For historical reasons, uploads are stored into a base directory, which by default is `uploads/-/system`. It is strongly discouraged to change this configuration option on an existing GitLab installation.
_The uploads are stored by default in `/var/opt/gitlab/gitlab-rails/public/uploads/-/system`._
1. To change the storage path for example to `/mnt/storage/uploads`, edit
`/etc/gitlab/gitlab.rb` and add the following line:
```ruby
gitlab_rails['uploads_storage_path'] = "/mnt/storage/"
gitlab_rails['uploads_base_dir'] = "uploads"
```
1. Save the file and [reconfigure GitLab][] for the changes to take effect.
---
**In installations from source:**
_The uploads are stored by default in
`/home/git/gitlab/public/uploads/-/system`._
1. To change the storage path for example to `/mnt/storage/uploads`, edit
`/home/git/gitlab/config/gitlab.yml` and add or amend the following lines:
```yaml
uploads:
storage_path: /mnt/storage
base_dir: uploads
```
1. Save the file and [restart GitLab][] for the changes to take effect.
### Using object storage
>**Notes:**
- [Introduced][ee-3867] in [GitLab Enterprise Edition Premium][eep] 10.5.
If you don't want to use the local disk where GitLab is installed to store the
uploads, you can use an object storage provider like AWS S3 instead.
This configuration relies on valid AWS credentials to be configured already.
**In Omnibus installations:**
_The uploads are stored by default in
`/var/opt/gitlab/gitlab-rails/public/uploads/-/system`._
1. Edit `/etc/gitlab/gitlab.rb` and add the following lines by replacing with
the values you want:
```ruby
gitlab_rails['uploads_object_store_enabled'] = true
gitlab_rails['uploads_object_store_remote_directory'] = "uploads"
gitlab_rails['uploads_object_store_connection'] = {
'provider' => 'AWS',
'region' => 'eu-central-1',
'aws_access_key_id' => 'AWS_ACCESS_KEY_ID',
'aws_secret_access_key' => 'AWS_SECRET_ACCESS_KEY'
}
```
>**Note:**
If you are using AWS IAM profiles, be sure to omit the AWS access key and secret access key/value pairs.
```ruby
gitlab_rails['uploads_object_store_connection'] = {
'provider' => 'AWS',
'region' => 'eu-central-1',
'use_iam_profile' => true
}
```
1. Save the file and [reconfigure GitLab][] for the changes to take effect.
1. Migrate any existing local uploads to the object storage:
>**Notes:**
This task complies with the `BATCH` environment variable to process uploads in batches (200 by default). All of the processing will be done in a background worker and requires **no downtime**.
```bash
# gitlab-rake gitlab:uploads:migrate[uploader_class, model_class, mount_point]
# Avatars
gitlab-rake "gitlab:uploads:migrate[AvatarUploader, Project, :avatar]"
gitlab-rake "gitlab:uploads:migrate[AvatarUploader, Group, :avatar]"
gitlab-rake "gitlab:uploads:migrate[AvatarUploader, User, :avatar]"
# Attachments
gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Note, :attachment]"
gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :logo]"
gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :header_logo]"
# Markdown
gitlab-rake "gitlab:uploads:migrate[FileUploader, Project]"
gitlab-rake "gitlab:uploads:migrate[PersonalFileUploader, Snippet]"
gitlab-rake "gitlab:uploads:migrate[NamespaceFileUploader, Snippet]"
gitlab-rake "gitlab:uploads:migrate[FileUploader, MergeRequest]"
```
Currently this has to be executed manually and it will allow you to
migrate the existing uploads to the object storage, but all new
uploads will still be stored on the local disk. In the future
you will be given an option to define a default storage for all
new files.
---
**In installations from source:**
_The uploads are stored by default in
`/home/git/gitlab/public/uploads/-/system`._
1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following
lines:
```yaml
uploads:
object_store:
enabled: true
remote_directory: "uploads" # The bucket name
connection:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
```
1. Save the file and [restart GitLab][] for the changes to take effect.
1. Migrate any existing local uploads to the object storage:
>**Notes:**
- These tasks comply with the `BATCH` environment variable to process uploads in batches (200 by default). All of the processing will be done in a background worker and requires **no downtime**.
- To migrate in production use `RAILS_ENV=production` environment variable.
```bash
# sudo -u git -H bundle exec rake gitlab:uploads:migrate
# Avatars
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, Project, :avatar]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, Group, :avatar]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, User, :avatar]"
# Attachments
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Note, :attachment]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :logo]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :header_logo]"
# Markdown
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, Project]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[PersonalFileUploader, Snippet]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[NamespaceFileUploader, Snippet]"
sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, MergeRequest]"
```
Currently this has to be executed manually and it will allow you to
migrate the existing uploads to the object storage, but all new
uploads will still be stored on the local disk. In the future
you will be given an option to define a default storage for all
new files.
[reconfigure gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure "How to reconfigure Omnibus GitLab"
[restart gitlab]: restart_gitlab.md#installations-from-source "How to restart GitLab"
[eep]: https://about.gitlab.com/gitlab-ee/ "GitLab Enterprise Edition Premium"
[ee-3867]: https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/3867
This document was moved to [another location](../administration/geo/replication/object_storage.md).
......@@ -14,3 +14,4 @@ comments: false
- [Webhooks](web_hooks.md)
- [Import](import.md) of git repositories in bulk
- [Rebuild authorized_keys file](http://docs.gitlab.com/ce/raketasks/maintenance.html#rebuild-authorized_keys-file) task for administrators
- [Migrate Uploads](../administration/raketasks/uploads/migrate.md)
......@@ -5,6 +5,7 @@ Documentation on how to use Git LFS are under [Managing large binary files with
## Requirements
* Git LFS is supported in GitLab starting with version 8.2.
* Support for object storage, such as AWS S3, was introduced in 10.0.
* Users need to install [Git LFS client](https://git-lfs.github.com) version 1.0.1 and up.
## Configuration
......@@ -12,16 +13,18 @@ Documentation on how to use Git LFS are under [Managing large binary files with
Git LFS objects can be large in size. By default, they are stored on the server
GitLab is installed on.
There are two configuration options to help GitLab server administrators:
There are various configuration options to help GitLab server administrators:
* Enabling/disabling Git LFS support
* Changing the location of LFS object storage
* Setting up AWS S3 compatible object storage
### Omnibus packages
In `/etc/gitlab/gitlab.rb`:
```ruby
# Change to true to enable lfs
gitlab_rails['lfs_enabled'] = false
# Optionally, change the storage path location. Defaults to
......@@ -35,11 +38,113 @@ gitlab_rails['lfs_storage_path'] = "/mnt/storage/lfs-objects"
In `config/gitlab.yml`:
```yaml
# Change to true to enable lfs
lfs:
enabled: false
storage_path: /mnt/storage/lfs-objects
```
## Setting up S3 compatible object storage
> **Note:** [Introduced][ee-2760] in [GitLab Premium][eep] 10.0.
> Available in [GitLab CE][ce] 10.6
It is possible to store LFS objects on remote object storage instead of on a local disk.
This allows you to offload storage to an external AWS S3 compatible service, freeing up disk space locally. You can also host your own S3 compatible storage decoupled from GitLab, with a service such as [Minio](https://www.minio.io/).
Object storage currently transfers files first to GitLab, and then on the object storage in a second stage. This can be done either by using a rake task to transfer existing objects, or in a background job after each file is received.
### Object Storage Settings
For source installations the following settings are nested under `lfs:` and then `object_store:`. On omnibus installs they are prefixed by `lfs_object_store_`.
| Setting | Description | Default |
|---------|-------------|---------|
| `enabled` | Enable/disable object storage | `false` |
| `remote_directory` | The bucket name where LFS objects will be stored| |
| `background_upload` | Set to false to disable automatic upload. Option may be removed once upload is direct to S3 | `true` |
| `connection` | Various connection options described below | |
#### S3 compatible connection settings
The connection settings match those provided by [Fog](https://github.com/fog), and are as follows:
| Setting | Description | Default |
|---------|-------------|---------|
| `provider` | Always `AWS` for compatible hosts | AWS |
| `aws_access_key_id` | AWS credentials, or compatible | |
| `aws_secret_access_key` | AWS credentials, or compatible | |
| `region` | AWS region | us-east-1 |
| `host` | S3 compatible host for when not using AWS, e.g. `localhost` or `storage.example.com` | s3.amazonaws.com |
| `endpoint` | Can be used when configuring an S3 compatible service such as [Minio](https://www.minio.io), by entering a URL such as `http://127.0.0.1:9000` | (optional) |
| `path_style` | Set to true to use `host/bucket_name/object` style paths instead of `bucket_name.host/object`. Leave as false for AWS S3 | false |
### From source
1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following
lines:
```yaml
lfs:
enabled: true
object_store:
enabled: false
remote_directory: lfs-objects # Bucket name
connection:
provider: AWS
aws_access_key_id: 1ABCD2EFGHI34JKLM567N
aws_secret_access_key: abcdefhijklmnopQRSTUVwxyz0123456789ABCDE
region: eu-central-1
# Use the following options to configure an AWS compatible host such as Minio
host: 'localhost'
endpoint: 'http://127.0.0.1:9000'
path_style: true
```
1. Save the file and [restart GitLab][] for the changes to take effect.
1. Migrate any existing local LFS objects to the object storage:
```bash
sudo -u git -H bundle exec rake gitlab:lfs:migrate RAILS_ENV=production
```
This will migrate existing LFS objects to object storage. New LFS objects
will be forwarded to object storage unless
`gitlab_rails['lfs_object_store_background_upload']` is set to false.
### In Omnibus
1. Edit `/etc/gitlab/gitlab.rb` and add the following lines by replacing with
the values you want:
```ruby
gitlab_rails['lfs_object_store_enabled'] = true
gitlab_rails['lfs_object_store_remote_directory'] = "lfs-objects"
gitlab_rails['lfs_object_store_connection'] = {
'provider' => 'AWS',
'region' => 'eu-central-1',
'aws_access_key_id' => '1ABCD2EFGHI34JKLM567N',
'aws_secret_access_key' => 'abcdefhijklmnopQRSTUVwxyz0123456789ABCDE',
# The below options configure an S3 compatible host instead of AWS
'host' => 'localhost',
'endpoint' => 'http://127.0.0.1:9000',
'path_style' => true
}
```
1. Save the file and [reconfigure GitLab][] for the changes to take effect.
1. Migrate any existing local LFS objects to the object storage:
```bash
gitlab-rake gitlab:lfs:migrate
```
This will migrate existing LFS objects to object storage. New LFS objects
will be forwarded to object storage unless
`gitlab_rails['lfs_object_store_background_upload']` is set to false.
## Storage statistics
You can see the total storage used for LFS objects on groups and projects
......@@ -48,10 +153,13 @@ and [projects APIs](../../api/projects.md).
## Known limitations
* Currently, storing GitLab Git LFS objects on a non-local storage (like S3 buckets)
is not supported
* Support for removing unreferenced LFS objects was added in 8.14 onwards.
* LFS authentications via SSH was added with GitLab 8.12
* Only compatible with the GitLFS client versions 1.1.0 and up, or 1.0.2.
* The storage statistics currently count each LFS object multiple times for
every project linking to it
[reconfigure gitlab]: ../../administration/restart_gitlab.md#omnibus-gitlab-reconfigure "How to reconfigure Omnibus GitLab"
[restart gitlab]: ../../administration/restart_gitlab.md#installations-from-source "How to restart GitLab"
[eep]: https://about.gitlab.com/products/ "GitLab Premium"
[ee-2760]: https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/2760
......@@ -2,8 +2,21 @@
# Factory definitions for Appearance records, plus traits that attach logo
# fixtures.
# NOTE(review): `title` and `description` are each assigned twice below. This
# looks like the removed and added lines of a diff rendered together; confirm
# which pair is current before relying on either value.
FactoryBot.define do
factory :appearance do
title "MepMep"
description "This is my Community Edition instance"
title "GitLab Enterprise Edition"
description "Open source software to collaborate on code"
new_project_guidelines "Custom project guidelines"
end
# Sets `logo` from the spec/fixtures/dk.png fixture file.
trait :with_logo do
logo { fixture_file_upload('spec/fixtures/dk.png') }
end
# Sets `header_logo` from the same dk.png fixture file.
trait :with_header_logo do
header_logo { fixture_file_upload('spec/fixtures/dk.png') }
end
# Applies both :with_logo and :with_header_logo.
trait :with_logos do
with_logo
with_header_logo
end
end
......@@ -6,6 +6,7 @@ FactoryBot.define do
store ObjectStorage::Store::LOCAL
mount_point :avatar
secret nil
store ObjectStorage::Store::LOCAL
# we should build a mount agnostic upload by default
transient do
......@@ -28,6 +29,10 @@ FactoryBot.define do
secret SecureRandom.hex
end
trait :object_storage do
store ObjectStorage::Store::REMOTE
end
trait :namespace_upload do
model { build(:group) }
path { File.join(secret, filename) }
......
......@@ -683,6 +683,34 @@ describe 'Git LFS API and storage' do
expect(json_response['objects'].first['actions']['upload']['href']).to eq("#{Gitlab.config.gitlab.url}/#{project.full_path}.git/gitlab-lfs/objects/#{sample_oid}/#{sample_size}")
expect(json_response['objects'].first['actions']['upload']['header']).to eq('Authorization' => authorization)
end
## EE-specific context
# Case: the project's stubbed repository + LFS size (100 MB) already exceeds
# its stubbed size limit (99 MB).
context 'and project is above the limit' do
# Stubs every EE::Project instance with the size figures for this case.
# NOTE(review): presumably invoked by a hook in an enclosing context that is
# not visible here — confirm.
let(:update_lfs_permissions) do
allow_any_instance_of(EE::Project).to receive_messages(
repository_and_lfs_size: 100.megabytes,
actual_size_limit: 99.megabytes)
end
# Expects a 406 response and the exact size-limit rejection message.
it 'responds with status 406' do
expect(response).to have_gitlab_http_status(406)
expect(json_response['message']).to eql('Your push has been rejected, because this repository has exceeded its size limit of 99 MB by 1 MB. Please contact your GitLab administrator for more information.')
end
end
# Case: the project's stubbed size (200 MB) is still under its stubbed limit
# (300 MB), but the request is expected to be rejected anyway.
# NOTE(review): the expected "by 50 MB" overage implies the pushed object is
# about 150 MB; the object size is defined outside this view — confirm.
context 'and project will go over the limit' do
# Stubs every EE::Project instance with the size figures for this case.
let(:update_lfs_permissions) do
allow_any_instance_of(EE::Project).to receive_messages(
repository_and_lfs_size: 200.megabytes,
actual_size_limit: 300.megabytes)
end
# Expects a 406 response, a documentation URL containing '/help', and the
# exact size-limit rejection message.
it 'responds with status 406' do
expect(response).to have_gitlab_http_status(406)
expect(json_response['documentation_url']).to include('/help')
expect(json_response['message']).to eql('Your push has been rejected, because this repository has exceeded its size limit of 300 MB by 50 MB. Please contact your GitLab administrator for more information.')
end
end
end
describe 'when request is authenticated' do
......@@ -997,12 +1025,12 @@ describe 'Git LFS API and storage' do
context 'and workhorse requests upload finalize for a new lfs object' do
before do
lfs_object.destroy
allow_any_instance_of(LfsObjectUploader).to receive(:exists?) { false }
end
context 'with object storage disabled' do
it "doesn't attempt to migrate file to object storage" do
expect(ObjectStorageUploadWorker).not_to receive(:perform_async)
expect(ObjectStorage::BackgroundMoveWorker).not_to receive(:perform_async)
put_finalize(with_tempfile: true)
end
......@@ -1014,7 +1042,7 @@ describe 'Git LFS API and storage' do
end
it 'schedules migration of file to object storage' do
expect(ObjectStorageUploadWorker).to receive(:perform_async).with('LfsObjectUploader', 'LfsObject', :file, kind_of(Numeric))
expect(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async).with('LfsObjectUploader', 'LfsObject', :file, kind_of(Numeric))
put_finalize(with_tempfile: true)
end
......
......@@ -89,6 +89,29 @@ describe FileUploader do
end
end
# Specs for uploader callbacks: checks that `prune_store_dir` runs when the
# uploaded file is removed, and that the store directory no longer exists
# afterwards.
describe 'callbacks' do
describe '#prune_store_dir after :remove' do
# Store a fixture file first so there is something to remove.
before do
uploader.store!(fixture_file_upload('spec/fixtures/doc_sample.txt'))
end
# Absolute path of the uploader's store directory, resolved against its root.
def store_dir
File.expand_path(uploader.store_dir, uploader.root)
end
it 'is called' do
expect(uploader).to receive(:prune_store_dir).once
uploader.remove!
end
# Existence of the store directory must flip from true to false on remove!.
it 'prune the store directory' do
expect { uploader.remove! }
.to change { File.exist?(store_dir) }.from(true).to(false)
end
end
end
describe "#migrate!" do
before do
uploader.store!(fixture_file_upload(Rails.root.join('spec/fixtures/dk.png')))
......@@ -98,4 +121,24 @@ describe FileUploader do
it_behaves_like "migrates", to_store: described_class::Store::REMOTE
it_behaves_like "migrates", from_store: described_class::Store::REMOTE, to_store: described_class::Store::LOCAL
end
# Specs for the `upload=` writer: assigning nil must not call `apply_context!`,
# while assigning an upload record must call it with that record's secret and
# filename.
describe '#upload=' do
let(:secret) { SecureRandom.hex }
# Upload record created with a known secret and filename so the expected
# context hash can be matched below.
let(:upload) { create(:upload, :issuable_upload, secret: secret, filename: 'file.txt') }
it 'handles nil' do
expect(uploader).not_to receive(:apply_context!)
uploader.upload = nil
end
it 'extract the uploader context from it' do
expect(uploader).to receive(:apply_context!).with(a_hash_including(secret: secret, identifier: 'file.txt'))
uploader.upload = upload
end
# NOTE(review): these two shared examples concern store migration and also
# appear immediately before this describe block; they may be diff context
# lines that belong to "#migrate!" rather than to this group — confirm.
it_behaves_like "migrates", to_store: described_class::Store::REMOTE
it_behaves_like "migrates", from_store: described_class::Store::REMOTE, to_store: described_class::Store::LOCAL
end
end
......@@ -26,7 +26,7 @@ describe LfsObjectUploader do
describe 'migration to object storage' do
context 'with object storage disabled' do
it "is skipped" do
expect(ObjectStorageUploadWorker).not_to receive(:perform_async)
expect(ObjectStorage::BackgroundMoveWorker).not_to receive(:perform_async)
lfs_object
end
......@@ -38,7 +38,7 @@ describe LfsObjectUploader do
end
it 'is scheduled to run after creation' do
expect(ObjectStorageUploadWorker).to receive(:perform_async).with(described_class.name, 'LfsObject', :file, kind_of(Numeric))
expect(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async).with(described_class.name, 'LfsObject', :file, kind_of(Numeric))
lfs_object
end
......@@ -50,7 +50,7 @@ describe LfsObjectUploader do
end
it 'is skipped' do
expect(ObjectStorageUploadWorker).not_to receive(:perform_async)
expect(ObjectStorage::BackgroundMoveWorker).not_to receive(:perform_async)
lfs_object
end
......@@ -67,7 +67,7 @@ describe LfsObjectUploader do
end
it 'can store file remotely' do
allow(ObjectStorageUploadWorker).to receive(:perform_async)
allow(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async)
store_file(lfs_object)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment