Merge branch 'live-trace-v2' into 'master'

New CI Job live-trace architecture (v2) Closes #44935 See merge request gitlab-org/gitlab-ce!18169

Merge branch 'live-trace-v2' into 'master'
bf4073d5 · Grzegorz Bizon · 892b371d · e1d11cc6 · bf4073d5 · bf4073d5
Commit bf4073d5 authored May 07, 2018 by Grzegorz Bizon
29 changed files
--- a/.flayignore
+++ b/.flayignore
@@ -9,3 +9,4 @@ lib/gitlab/gitaly_client/operation_service.rb
 lib/gitlab/background_migration/*
 app/models/project_services/kubernetes_service.rb
 lib/gitlab/workhorse.rb
+lib/gitlab/ci/trace/chunked_io.rb
--- a/app/models/ci/build.rb
+++ b/app/models/ci/build.rb
@@ -19,6 +19,7 @@ module Ci

    has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment'
    has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
+    has_many :trace_chunks, class_name: 'Ci::BuildTraceChunk', foreign_key: :build_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent

    has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
    has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id

--- a/app/models/ci/build_trace_chunk.rb
+++ b/app/models/ci/build_trace_chunk.rb
+module Ci
+  class BuildTraceChunk < ActiveRecord::Base
+    extend Gitlab::Ci::Model
+
+    belongs_to :build, class_name: "Ci::Build", foreign_key: :build_id
+
+    after_destroy :redis_delete_data, if: :redis?
+
+    default_value_for :data_store, :redis
+
+    WriteError = Class.new(StandardError)
+
+    CHUNK_SIZE = 128.kilobytes
+    CHUNK_REDIS_TTL = 1.week
+    WRITE_LOCK_RETRY = 10
+    WRITE_LOCK_SLEEP = 0.01.seconds
+    WRITE_LOCK_TTL = 1.minute
+
+    enum data_store: {
+      redis: 1,
+      db: 2
+    }
+
+    ##
+    # Data is memoized for optimizing #size and #end_offset
+    def data
+      @data ||= get_data.to_s
+    end
+
+    def truncate(offset = 0)
+      raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0
+      return if offset == size # Skip the following process as it doesn't affect anything
+
+      self.append("", offset)
+    end
+
+    def append(new_data, offset)
+      raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0
+      raise ArgumentError, 'Chunk size overflow' if CHUNK_SIZE < (offset + new_data.bytesize)
+
+      set_data(data.byteslice(0, offset) + new_data)
+    end
+
+    def size
+      data&.bytesize.to_i
+    end
+
+    def start_offset
+      chunk_index * CHUNK_SIZE
+    end
+
+    def end_offset
+      start_offset + size
+    end
+
+    def range
+      (start_offset...end_offset)
+    end
+
+    def use_database!
+      in_lock do
+        break if db?
+        break unless size > 0
+
+        self.update!(raw_data: data, data_store: :db)
+        redis_delete_data
+      end
+    end
+
+    private
+
+    def get_data
+      if redis?
+        redis_data
+      elsif db?
+        raw_data
+      else
+        raise 'Unsupported data store'
+      end&.force_encoding(Encoding::BINARY) # Redis/Database return UTF-8 string as default
+    end
+
+    def set_data(value)
+      raise ArgumentError, 'too much data' if value.bytesize > CHUNK_SIZE
+
+      in_lock do
+        if redis?
+          redis_set_data(value)
+        elsif db?
+          self.raw_data = value
+        else
+          raise 'Unsupported data store'
+        end
+
+        @data = value
+
+        save! if changed?
+      end
+
+      schedule_to_db if full?
+    end
+
+    def schedule_to_db
+      return if db?
+
+      Ci::BuildTraceChunkFlushWorker.perform_async(id)
+    end
+
+    def full?
+      size == CHUNK_SIZE
+    end
+
+    def redis_data
+      Gitlab::Redis::SharedState.with do |redis|
+        redis.get(redis_data_key)
+      end
+    end
+
+    def redis_set_data(data)
+      Gitlab::Redis::SharedState.with do |redis|
+        redis.set(redis_data_key, data, ex: CHUNK_REDIS_TTL)
+      end
+    end
+
+    def redis_delete_data
+      Gitlab::Redis::SharedState.with do |redis|
+        redis.del(redis_data_key)
+      end
+    end
+
+    def redis_data_key
+      "gitlab:ci:trace:#{build_id}:chunks:#{chunk_index}"
+    end
+
+    def redis_lock_key
+      "trace_write:#{build_id}:chunks:#{chunk_index}"
+    end
+
+    def in_lock
+      lease = Gitlab::ExclusiveLease.new(redis_lock_key, timeout: WRITE_LOCK_TTL)
+      retry_count = 0
+
+      until uuid = lease.try_obtain
+        # Keep trying until we obtain the lease. To prevent hammering Redis too
+        # much we'll wait for a bit between retries.
+        sleep(WRITE_LOCK_SLEEP)
+        break if WRITE_LOCK_RETRY < (retry_count += 1)
+      end
+
+      raise WriteError, 'Failed to obtain write lock' unless uuid
+
+      self.reload if self.persisted?
+      return yield
+    ensure
+      Gitlab::ExclusiveLease.cancel(redis_lock_key, uuid)
+    end
+  end
+end
--- a/app/workers/all_queues.yml
+++ b/app/workers/all_queues.yml
@@ -52,6 +52,7 @@
 - pipeline_creation:create_pipeline
 - pipeline_creation:run_pipeline_schedule
 - pipeline_background:archive_trace
+- pipeline_background:ci_build_trace_chunk_flush
 - pipeline_default:build_coverage
 - pipeline_default:build_trace_sections
 - pipeline_default:pipeline_metrics

--- a/app/workers/ci/build_trace_chunk_flush_worker.rb
+++ b/app/workers/ci/build_trace_chunk_flush_worker.rb
+module Ci
+  class BuildTraceChunkFlushWorker
+    include ApplicationWorker
+    include PipelineBackgroundQueue
+
+    def perform(build_trace_chunk_id)
+      ::Ci::BuildTraceChunk.find_by(id: build_trace_chunk_id).try do |build_trace_chunk|
+        build_trace_chunk.use_database!
+      end
+    end
+  end
+end
--- a/changelogs/unreleased/live-trace-v2.yml
+++ b/changelogs/unreleased/live-trace-v2.yml
+---
+title: New CI Job live-trace architecture
+merge_request: 18169
+author:
+type: changed
--- a/db/migrate/20180326202229_create_ci_build_trace_chunks.rb
+++ b/db/migrate/20180326202229_create_ci_build_trace_chunks.rb
+class CreateCiBuildTraceChunks < ActiveRecord::Migration
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+
+  def change
+    create_table :ci_build_trace_chunks, id: :bigserial do |t|
+      t.integer :build_id, null: false
+      t.integer :chunk_index, null: false
+      t.integer :data_store, null: false
+      t.binary :raw_data
+
+      t.foreign_key :ci_builds, column: :build_id, on_delete: :cascade
+      t.index [:build_id, :chunk_index], unique: true
+    end
+  end
+end
--- a/db/migrate/20180406204716_add_limits_ci_build_trace_chunks_raw_data_for_mysql.rb
+++ b/db/migrate/20180406204716_add_limits_ci_build_trace_chunks_raw_data_for_mysql.rb
+# See http://doc.gitlab.com/ce/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+require Rails.root.join('db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql')
+
+class AddLimitsCiBuildTraceChunksRawDataForMysql < ActiveRecord::Migration
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+
+  def up
+    LimitsCiBuildTraceChunksRawDataForMysql.new.up
+  end
+end
--- a/db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql.rb
+++ b/db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql.rb
+class LimitsCiBuildTraceChunksRawDataForMysql < ActiveRecord::Migration
+  def up
+    return unless Gitlab::Database.mysql?
+
+    # Mysql needs MEDIUMTEXT type (up to 16MB) rather than TEXT (up to 64KB)
+    # Because 'raw_data' is always capped by Ci::BuildTraceChunk::CHUNK_SIZE, which is 128KB
+    change_column :ci_build_trace_chunks, :raw_data, :binary, limit: 16.megabytes - 1 #MEDIUMTEXT
+  end
+end
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -253,6 +253,15 @@ ActiveRecord::Schema.define(version: 20180503200320) do

  add_index "chat_teams", ["namespace_id"], name: "index_chat_teams_on_namespace_id", unique: true, using: :btree

+  create_table "ci_build_trace_chunks", id: :bigserial, force: :cascade do |t|
+    t.integer "build_id", null: false
+    t.integer "chunk_index", null: false
+    t.integer "data_store", null: false
+    t.binary "raw_data"
+  end
+
+  add_index "ci_build_trace_chunks", ["build_id", "chunk_index"], name: "index_ci_build_trace_chunks_on_build_id_and_chunk_index", unique: true, using: :btree
+
  create_table "ci_build_trace_section_names", force: :cascade do |t|
    t.integer "project_id", null: false
    t.string "name", null: false
@@ -2110,6 +2119,7 @@ ActiveRecord::Schema.define(version: 20180503200320) do
  add_foreign_key "boards", "namespaces", column: "group_id", on_delete: :cascade
  add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade
  add_foreign_key "chat_teams", "namespaces", on_delete: :cascade
+  add_foreign_key "ci_build_trace_chunks", "ci_builds", column: "build_id", on_delete: :cascade
  add_foreign_key "ci_build_trace_section_names", "projects", on_delete: :cascade
  add_foreign_key "ci_build_trace_sections", "ci_build_trace_section_names", column: "section_name_id", name: "fk_264e112c66", on_delete: :cascade
  add_foreign_key "ci_build_trace_sections", "ci_builds", column: "build_id", name: "fk_4ebe41f502", on_delete: :cascade

--- a/doc/administration/job_traces.md
+++ b/doc/administration/job_traces.md
@@ -40,3 +40,98 @@ To change the location where the job logs will be stored, follow the steps below

 [reconfigure gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure "How to reconfigure Omnibus GitLab"
 [restart gitlab]: restart_gitlab.md#installations-from-source "How to restart GitLab"
+
+## New live trace architecture
+
+> [Introduced][ce-18169] in GitLab 10.4.
+
+> **Notes**:
+- This feature is still Beta, which could impact GitLab.com/on-premises instances, and in the worst case scenario, traces will be lost.
+- This feature is still being discussed in [an issue](https://gitlab.com/gitlab-org/gitlab-ce/issues/46097) for the performance improvements.
+- This feature is off by default. See below for how to enable or disable this feature.
+
+**What is "live trace"?**
+
+A live trace is the job trace that the Runner sends while a job is running. You can see the live trace in the job page UI.
+Live traces are archived once the job finishes.
+
+**What is the new architecture?**
+
+So far, when GitLab Runner sends a job trace to GitLab-Rails, traces have been saved to file storage as text files.
+This was a problem for [Cloud Native-compatible GitLab application](https://gitlab.com/gitlab-com/migration/issues/23) where GitLab had to rely on File Storage.
+
+This new live trace architecture stores chunks of traces in Redis and database instead of file storage.
+Redis is used as first-class storage, and it stores up to 128kB per chunk. Once a full chunk has been received, it is flushed to the database. Later, the data in Redis and the database is archived to object storage.
+
+Here is the detailed data flow.
+
+1. GitLab Runner picks a job from GitLab-Rails
+1. GitLab Runner sends a piece of trace to GitLab-Rails
+1. GitLab-Rails appends the data to Redis
+1. Once the data in Redis reaches 128kB, the data is flushed to the database
+1. Steps 2-4 are repeated until the job is finished
+1. Once the job is finished, GitLab-Rails schedules a Sidekiq worker to archive the trace
+1. The Sidekiq worker archives the trace to object storage, and cleans up the trace in Redis and the database
+
+**How to check if it's on or off?**
+
+```ruby
+Feature.enabled?('ci_enable_live_trace')
+```
+
+**How to enable?**
+
+```ruby
+Feature.enable('ci_enable_live_trace')
+```
+
+>**Note:**
+The transition period will be handled gracefully. Upcoming traces will be generated with the new architecture, and on-going live traces will stay with the legacy architecture (i.e. on-going live traces won't be re-generated forcibly with the new architecture).
+
+**How to disable?**
+
+```ruby
+Feature.disable('ci_enable_live_trace')
+```
+
+>**Note:**
+The transition period will be handled gracefully. Upcoming traces will be generated with the legacy architecture, and on-going live traces will stay with the new architecture (i.e. on-going live traces won't be re-generated forcibly with the legacy architecture).
+
+**Redis namespace:**
+
+`Gitlab::Redis::SharedState`
+
+**Potential impact:**
+
+- This feature could incur data loss:
+  - Case 1: When all data in Redis are accidentally flushed.
+    - On-going live traces could be recovered by re-sending traces (This is supported by all versions of GitLab Runner)
+    - Finished jobs whose live traces have not been archived yet will lose the last part (~128kB) of trace data.
+  - Case 2: When Sidekiq workers fail to archive (e.g. there was a bug that prevents the archiving process, Sidekiq inconsistency, etc.):
+    - Currently all trace data in Redis will be deleted after one week. If the Sidekiq workers can't finish by the expiry date, that part of the trace data will be lost.
+- This feature could consume all memory on Redis instance. If the number of jobs is 1000, 128MB (128kB * 1000) is consumed.
+- This feature could increase database replication lag. An `INSERT` is issued to indicate that we have a trace chunk. An `UPDATE` with 128kB of data is issued once we receive multiple chunks.
+- and so on
+
+**How to test?**
+
+We're currently evaluating this feature on dev.gitlab.org or staging.gitlab.com to verify it. Here is the list of tests/measurements.
+
+- Features:
+  - Live traces should be visible on job pages
+  - Archived traces should be visible on job pages
+  - Live traces should be archived to Object storage
+  - Live traces should be cleaned up after archived
+  - etc
+- Performance:
+  - Schedule 1000~10000 jobs and let GitLab Runners process them concurrently. Measure memory pressure, IO load, etc.
+  - etc
+- Failover:
+  - Simulate Redis outage
+  - etc
+
+**How to verify the correctness?**
+
+- TBD
+
+[ce-18169]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/18169
--- a/lib/api/runner.rb
+++ b/lib/api/runner.rb
@@ -153,9 +153,20 @@ module API
        content_range = request.headers['Content-Range']
        content_range = content_range.split('-')

-        stream_size = job.trace.append(request.body.read, content_range[0].to_i)
-        if stream_size < 0
-          break error!('416 Range Not Satisfiable', 416, { 'Range' => "0-#{-stream_size}" })
+        # TODO:
+        # it seems that `Content-Range` as formatted by runner is wrong,
+        # the `byte_end` should point to final byte, but it points byte+1
+        # that means that we have to calculate end of body,
+        # as we cannot use `content_range[1]`
+        # Issue: https://gitlab.com/gitlab-org/gitlab-runner/issues/3275
+
+        body_data = request.body.read
+        body_start = content_range[0].to_i
+        body_end = body_start + body_data.bytesize
+
+        stream_size = job.trace.append(body_data, body_start)
+        unless stream_size == body_end
+          break error!('416 Range Not Satisfiable', 416, { 'Range' => "0-#{stream_size}" })
        end

        status 202

--- a/lib/gitlab/ci/trace.rb
+++ b/lib/gitlab/ci/trace.rb
@@ -36,16 +36,16 @@ module Gitlab
      end

      def set(data)
-        write do |stream|
+        write('w+b') do |stream|
          data = job.hide_secrets(data)
          stream.set(data)
        end
      end

      def append(data, offset)
-        write do |stream|
+        write('a+b') do |stream|
          current_length = stream.size
-          break -current_length unless current_length == offset
+          break current_length unless current_length == offset

          data = job.hide_secrets(data)
          stream.append(data, offset)
@@ -54,13 +54,15 @@ module Gitlab
      end

      def exist?
-        trace_artifact&.exists? || current_path.present? || old_trace.present?
+        trace_artifact&.exists? || job.trace_chunks.any? || current_path.present? || old_trace.present?
      end

      def read
        stream = Gitlab::Ci::Trace::Stream.new do
          if trace_artifact
            trace_artifact.open
+          elsif job.trace_chunks.any?
+            Gitlab::Ci::Trace::ChunkedIO.new(job)
          elsif current_path
            File.open(current_path, "rb")
          elsif old_trace
@@ -73,9 +75,15 @@ module Gitlab
        stream&.close
      end

-      def write
+      def write(mode)
        stream = Gitlab::Ci::Trace::Stream.new do
-          File.open(ensure_path, "a+b")
+          if current_path
+            File.open(current_path, mode)
+          elsif Feature.enabled?('ci_enable_live_trace')
+            Gitlab::Ci::Trace::ChunkedIO.new(job)
+          else
+            File.open(ensure_path, mode)
+          end
        end

        yield(stream).tap do
@@ -92,6 +100,7 @@ module Gitlab
          FileUtils.rm(trace_path, force: true)
        end

+        job.trace_chunks.destroy_all
        job.erase_old_trace!
      end

@@ -99,7 +108,12 @@ module Gitlab
        raise ArchiveError, 'Already archived' if trace_artifact
        raise ArchiveError, 'Job is not finished yet' unless job.complete?

-        if current_path
+        if job.trace_chunks.any?
+          Gitlab::Ci::Trace::ChunkedIO.new(job) do |stream|
+            archive_stream!(stream)
+            stream.destroy!
+          end
+        elsif current_path
          File.open(current_path) do |stream|
            archive_stream!(stream)
            FileUtils.rm(current_path)
@@ -116,7 +130,7 @@ module Gitlab

      def archive_stream!(stream)
        clone_file!(stream, JobArtifactUploader.workhorse_upload_path) do |clone_path|
-          create_job_trace!(job, clone_path)
+          create_build_trace!(job, clone_path)
        end
      end

@@ -132,7 +146,7 @@ module Gitlab
        end
      end

-      def create_job_trace!(job, path)
+      def create_build_trace!(job, path)
        File.open(path) do |stream|
          job.create_job_artifacts_trace!(
            project: job.project,

--- a/lib/gitlab/ci/trace/chunked_io.rb
+++ b/lib/gitlab/ci/trace/chunked_io.rb
+##
+# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html)
+# source: https://gitlab.com/snippets/1685610
+module Gitlab
+  module Ci
+    class Trace
+      class ChunkedIO
+        CHUNK_SIZE = ::Ci::BuildTraceChunk::CHUNK_SIZE
+
+        FailedToGetChunkError = Class.new(StandardError)
+
+        attr_reader :build
+        attr_reader :tell, :size
+        attr_reader :chunk, :chunk_range
+
+        alias_method :pos, :tell
+
+        def initialize(build, &block)
+          @build = build
+          @chunks_cache = []
+          @tell = 0
+          @size = calculate_size
+          yield self if block_given?
+        end
+
+        def close
+          # no-op
+        end
+
+        def binmode
+          # no-op
+        end
+
+        def binmode?
+          true
+        end
+
+        def seek(pos, where = IO::SEEK_SET)
+          new_pos =
+            case where
+            when IO::SEEK_END
+              size + pos
+            when IO::SEEK_SET
+              pos
+            when IO::SEEK_CUR
+              tell + pos
+            else
+              -1
+            end
+
+          raise ArgumentError, 'new position is outside of file' if new_pos < 0 || new_pos > size
+
+          @tell = new_pos
+        end
+
+        def eof?
+          tell == size
+        end
+
+        def each_line
+          until eof?
+            line = readline
+            break if line.nil?
+
+            yield(line)
+          end
+        end
+
+        def read(length = nil, outbuf = "")
+          out = ""
+
+          length ||= size - tell
+
+          until length <= 0 || eof?
+            data = chunk_slice_from_offset
+            break if data.empty?
+
+            chunk_bytes = [CHUNK_SIZE - chunk_offset, length].min
+            chunk_data = data.byteslice(0, chunk_bytes)
+
+            out << chunk_data
+            @tell += chunk_data.bytesize
+            length -= chunk_data.bytesize
+          end
+
+          # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality
+          if outbuf
+            outbuf.slice!(0, outbuf.bytesize)
+            outbuf << out
+          end
+
+          out
+        end
+
+        def readline
+          out = ""
+
+          until eof?
+            data = chunk_slice_from_offset
+            new_line = data.index("\n")
+
+            if !new_line.nil?
+              out << data[0..new_line]
+              @tell += new_line + 1
+              break
+            else
+              out << data
+              @tell += data.bytesize
+            end
+          end
+
+          out
+        end
+
+        def write(data)
+          start_pos = tell
+
+          while tell < start_pos + data.bytesize
+            # get slice from current offset till the end where it falls into chunk
+            chunk_bytes = CHUNK_SIZE - chunk_offset
+            chunk_data = data.byteslice(tell - start_pos, chunk_bytes)
+
+            # append data to chunk, overwriting from that point
+            ensure_chunk.append(chunk_data, chunk_offset)
+
+            # move offsets within buffer
+            @tell += chunk_data.bytesize
+            @size = [size, tell].max
+          end
+
+          tell - start_pos
+        ensure
+          invalidate_chunk_cache
+        end
+
+        def truncate(offset)
+          raise ArgumentError, 'Outside of file' if offset > size || offset < 0
+          return if offset == size # Skip the following process as it doesn't affect anything
+
+          @tell = offset
+          @size = offset
+
+          # remove all next chunks
+          trace_chunks.where('chunk_index > ?', chunk_index).destroy_all
+
+          # truncate current chunk
+          current_chunk.truncate(chunk_offset)
+        ensure
+          invalidate_chunk_cache
+        end
+
+        def flush
+          # no-op
+        end
+
+        def present?
+          true
+        end
+
+        def destroy!
+          trace_chunks.destroy_all
+          @tell = @size = 0
+        ensure
+          invalidate_chunk_cache
+        end
+
+        private
+
+        ##
+        # The below methods are not implemented in IO class
+        #
+        def in_range?
+          @chunk_range&.include?(tell)
+        end
+
+        def chunk_slice_from_offset
+          unless in_range?
+            current_chunk.tap do |chunk|
+              raise FailedToGetChunkError unless chunk
+
+              @chunk = chunk.data
+              @chunk_range = chunk.range
+            end
+          end
+
+          @chunk[chunk_offset..CHUNK_SIZE]
+        end
+
+        def chunk_offset
+          tell % CHUNK_SIZE
+        end
+
+        def chunk_index
+          tell / CHUNK_SIZE
+        end
+
+        def chunk_start
+          chunk_index * CHUNK_SIZE
+        end
+
+        def chunk_end
+          [chunk_start + CHUNK_SIZE, size].min
+        end
+
+        def invalidate_chunk_cache
+          @chunks_cache = []
+        end
+
+        def current_chunk
+          @chunks_cache[chunk_index] ||= trace_chunks.find_by(chunk_index: chunk_index)
+        end
+
+        def build_chunk
+          @chunks_cache[chunk_index] = ::Ci::BuildTraceChunk.new(build: build, chunk_index: chunk_index)
+        end
+
+        def ensure_chunk
+          current_chunk || build_chunk
+        end
+
+        def trace_chunks
+          ::Ci::BuildTraceChunk.where(build: build)
+        end
+
+        def calculate_size
+          trace_chunks.order(chunk_index: :desc).first.try(&:end_offset).to_i
+        end
+      end
+    end
+  end
+end
--- a/lib/gitlab/ci/trace/stream.rb
+++ b/lib/gitlab/ci/trace/stream.rb
@@ -39,6 +39,8 @@ module Gitlab
        end

        def append(data, offset)
+          data = data.force_encoding(Encoding::BINARY)
+
          stream.truncate(offset)
          stream.seek(0, IO::SEEK_END)
          stream.write(data)
@@ -46,8 +48,11 @@ module Gitlab
        end

        def set(data)
-          truncate(0)
+          data = data.force_encoding(Encoding::BINARY)
+
+          stream.seek(0, IO::SEEK_SET)
          stream.write(data)
+          stream.truncate(data.bytesize)
          stream.flush()
        end

@@ -127,11 +132,11 @@ module Gitlab
            buf += debris
            debris, *lines = buf.each_line.to_a
            lines.reverse_each do |line|
-              yield(line.force_encoding('UTF-8'))
+              yield(line.force_encoding(Encoding.default_external))
            end
          end

-          yield(debris.force_encoding('UTF-8')) unless debris.empty?
+          yield(debris.force_encoding(Encoding.default_external)) unless debris.empty?
        end

        def read_backward(length)

--- a/lib/tasks/migrate/add_limits_mysql.rake
+++ b/lib/tasks/migrate/add_limits_mysql.rake
 require Rails.root.join('db/migrate/limits_to_mysql')
 require Rails.root.join('db/migrate/markdown_cache_limits_to_mysql')
 require Rails.root.join('db/migrate/merge_request_diff_file_limits_to_mysql')
+require Rails.root.join('db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql')

 desc "GitLab | Add limits to strings in mysql database"
 task add_limits_mysql: :environment do
@@ -8,4 +9,5 @@ task add_limits_mysql: :environment do
  LimitsToMysql.new.up
  MarkdownCacheLimitsToMysql.new.up
  MergeRequestDiffFileLimitsToMysql.new.up
+  LimitsCiBuildTraceChunksRawDataForMysql.new.up
 end
--- a/spec/controllers/projects/jobs_controller_spec.rb
+++ b/spec/controllers/projects/jobs_controller_spec.rb
 # coding: utf-8
 require 'spec_helper'

-describe Projects::JobsController do
+describe Projects::JobsController, :clean_gitlab_redis_shared_state do
  include ApiHelpers
  include HttpIOHelpers

@@ -10,6 +10,7 @@ describe Projects::JobsController do
  let(:user) { create(:user) }

  before do
+    stub_feature_flags(ci_enable_live_trace: true)
    stub_not_protect_default_branch
  end


--- a/spec/factories/ci/build_trace_chunks.rb
+++ b/spec/factories/ci/build_trace_chunks.rb
+FactoryBot.define do
+  factory :ci_build_trace_chunk, class: Ci::BuildTraceChunk do
+    build factory: :ci_build
+    chunk_index 0
+    data_store :redis
+  end
+end
--- a/spec/features/projects/jobs_spec.rb
+++ b/spec/features/projects/jobs_spec.rb
 require 'spec_helper'
 require 'tempfile'

-feature 'Jobs' do
+feature 'Jobs', :clean_gitlab_redis_shared_state do
  let(:user) { create(:user) }
  let(:user_access_level) { :developer }
  let(:project) { create(:project, :repository) }
@@ -282,7 +282,7 @@ feature 'Jobs' do
        it 'loads job trace' do
          expect(page).to have_content 'BUILD TRACE'

-          job.trace.write do |stream|
+          job.trace.write('a+b') do |stream|
            stream.append(' and more trace', 11)
          end

@@ -593,44 +593,6 @@ feature 'Jobs' do
      end
    end

-    context 'storage form' do
-      let(:existing_file) { Tempfile.new('existing-trace-file').path }
-
-      before do
-        job.run!
-      end
-
-      context 'when job has trace in file', :js do
-        before do
-          allow_any_instance_of(Gitlab::Ci::Trace)
-            .to receive(:paths)
-            .and_return([existing_file])
-        end
-
-        it 'sends the right headers' do
-          requests = inspect_requests(inject_headers: { 'X-Sendfile-Type' => 'X-Sendfile' }) do
-            visit raw_project_job_path(project, job)
-          end
-          expect(requests.first.response_headers['Content-Type']).to eq('text/plain; charset=utf-8')
-          expect(requests.first.response_headers['X-Sendfile']).to eq(existing_file)
-        end
-      end
-
-      context 'when job has trace in the database', :js do
-        before do
-          allow_any_instance_of(Gitlab::Ci::Trace)
-            .to receive(:paths)
-            .and_return([])
-
-          visit project_job_path(project, job)
-        end
-
-        it 'sends the right headers' do
-          expect(page).not_to have_selector('.js-raw-link-controller')
-        end
-      end
-    end
-
    context "when visiting old URL" do
      let(:raw_job_url) do
        raw_project_job_path(project, job)

--- a/spec/lib/gitlab/ci/trace/chunked_io_spec.rb
+++ b/spec/lib/gitlab/ci/trace/chunked_io_spec.rb
+require 'spec_helper'
+
+describe Gitlab::Ci::Trace::ChunkedIO, :clean_gitlab_redis_cache do
+  include ChunkedIOHelpers
+
+  set(:build) { create(:ci_build, :running) }
+  let(:chunked_io) { described_class.new(build) }
+
+  before do
+    stub_feature_flags(ci_enable_live_trace: true)
+  end
+
+  # Checks ChunkedIO#size on a freshly built instance for three setups:
+  # a short trace, a trace longer than the stubbed chunk size, and no trace.
+  context "#initialize" do
+    context 'when a chunk exists' do
+      before do
+        build.trace.set('ABC')
+      end
+
+      it { expect(chunked_io.size).to eq(3) }
+    end
+
+    context 'when two chunks exist' do
+      before do
+        # 6 bytes are written while the chunk size is stubbed to 4.
+        stub_buffer_size(4)
+        build.trace.set('ABCDEF')
+      end
+
+      it { expect(chunked_io.size).to eq(6) }
+    end
+
+    context 'when no chunks exists' do
+      it { expect(chunked_io.size).to eq(0) }
+    end
+  end
+
+  # Checks the value returned by ChunkedIO#seek for the IO::SEEK_END,
+  # IO::SEEK_SET and IO::SEEK_CUR modes against a trace holding the sample
+  # fixture, plus the error raised when the target is past the end.
+  context "#seek" do
+    subject { chunked_io.seek(pos, where) }
+
+    before do
+      build.trace.set(sample_trace_raw)
+    end
+
+    context 'when moves pos to end of the file' do
+      let(:pos) { 0 }
+      let(:where) { IO::SEEK_END }
+
+      it { is_expected.to eq(sample_trace_raw.bytesize) }
+    end
+
+    context 'when moves pos to middle of the file' do
+      let(:pos) { sample_trace_raw.bytesize / 2 }
+      let(:where) { IO::SEEK_SET }
+
+      it { is_expected.to eq(pos) }
+    end
+
+    context 'when moves pos around' do
+      it 'matches the result' do
+        expect(chunked_io.seek(0)).to eq(0)
+        expect(chunked_io.seek(100, IO::SEEK_CUR)).to eq(100)
+        # Relative seek of more than the whole trace length must be rejected.
+        expect { chunked_io.seek(sample_trace_raw.bytesize + 1, IO::SEEK_CUR) }
+          .to raise_error('new position is outside of file')
+      end
+    end
+  end
+
+  # Checks ChunkedIO#eof? after seeking to the last byte offset of the
+  # sample trace (truthy) and after seeking back to offset 0 (falsey).
+  context "#eof?" do
+    subject { chunked_io.eof? }
+
+    before do
+      build.trace.set(sample_trace_raw)
+    end
+
+    context 'when current pos is at end of the file' do
+      before do
+        chunked_io.seek(sample_trace_raw.bytesize, IO::SEEK_SET)
+      end
+
+      it { is_expected.to be_truthy }
+    end
+
+    context 'when current pos is not at end of the file' do
+      before do
+        chunked_io.seek(0, IO::SEEK_SET)
+      end
+
+      it { is_expected.to be_falsey }
+    end
+  end
+
+  # Checks ChunkedIO#each_line against StringIO#each_line over the same
+  # sample trace, with the chunk size stubbed below and above the trace size.
+  context "#each_line" do
+    # Reference implementation the yielded lines are compared with.
+    let(:string_io) { StringIO.new(sample_trace_raw) }
+
+    context 'when buffer size is smaller than file size' do
+      before do
+        stub_buffer_size(sample_trace_raw.bytesize / 2)
+        build.trace.set(sample_trace_raw)
+      end
+
+      it 'yields lines' do
+        expect { |b| chunked_io.each_line(&b) }
+          .to yield_successive_args(*string_io.each_line.to_a)
+      end
+    end
+
+    context 'when buffer size is larger than file size' do
+      before do
+        stub_buffer_size(sample_trace_raw.bytesize * 2)
+        build.trace.set(sample_trace_raw)
+      end
+
+      # NOTE(review): the description mentions get_chunk, but the expectation
+      # below is set on current_chunk - confirm which name is intended.
+      it 'calls get_chunk only once' do
+        expect_any_instance_of(Gitlab::Ci::Trace::ChunkedIO)
+          .to receive(:current_chunk).once.and_call_original
+
+        chunked_io.each_line { |line| }
+      end
+    end
+  end
+
+  # Checks ChunkedIO#read(length) for four lengths: nil (everything),
+  # 100 bytes, more than the trace holds, and 0. Each length is exercised
+  # with the chunk size stubbed to half and to double the trace size.
+  context "#read" do
+    subject { chunked_io.read(length) }
+
+    context 'when read the whole size' do
+      # nil length: the whole trace is expected back.
+      let(:length) { nil }
+
+      context 'when buffer size is smaller than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize / 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it { is_expected.to eq(sample_trace_raw) }
+      end
+
+      context 'when buffer size is larger than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize * 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it { is_expected.to eq(sample_trace_raw) }
+      end
+    end
+
+    context 'when read only first 100 bytes' do
+      let(:length) { 100 }
+
+      context 'when buffer size is smaller than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize / 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it 'reads a trace' do
+          is_expected.to eq(sample_trace_raw.byteslice(0, length))
+        end
+      end
+
+      context 'when buffer size is larger than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize * 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it 'reads a trace' do
+          is_expected.to eq(sample_trace_raw.byteslice(0, length))
+        end
+      end
+    end
+
+    context 'when tries to read oversize' do
+      # Requested length exceeds the trace by 1000 bytes; the full trace is
+      # the expected result.
+      let(:length) { sample_trace_raw.bytesize + 1000 }
+
+      context 'when buffer size is smaller than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize / 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it 'reads a trace' do
+          is_expected.to eq(sample_trace_raw)
+        end
+      end
+
+      context 'when buffer size is larger than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize * 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it 'reads a trace' do
+          is_expected.to eq(sample_trace_raw)
+        end
+      end
+    end
+
+    context 'when tries to read 0 bytes' do
+      let(:length) { 0 }
+
+      context 'when buffer size is smaller than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize / 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it 'reads a trace' do
+          is_expected.to be_empty
+        end
+      end
+
+      context 'when buffer size is larger than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize * 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it 'reads a trace' do
+          is_expected.to be_empty
+        end
+      end
+    end
+  end
+
+  # Checks ChunkedIO#readline line by line against StringIO#readline over
+  # the same sample trace, from the start and from the middle of the data.
+  context "#readline" do
+    subject { chunked_io.readline }
+
+    # Reference implementation each returned line is compared with.
+    let(:string_io) { StringIO.new(sample_trace_raw) }
+
+    shared_examples 'all line matching' do
+      it do
+        # One readline call per line of the sample trace on both objects.
+        (0...sample_trace_raw.lines.count).each do
+          expect(chunked_io.readline).to eq(string_io.readline)
+        end
+      end
+    end
+
+    context 'when buffer size is smaller than file size' do
+      before do
+        stub_buffer_size(sample_trace_raw.bytesize / 2)
+        build.trace.set(sample_trace_raw)
+      end
+
+      it_behaves_like 'all line matching'
+    end
+
+    context 'when buffer size is larger than file size' do
+      before do
+        stub_buffer_size(sample_trace_raw.bytesize * 2)
+        build.trace.set(sample_trace_raw)
+      end
+
+      it_behaves_like 'all line matching'
+    end
+
+    context 'when pos is at middle of the file' do
+      before do
+        stub_buffer_size(sample_trace_raw.bytesize / 2)
+        build.trace.set(sample_trace_raw)
+
+        # Move both readers to the same midpoint before comparing.
+        chunked_io.seek(chunked_io.size / 2)
+        string_io.seek(string_io.size / 2)
+      end
+
+      it 'reads from pos' do
+        expect(chunked_io.readline).to eq(string_io.readline)
+      end
+    end
+  end
+
+  # Checks ChunkedIO#write: the return value is compared with the byte size
+  # of the written data, and the content is read back from offset 0. Covers
+  # writing into an empty trace and appending after existing data.
+  context "#write" do
+    subject { chunked_io.write(data) }
+
+    let(:data) { sample_trace_raw }
+
+    context 'when data does not exist' do
+      shared_examples 'writes a trace' do
+        it do
+          is_expected.to eq(data.bytesize)
+
+          # Rewind before reading the written content back.
+          chunked_io.seek(0, IO::SEEK_SET)
+          expect(chunked_io.read).to eq(data)
+        end
+      end
+
+      context 'when buffer size is smaller than file size' do
+        before do
+          stub_buffer_size(data.bytesize / 2)
+        end
+
+        it_behaves_like 'writes a trace'
+      end
+
+      context 'when buffer size is larger than file size' do
+        before do
+          stub_buffer_size(data.bytesize * 2)
+        end
+
+        it_behaves_like 'writes a trace'
+      end
+    end
+
+    context 'when data already exists' do
+      let(:exist_data) { 'exist data' }
+
+      shared_examples 'appends a trace' do
+        it do
+          # Position at the end first so the write lands after exist_data.
+          chunked_io.seek(0, IO::SEEK_END)
+          is_expected.to eq(data.bytesize)
+
+          chunked_io.seek(0, IO::SEEK_SET)
+          expect(chunked_io.read).to eq(exist_data + data)
+        end
+      end
+
+      context 'when buffer size is smaller than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize / 2)
+          build.trace.set(exist_data)
+        end
+
+        it_behaves_like 'appends a trace'
+      end
+
+      context 'when buffer size is larger than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize * 2)
+          build.trace.set(exist_data)
+        end
+
+        it_behaves_like 'appends a trace'
+      end
+    end
+  end
+
+  # Checks ChunkedIO#truncate(offset): after truncating, reading from
+  # offset 0 is expected to return only the first `offset` bytes of the
+  # sample trace.
+  context "#truncate" do
+    let(:offset) { 10 }
+
+    # NOTE(review): despite the context name, both nested contexts write
+    # sample_trace_raw to the trace before truncating - confirm the naming.
+    context 'when data does not exist' do
+      shared_examples 'truncates a trace' do
+        it do
+          chunked_io.truncate(offset)
+
+          chunked_io.seek(0, IO::SEEK_SET)
+          expect(chunked_io.read).to eq(sample_trace_raw.byteslice(0, offset))
+        end
+      end
+
+      context 'when buffer size is smaller than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize / 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it_behaves_like 'truncates a trace'
+      end
+
+      context 'when buffer size is larger than file size' do
+        before do
+          stub_buffer_size(sample_trace_raw.bytesize * 2)
+          build.trace.set(sample_trace_raw)
+        end
+
+        it_behaves_like 'truncates a trace'
+      end
+    end
+  end
+
+  # Checks ChunkedIO#destroy!: the reported size must drop from the sample
+  # trace's byte size to 0, and no Ci::BuildTraceChunk rows may remain for
+  # the build.
+  context "#destroy!" do
+    subject { chunked_io.destroy! }
+
+    before do
+      build.trace.set(sample_trace_raw)
+    end
+
+    it 'deletes' do
+      expect { subject }.to change { chunked_io.size }
+        .from(sample_trace_raw.bytesize).to(0)
+
+      expect(Ci::BuildTraceChunk.where(build: build).count).to eq(0)
+    end
+  end
+end
--- a/spec/lib/gitlab/ci/trace/stream_spec.rb
+++ b/spec/lib/gitlab/ci/trace/stream_spec.rb
--- a/spec/lib/gitlab/ci/trace_spec.rb
+++ b/spec/lib/gitlab/ci/trace_spec.rb
--- a/spec/models/ci/build_trace_chunk_spec.rb
+++ b/spec/models/ci/build_trace_chunk_spec.rb
--- a/spec/requests/api/runner_spec.rb
+++ b/spec/requests/api/runner_spec.rb
 require 'spec_helper'

-describe API::Runner do
+describe API::Runner, :clean_gitlab_redis_shared_state do
  include StubGitlabCalls
+  include RedisHelpers

  let(:registration_token) { 'abcdefg123456' }

  before do
+    stub_feature_flags(ci_enable_live_trace: true)
    stub_gitlab_calls
    stub_application_setting(runners_registration_token: registration_token)
    allow_any_instance_of(Ci::Runner).to receive(:cache_attributes)
@@ -882,6 +884,49 @@ describe API::Runner do
            expect(response.status).to eq(403)
          end
        end
+
+        context 'when trace is patched' do
+          before do
+            patch_the_trace
+          end
+
+          it 'has valid trace' do
+            expect(response.status).to eq(202)
+            expect(job.reload.trace.raw).to eq 'BUILD TRACE appended appended'
+          end
+
+          context 'when redis data are flushed' do
+            before do
+              redis_shared_state_cleanup!
+            end
+
+            it 'has empty trace' do
+              expect(job.reload.trace.raw).to eq ''
+            end
+
+            context 'when we perform partial patch' do
+              before do
+                patch_the_trace('hello', headers.merge({ 'Content-Range' => "28-32/5" }))
+              end
+
+              it 'returns an error' do
+                expect(response.status).to eq(416)
+                expect(response.header['Range']).to eq('0-0')
+              end
+            end
+
+            context 'when we resend full trace' do
+              before do
+                patch_the_trace('BUILD TRACE appended appended hello', headers.merge({ 'Content-Range' => "0-34/35" }))
+              end
+
+              it 'succeeds with updating trace' do
+                expect(response.status).to eq(202)
+                expect(job.reload.trace.raw).to eq 'BUILD TRACE appended appended hello'
+              end
+            end
+          end
+        end
      end

      context 'when Runner makes a force-patch' do
@@ -898,7 +943,7 @@ describe API::Runner do
      end

      context 'when content-range start is too big' do
-        let(:headers_with_range) { headers.merge({ 'Content-Range' => '15-20' }) }
+        let(:headers_with_range) { headers.merge({ 'Content-Range' => '15-20/6' }) }

        it 'gets 416 error response with range headers' do
          expect(response.status).to eq 416
@@ -908,7 +953,7 @@ describe API::Runner do
      end

      context 'when content-range start is too small' do
-        let(:headers_with_range) { headers.merge({ 'Content-Range' => '8-20' }) }
+        let(:headers_with_range) { headers.merge({ 'Content-Range' => '8-20/13' }) }

        it 'gets 416 error response with range headers' do
          expect(response.status).to eq 416

--- a/spec/services/ci/retry_build_service_spec.rb
+++ b/spec/services/ci/retry_build_service_spec.rb
@@ -32,7 +32,7 @@ describe Ci::RetryBuildService do
       runner_id tag_taggings taggings tags trigger_request_id
       user_id auto_canceled_by_id retried failure_reason
       artifacts_file_store artifacts_metadata_store
-       metadata].freeze
+       metadata trace_chunks].freeze

  shared_examples 'build duplication' do
    let(:another_pipeline) { create(:ci_empty_pipeline, project: project) }

--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -86,6 +86,7 @@ RSpec.configure do |config|
  config.include WaitForRequests, :js
  config.include LiveDebugger, :js
  config.include MigrationsHelpers, :migration
+  config.include RedisHelpers

  if ENV['CI']
    # This includes the first try, i.e. tests will be run 4 times before failing.
@@ -146,21 +147,27 @@ RSpec.configure do |config|
  end

  config.around(:each, :clean_gitlab_redis_cache) do |example|
-    Gitlab::Redis::Cache.with(&:flushall)
+    redis_cache_cleanup!

    example.run

-    Gitlab::Redis::Cache.with(&:flushall)
+    redis_cache_cleanup!
  end

  config.around(:each, :clean_gitlab_redis_shared_state) do |example|
-    Gitlab::Redis::SharedState.with(&:flushall)
-    Sidekiq.redis(&:flushall)
+    redis_shared_state_cleanup!

    example.run

-    Gitlab::Redis::SharedState.with(&:flushall)
-    Sidekiq.redis(&:flushall)
+    redis_shared_state_cleanup!
+  end
+
+  config.around(:each, :clean_gitlab_redis_queues) do |example|
+    redis_queues_cleanup!
+
+    example.run
+
+    redis_queues_cleanup!
  end

  # The :each scope runs "inside" the example, so this hook ensures the DB is in the

--- a/spec/support/chunked_io/chunked_io_helpers.rb
+++ b/spec/support/chunked_io/chunked_io_helpers.rb
+# Spec helpers for trace specs that need sample trace data and a
+# controllable chunk size.
+module ChunkedIOHelpers
+  # Returns the content of the 'trace/sample_trace' fixture with its encoding
+  # forced to binary. The string is memoized in @sample_trace_raw, so the
+  # fixture is read at most once per helper instance.
+  def sample_trace_raw
+    @sample_trace_raw ||= File.read(expand_fixture_path('trace/sample_trace'))
+      .force_encoding(Encoding::BINARY)
+  end
+
+  # Stubs CHUNK_SIZE on both Ci::BuildTraceChunk and
+  # Gitlab::Ci::Trace::ChunkedIO with the same `size`, keeping the two
+  # constants in agreement.
+  def stub_buffer_size(size)
+    stub_const('Ci::BuildTraceChunk::CHUNK_SIZE', size)
+    stub_const('Gitlab::Ci::Trace::ChunkedIO::CHUNK_SIZE', size)
+  end
+end
--- a/spec/support/redis/redis_helpers.rb
+++ b/spec/support/redis/redis_helpers.rb
+# Spec helpers that wipe one Redis connection wrapper each by calling
+# `flushall` on it. Every helper removes all keys reachable through that
+# connection, not only keys created by the current example.
+module RedisHelpers
+  # config/README.md
+
+  # Usage: performance enhancement
+  # Calls flushall on the Gitlab::Redis::Cache connection.
+  def redis_cache_cleanup!
+    Gitlab::Redis::Cache.with(&:flushall)
+  end
+
+  # Usage: SideKiq, Mailroom, CI Runner, Workhorse, push services
+  # Calls flushall on the Gitlab::Redis::Queues connection.
+  def redis_queues_cleanup!
+    Gitlab::Redis::Queues.with(&:flushall)
+  end
+
+  # Usage: session state, rate limiting
+  # Calls flushall on the Gitlab::Redis::SharedState connection.
+  def redis_shared_state_cleanup!
+    Gitlab::Redis::SharedState.with(&:flushall)
+  end
+end
--- a/spec/support/shared_examples/ci_trace_shared_examples.rb
+++ b/spec/support/shared_examples/ci_trace_shared_examples.rb