BigW Consortium Gitlab

migration_helpers.rb 8.93 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
module Gitlab
  module Database
    module MigrationHelpers
      # Creates a new index, concurrently when supported
      #
      # On PostgreSQL this method creates an index concurrently, on MySQL this
      # creates a regular index.
      #
      # Example:
      #
      #     add_concurrent_index :users, :some_column
      #
      # See Rails' `add_index` for more info on the available arguments.
14
      def add_concurrent_index(table_name, column_name, options = {})
15 16 17 18 19 20 21
        if transaction_open?
          raise 'add_concurrent_index can not be run inside a transaction, ' \
            'you can disable transactions by calling disable_ddl_transaction! ' \
            'in the body of your migration class'
        end

        if Database.postgresql?
22
          options = options.merge({ algorithm: :concurrently })
23
          disable_statement_timeout
24 25
        end

26
        add_index(table_name, column_name, options)
27 28
      end

29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
      # Adds a foreign key with only minimal locking on the tables involved.
      #
      # This method only requires minimal locking when using PostgreSQL. When
      # using MySQL this method will use Rails' default `add_foreign_key`.
      #
      # source - The source table containing the foreign key.
      # target - The target table the key points to.
      # column - The name of the column to create the foreign key on.
      # on_delete - The action to perform when associated data is removed,
      #             defaults to "CASCADE".
      def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade)
        # Transactions would result in ALTER TABLE locks being held for the
        # duration of the transaction, defeating the purpose of this method.
        if transaction_open?
          raise 'add_concurrent_foreign_key can not be run inside a transaction'
        end

        # While MySQL does allow disabling of foreign keys it has no equivalent
        # of PostgreSQL's "VALIDATE CONSTRAINT". As a result we'll just fall
        # back to the normal foreign key procedure.
        if Database.mysql?
          return add_foreign_key(source, target,
                                 column: column,
                                 on_delete: on_delete)
        end

        disable_statement_timeout

57
        key_name = concurrent_foreign_key_name(source, column)
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76

        # Using NOT VALID allows us to create a key without immediately
        # validating it. This means we keep the ALTER TABLE lock only for a
        # short period of time. The key _is_ enforced for any newly created
        # data.
        execute <<-EOF.strip_heredoc
        ALTER TABLE #{source}
        ADD CONSTRAINT #{key_name}
        FOREIGN KEY (#{column})
        REFERENCES #{target} (id)
        ON DELETE #{on_delete} NOT VALID;
        EOF

        # Validate the existing constraint. This can potentially take a very
        # long time to complete, but fortunately does not lock the source table
        # while running.
        execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
      end

77 78 79 80 81 82 83 84 85
      # Returns the name for a concurrent foreign key.
      #
      # PostgreSQL constraint names have a limit of 63 bytes. The logic used
      # here is based on Rails' foreign_key_name() method, which unfortunately
      # is private so we can't rely on it directly.
      def concurrent_foreign_key_name(table, column)
        "fk_#{Digest::SHA256.hexdigest("#{table}_#{column}_fk").first(10)}"
      end

86 87 88 89
      # Long-running migrations may take more than the timeout allowed by
      # the database. Disable the session's statement timeout to ensure
      # migrations don't get killed prematurely. (PostgreSQL only)
      def disable_statement_timeout
90
        execute('SET statement_timeout TO 0') if Database.postgresql?
91 92
      end

93 94 95
      # Updates the value of a column in batches.
      #
      # This method updates the table in batches of 5% of the total row count.
96 97
      # This method will continue updating rows until no rows remain.
      #
98
      # When given a block this method will yield two values to the block:
99 100 101 102 103 104 105 106 107 108 109 110 111
      #
      # 1. An instance of `Arel::Table` for the table that is being updated.
      # 2. The query to run as an Arel object.
      #
      # By supplying a block one can add extra conditions to the queries being
      # executed. Note that the same block is used for _all_ queries.
      #
      # Example:
      #
      #     update_column_in_batches(:projects, :foo, 10) do |table, query|
      #       query.where(table[:some_column].eq('hello'))
      #     end
      #
112
      # This would result in this method updating only rows where
113
      # `projects.some_column` equals "hello".
114 115 116 117
      #
      # table - The name of the table.
      # column - The name of the column to update.
      # value - The value for the column.
118 119 120 121 122 123 124
      #
      # Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
      # determines this method to be too complex while there's no way to make it
      # less "complex" without introducing extra methods (which actually will
      # make things _more_ complex).
      #
      # rubocop: disable Metrics/AbcSize
125
      def update_column_in_batches(table, column, value)
126
        table = Arel::Table.new(table)
127

128 129 130 131
        count_arel = table.project(Arel.star.count.as('count'))
        count_arel = yield table, count_arel if block_given?

        total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i
132

133 134
        return if total == 0

135
        # Update in batches of 5% until we run out of any rows to update.
136 137
        batch_size = ((total / 100.0) * 5.0).ceil

138 139 140
        start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
        start_arel = yield table, start_arel if block_given?
        start_id = exec_query(start_arel.to_sql).to_hash.first['id'].to_i
141

142
        loop do
143
          stop_arel = table.project(table[:id]).
144
            where(table[:id].gteq(start_id)).
145 146
            order(table[:id].asc).
            take(1).
147
            skip(batch_size)
148 149 150

          stop_arel = yield table, stop_arel if block_given?
          stop_row = exec_query(stop_arel.to_sql).to_hash.first
151

152 153
          update_arel = Arel::UpdateManager.new(ActiveRecord::Base).
            table(table).
154
            set([[table[column], value]]).
155
            where(table[:id].gteq(start_id))
156 157

          if stop_row
158 159 160
            stop_id = stop_row['id'].to_i
            start_id = stop_id
            update_arel = update_arel.where(table[:id].lt(stop_id))
161 162
          end

163 164
          update_arel = yield table, update_arel if block_given?

165
          execute(update_arel.to_sql)
166

167 168
          # There are no more rows left to update.
          break unless stop_row
169 170 171 172 173 174 175 176
        end
      end

      # Adds a column with a default value without locking an entire table.
      #
      # This method runs the following steps:
      #
      # 1. Add the column with a default value of NULL.
177 178 179
      # 2. Change the default value of the column to the specified value.
      # 3. Update all existing rows in batches.
      # 4. Set a `NOT NULL` constraint on the column if desired (the default).
180 181 182 183 184 185 186 187 188
      #
      # These steps ensure a column can be added to a large and commonly used
      # table without locking the entire table for the duration of the table
      # modification.
      #
      # table - The name of the table to update.
      # column - The name of the column to add.
      # type - The column type (e.g. `:integer`).
      # default - The default value for the column.
189 190
      # limit - Sets a column limit. For example, for :integer, the default is
      #         4-bytes. Set `limit: 8` to allow 8-byte integers.
191 192
      # allow_null - When set to `true` the column will allow NULL values, the
      #              default is to not allow NULL values.
193 194 195
      #
      # This method can also take a block which is passed directly to the
      # `update_column_in_batches` method.
196
      def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block)
197 198 199 200 201 202
        if transaction_open?
          raise 'add_column_with_default can not be run inside a transaction, ' \
            'you can disable transactions by calling disable_ddl_transaction! ' \
            'in the body of your migration class'
        end

203 204
        disable_statement_timeout

205
        transaction do
206 207 208 209 210
          if limit
            add_column(table, column, type, default: nil, limit: limit)
          else
            add_column(table, column, type, default: nil)
          end
211 212 213 214 215 216 217

          # Changing the default before the update ensures any newly inserted
          # rows already use the proper default value.
          change_column_default(table, column, default)
        end

        begin
218
          update_column_in_batches(table, column, default, &block)
219

220
          change_column_null(table, column, false) unless allow_null
221 222 223
        # We want to rescue _all_ exceptions here, even those that don't inherit
        # from StandardError.
        rescue Exception => error # rubocop: disable all
224 225 226 227 228 229 230 231
          remove_column(table, column)

          raise error
        end
      end
    end
  end
end