BigW Consortium Gitlab

Commit a6c60127 by Yorick Peterse

Removed tracking of raw SQL queries

This particular setup had 3 problems: 1. Storing SQL queries as tags is very inefficient as InfluxDB ends up indexing every query (and they can get pretty large). Storing these as values instead means we can't always display the SQL as easily. 2. We already instrument ActiveRecord query methods, thus we already have timing information about database queries. 3. SQL obfuscation is difficult to get right and I'd rather not expose sensitive data by accident.
parent ab08321b
......@@ -7,7 +7,6 @@ if Gitlab::Metrics.enabled?
# These are manually require'd so the classes are registered properly with
# ActiveSupport.
require 'gitlab/metrics/subscribers/action_view'
require 'gitlab/metrics/subscribers/active_record'
Gitlab::Application.configure do |config|
config.middleware.use(Gitlab::Metrics::RackMiddleware)
......
module Gitlab
module Metrics
# Class for producing SQL queries with sensitive data stripped out.
class ObfuscatedSQL
REPLACEMENT = /
\d+(\.\d+)? # integers, floats
| '.+?' # single quoted strings
| \/.+?(?<!\\)\/ # regexps (including escaped slashes)
/x
MYSQL_REPLACEMENTS = /
".+?" # double quoted strings
/x
# Regex to replace consecutive placeholders with a single one indicating
# the length. This can be useful when a "IN" statement uses thousands of
# IDs (storing this would just be a waste of space).
CONSECUTIVE = /(\?(\s*,\s*)?){2,}/
# sql - The raw SQL query as a String.
def initialize(sql)
@sql = sql
end
# Returns a new, obfuscated SQL query.
def to_s
regex = REPLACEMENT
if Gitlab::Database.mysql?
regex = Regexp.union(regex, MYSQL_REPLACEMENTS)
end
sql = @sql.gsub(regex, '?').gsub(CONSECUTIVE) do |match|
"#{match.count(',') + 1} values"
end
# InfluxDB escapes double quotes upon output, so lets get rid of them
# whenever we can.
if Gitlab::Database.postgresql?
sql = sql.delete('"')
end
sql.tr("\n", ' ')
end
end
end
end
module Gitlab
module Metrics
module Subscribers
# Class for tracking raw SQL queries.
#
# Queries are obfuscated before being logged to ensure no private data is
# exposed via InfluxDB/Grafana.
class ActiveRecord < ActiveSupport::Subscriber
attach_to :active_record
SERIES = 'sql_queries'
def sql(event)
return unless current_transaction
values = values_for(event)
tags = tags_for(event)
current_transaction.add_metric(SERIES, values, tags)
end
private
def values_for(event)
{ duration: event.duration }
end
def tags_for(event)
sql = ObfuscatedSQL.new(event.payload[:sql]).to_s
tags = { sql: sql }
file, line = Metrics.last_relative_application_frame
if file and line
tags[:file] = file
tags[:line] = line
end
tags
end
def current_transaction
Transaction.current
end
end
end
end
end
require 'spec_helper'
describe Gitlab::Metrics::ObfuscatedSQL do
describe '#to_s' do
it 'replaces newlines with a space' do
sql = described_class.new("SELECT x\nFROM y")
expect(sql.to_s).to eq('SELECT x FROM y')
end
describe 'using single values' do
it 'replaces a single integer' do
sql = described_class.new('SELECT x FROM y WHERE a = 10')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces a single float' do
sql = described_class.new('SELECT x FROM y WHERE a = 10.5')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces a single quoted string' do
sql = described_class.new("SELECT x FROM y WHERE a = 'foo'")
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
if Gitlab::Database.mysql?
it 'replaces a double quoted string' do
sql = described_class.new('SELECT x FROM y WHERE a = "foo"')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
end
it 'replaces a single regular expression' do
sql = described_class.new('SELECT x FROM y WHERE a = /foo/')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces regular expressions using escaped slashes' do
sql = described_class.new('SELECT x FROM y WHERE a = /foo\/bar/')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
end
describe 'using consecutive values' do
it 'replaces multiple integers' do
sql = described_class.new('SELECT x FROM y WHERE z IN (10, 20, 30)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (3 values)')
end
it 'replaces multiple floats' do
sql = described_class.new('SELECT x FROM y WHERE z IN (1.5, 2.5, 3.5)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (3 values)')
end
it 'replaces multiple single quoted strings' do
sql = described_class.new("SELECT x FROM y WHERE z IN ('foo', 'bar')")
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
if Gitlab::Database.mysql?
it 'replaces multiple double quoted strings' do
sql = described_class.new('SELECT x FROM y WHERE z IN ("foo", "bar")')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
end
it 'replaces multiple regular expressions' do
sql = described_class.new('SELECT x FROM y WHERE z IN (/foo/, /bar/)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
end
if Gitlab::Database.postgresql?
it 'replaces double quotes' do
sql = described_class.new('SELECT "x" FROM "y"')
expect(sql.to_s).to eq('SELECT x FROM y')
end
end
end
end
require 'spec_helper'
describe Gitlab::Metrics::Subscribers::ActiveRecord do
let(:transaction) { Gitlab::Metrics::Transaction.new }
let(:subscriber) { described_class.new }
let(:event) do
double(:event, duration: 0.2,
payload: { sql: 'SELECT * FROM users WHERE id = 10' })
end
before do
allow(subscriber).to receive(:current_transaction).and_return(transaction)
allow(Gitlab::Metrics).to receive(:last_relative_application_frame).
and_return(['app/models/foo.rb', 4])
end
describe '#sql' do
it 'tracks the execution of a SQL query' do
sql = 'SELECT * FROM users WHERE id = ?'
values = { duration: 0.2 }
tags = { sql: sql, file: 'app/models/foo.rb', line: 4 }
expect(transaction).to receive(:add_metric).
with(described_class::SERIES, values, tags)
subscriber.sql(event)
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment