1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
module Gitlab
module PathRegex
extend self
# All routes that appear on the top level must be listed here.
# This will make sure that groups cannot be created with these names
# as these routes would be masked by the paths already in place.
#
# Example:
# /api/api-project
#
# the path `api` shouldn't be allowed because it would be masked by `api/*`
#
TOP_LEVEL_ROUTES = %w[
-
.well-known
404.html
422.html
500.html
502.html
503.html
abuse_reports
admin
api
apple-touch-icon-precomposed.png
apple-touch-icon.png
assets
autocomplete
ci
dashboard
deploy.html
explore
favicon.ico
files
groups
health_check
help
import
invites
jwt
koding
notification_settings
oauth
profile
projects
public
robots.txt
s
search
sent_notifications
slash-command-logo.png
snippets
u
unsubscribes
uploads
users
].freeze
# This list should contain all words following `/*namespace_id/:project_id` in
# routes that contain a second wildcard.
#
# Example:
# /*namespace_id/:project_id/badges/*ref/build
#
# If `badges` was allowed as a project/group name, we would not be able to access the
# `badges` route for those projects:
#
# Consider a namespace with path `foo/bar` and a project called `badges`.
# The route to the build badge would then be `/foo/bar/badges/badges/master/build.svg`
#
# When accessing this path the route would be matched to the `badges` path
# with the following params:
# - namespace_id: `foo`
# - project_id: `bar`
# - ref: `badges/master`
#
# Failing to find the project, this would result in a 404.
#
# By rejecting `badges` the router can _count_ on the fact that `badges` will
# be preceded by the `namespace/project`.
PROJECT_WILDCARD_ROUTES = %w[
-
badges
blame
blob
builds
commits
create
create_dir
edit
environments/folders
files
find_file
gitlab-lfs/objects
info/lfs/objects
new
preview
raw
refs
tree
update
wikis
].freeze
# These are all the paths that follow `/groups/*id/ or `/groups/*group_id`
# We need to reject these because we have a `/groups/*id` page that is the same
# as the `/*id`.
#
# If we would allow a subgroup to be created with the name `activity` then
# this group would not be accessible through `/groups/parent/activity` since
# this would map to the activity-page of its parent.
GROUP_ROUTES = %w[
-
].freeze
ILLEGAL_PROJECT_PATH_WORDS = PROJECT_WILDCARD_ROUTES
ILLEGAL_GROUP_PATH_WORDS = (PROJECT_WILDCARD_ROUTES | GROUP_ROUTES).freeze
# The namespace regex is used in JavaScript to validate usernames in the "Register" form. However, Javascript
# does not support the negative lookbehind assertion (?<!) that disallows usernames ending in `.git` and `.atom`.
# Since this is a non-trivial problem to solve in Javascript (heavily complicate the regex, modify view code to
# allow non-regex validations, etc), `NAMESPACE_FORMAT_REGEX_JS` serves as a Javascript-compatible version of
# `NAMESPACE_FORMAT_REGEX`, with the negative lookbehind assertion removed. This means that the client-side validation
# will pass for usernames ending in `.atom` and `.git`, but will be caught by the server-side validation.
PATH_REGEX_STR = '[a-zA-Z0-9_\.][a-zA-Z0-9_\-\.]*'.freeze
NAMESPACE_FORMAT_REGEX_JS = PATH_REGEX_STR + '[a-zA-Z0-9_\-]|[a-zA-Z0-9_]'.freeze
NO_SUFFIX_REGEX = /(?<!\.git|\.atom)/.freeze
NAMESPACE_FORMAT_REGEX = /(?:#{NAMESPACE_FORMAT_REGEX_JS})#{NO_SUFFIX_REGEX}/.freeze
PROJECT_PATH_FORMAT_REGEX = /(?:#{PATH_REGEX_STR})#{NO_SUFFIX_REGEX}/.freeze
FULL_NAMESPACE_FORMAT_REGEX = %r{(#{NAMESPACE_FORMAT_REGEX}/)*#{NAMESPACE_FORMAT_REGEX}}.freeze
def root_namespace_route_regex
@root_namespace_route_regex ||= begin
illegal_words = Regexp.new(Regexp.union(TOP_LEVEL_ROUTES).source, Regexp::IGNORECASE)
single_line_regexp %r{
(?!(#{illegal_words})/)
#{NAMESPACE_FORMAT_REGEX}
}x
end
end
def full_namespace_route_regex
@full_namespace_route_regex ||= begin
illegal_words = Regexp.new(Regexp.union(ILLEGAL_GROUP_PATH_WORDS).source, Regexp::IGNORECASE)
single_line_regexp %r{
#{root_namespace_route_regex}
(?:
/
(?!#{illegal_words}/)
#{NAMESPACE_FORMAT_REGEX}
)*
}x
end
end
def project_route_regex
@project_route_regex ||= begin
illegal_words = Regexp.new(Regexp.union(ILLEGAL_PROJECT_PATH_WORDS).source, Regexp::IGNORECASE)
single_line_regexp %r{
(?!(#{illegal_words})/)
#{PROJECT_PATH_FORMAT_REGEX}
}x
end
end
def project_git_route_regex
@project_git_route_regex ||= /#{project_route_regex}\.git/.freeze
end
def root_namespace_path_regex
@root_namespace_path_regex ||= %r{\A#{root_namespace_route_regex}/\z}
end
def full_namespace_path_regex
@full_namespace_path_regex ||= %r{\A#{full_namespace_route_regex}/\z}
end
def project_path_regex
@project_path_regex ||= %r{\A#{project_route_regex}/\z}
end
def full_project_path_regex
@full_project_path_regex ||= %r{\A#{full_namespace_route_regex}/#{project_route_regex}/\z}
end
def full_namespace_format_regex
@namespace_format_regex ||= /A#{FULL_NAMESPACE_FORMAT_REGEX}\z/.freeze
end
def namespace_format_regex
@namespace_format_regex ||= /\A#{NAMESPACE_FORMAT_REGEX}\z/.freeze
end
def namespace_format_message
"can contain only letters, digits, '_', '-' and '.'. " \
"Cannot start with '-' or end in '.', '.git' or '.atom'." \
end
def project_path_format_regex
@project_path_format_regex ||= /\A#{PROJECT_PATH_FORMAT_REGEX}\z/.freeze
end
def project_path_format_message
"can contain only letters, digits, '_', '-' and '.'. " \
"Cannot start with '-', end in '.git' or end in '.atom'" \
end
def archive_formats_regex
# |zip|tar| tar.gz | tar.bz2 |
@archive_formats_regex ||= /(zip|tar|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/.freeze
end
def git_reference_regex
# Valid git ref regex, see:
# https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
@git_reference_regex ||= single_line_regexp %r{
(?!
(?# doesn't begins with)
\/| (?# rule #6)
(?# doesn't contain)
.*(?:
[\/.]\.| (?# rule #1,3)
\/\/| (?# rule #6)
@\{| (?# rule #8)
\\ (?# rule #9)
)
)
[^\000-\040\177~^:?*\[]+ (?# rule #4-5)
(?# doesn't end with)
(?<!\.lock) (?# rule #1)
(?<![\/.]) (?# rule #6-7)
}x
end
private
def single_line_regexp(regex)
# Turns a multiline extended regexp into a single line one,
# beacuse `rake routes` breaks on multiline regexes.
Regexp.new(regex.source.gsub(/\(\?#.+?\)/, '').gsub(/\s*/, ''), regex.options ^ Regexp::EXTENDED).freeze
end
end
end