@@ -47,7 +47,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
4747 // / Case when bucket name and key represented in the path of S3 URL.
4848 // / E.g. (https://s3.region.amazonaws.com/bucket-name/key)
4949 // / https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access
50- static const RE2 path_style_pattern (" ^/([^/]*)/ (.*)" );
50+ static const RE2 path_style_pattern (" ^/([^/]*)(?:/? (.*) )" );
5151
5252 if (allow_archive_path_syntax)
5353 std::tie (uri_str, archive_pattern) = getURIAndArchivePattern (uri_);
@@ -124,7 +124,6 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
124124 {
125125 endpoint = uri.getScheme () + " ://" + name + endpoint_authority_from_uri;
126126 }
127- validateBucket (bucket, uri);
128127
129128 if (!uri.getPath ().empty ())
130129 {
@@ -142,7 +141,6 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
142141 {
143142 is_virtual_hosted_style = false ;
144143 endpoint = uri.getScheme () + " ://" + uri.getAuthority ();
145- validateBucket (bucket, uri);
146144 }
147145 else
148146 {
@@ -155,6 +153,9 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
155153 if (!uri.getPath ().empty ())
156154 key = uri.getPath ().substr (1 );
157155 }
156+
157+ validateBucket (bucket, uri);
158+ validateKey (key, uri);
158159}
159160
160161void URI::addRegionToURI (const std::string ®ion)
@@ -175,6 +176,37 @@ void URI::validateBucket(const String & bucket, const Poco::URI & uri)
175176 !uri.empty () ? " (" + uri.toString () + " )" : " " );
176177}
177178
179+ void URI::validateKey (const String & key, const Poco::URI & uri)
180+ {
181+ auto onError = [&]()
182+ {
183+ throw Exception (
184+ ErrorCodes::BAD_ARGUMENTS,
185+ " Invalid S3 key: {}{}" ,
186+ quoteString (key),
187+ !uri.empty () ? " (" + uri.toString () + " )" : " " );
188+ };
189+
190+
191+ // this shouldn't happen ever because the regex should not catch this
192+ if (key.size () == 1 && key[0 ] == ' /' )
193+ {
194+ onError ();
195+ }
196+
197+ // the current regex impl allows something like "bucket-name/////".
198+ // bucket: bucket-name
199+ // key: ////
200+ // throw exception in case such thing is found
201+ for (size_t i = 1 ; i < key.size (); i++)
202+ {
203+ if (key[i - 1 ] == ' /' && key[i] == ' /' )
204+ {
205+ onError ();
206+ }
207+ }
208+ }
209+
178210std::pair<std::string, std::optional<std::string>> URI::getURIAndArchivePattern (const std::string & source)
179211{
180212 size_t pos = source.find (" ::" );
0 commit comments