1# -*- coding: utf-8 -*- 2# Copyright 2013 Google Inc. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15"""Integration tests for rm command.""" 16 17from __future__ import absolute_import 18 19import re 20 21import gslib.tests.testcase as testcase 22from gslib.tests.testcase.base import MAX_BUCKET_LENGTH 23from gslib.tests.testcase.integration_testcase import SkipForS3 24from gslib.tests.util import GenerationFromURI as urigen 25from gslib.tests.util import ObjectToURI as suri 26from gslib.util import Retry 27 28 29class TestRm(testcase.GsUtilIntegrationTestCase): 30 """Integration tests for rm command.""" 31 32 def _RunRemoveCommandAndCheck(self, command_and_args, objects_to_remove=None, 33 buckets_to_remove=None, stdin=None): 34 """Tests a remove command in the presence of eventual listing consistency. 35 36 Eventual listing consistency means that a remove command may not see all 37 of the objects to be removed at once. When removing multiple objects 38 (or buckets via -r), some calls may return no matches and multiple calls 39 to the rm command may be necessary to reach the desired state. This function 40 retries the rm command, incrementally tracking what has been removed and 41 ensuring that the exact set of objects/buckets are removed across all 42 retried calls. 43 44 The caller is responsible for confirming the existence of buckets/objects 45 prior to calling this function. 46 47 Args: 48 command_and_args: List of strings representing the rm command+args to run. 49 objects_to_remove: List of object URL strings (optionally including 50 generation) that should be removed by the command, if any. 51 buckets_to_remove: List of bucket URL strings that should be removed by 52 the command, if any. 53 stdin: String of data to pipe to the process as standard input (for 54 testing -I option). 55 """ 56 cumulative_stderr_lines = set() 57 bucket_strings = [] 58 for bucket_to_remove in buckets_to_remove or []: 59 bucket_strings.append('Removing %s/...' % bucket_to_remove) 60 object_strings = [] 61 for object_to_remove in objects_to_remove or []: 62 object_strings.append('Removing %s...' % object_to_remove) 63 expected_stderr_lines = set(object_strings + bucket_strings) 64 65 @Retry(AssertionError, tries=5, timeout_secs=1) 66 def _RunRmCommandAndCheck(): 67 """Runs the command with retries, updating+checking cumulative output.""" 68 stderr = self.RunGsUtil(command_and_args, return_stderr=True, 69 expected_status=None, stdin=stdin) 70 update_lines = True 71 # Retry 404's and 409's due to eventual listing consistency, but don't add 72 # the output to the set. 73 if ('No URLs matched' in stderr or 74 '409 BucketNotEmpty' in stderr or 75 '409 VersionedBucketNotEmpty' in stderr): 76 update_lines = False 77 78 # For recursive deletes of buckets, it is possible that the bucket is 79 # deleted before the objects are all present in the listing, in which case 80 # we will never see all of the expected "Removing object..." messages. 81 # Since this is still a successful outcome, just return successfully. 82 if '-r' in command_and_args and 'bucket does not exist' in stderr: 83 for bucket_to_remove in buckets_to_remove: 84 matching_bucket = re.match(r'.*404\s+%s\s+bucket does not exist' % 85 re.escape(bucket_to_remove), stderr) 86 if matching_bucket: 87 for line in cumulative_stderr_lines: 88 if 'Removing %s/...' % bucket_to_remove in line: 89 return 90 if 'Removing %s/...' % bucket_to_remove in stderr: 91 return 92 93 if update_lines: 94 cumulative_stderr_lines.update(set(stderr.splitlines())) 95 96 # Ensure all of the expected strings are present. 97 self.assertEqual(cumulative_stderr_lines, expected_stderr_lines) 98 99 _RunRmCommandAndCheck() 100 101 def test_all_versions_current(self): 102 """Test that 'rm -a' for an object with a current version works.""" 103 bucket_uri = self.CreateVersionedBucket() 104 key_uri = bucket_uri.clone_replace_name('foo') 105 key_uri.set_contents_from_string('bar') 106 g1 = urigen(key_uri) 107 key_uri.set_contents_from_string('baz') 108 g2 = urigen(key_uri) 109 self.AssertNObjectsInBucket(bucket_uri, 2, versioned=True) 110 # Use @Retry as hedge against bucket listing eventual consistency. 111 @Retry(AssertionError, tries=3, timeout_secs=1) 112 def _Check1(stderr_lines): 113 stderr = self.RunGsUtil(['-m', 'rm', '-a', suri(key_uri)], 114 return_stderr=True) 115 stderr_lines.update(set(stderr.splitlines())) 116 stderr = '\n'.join(stderr_lines) 117 self.assertEqual(stderr.count('Removing %s://' % self.default_provider), 118 2) 119 self.assertIn('Removing %s#%s...' % (suri(key_uri), g1), stderr) 120 self.assertIn('Removing %s#%s...' % (suri(key_uri), g2), stderr) 121 all_stderr_lines = set() 122 _Check1(all_stderr_lines) 123 self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True) 124 125 def test_all_versions_no_current(self): 126 """Test that 'rm -a' for an object without a current version works.""" 127 bucket_uri = self.CreateVersionedBucket() 128 key_uri = bucket_uri.clone_replace_name('foo') 129 key_uri.set_contents_from_string('bar') 130 g1 = urigen(key_uri) 131 key_uri.set_contents_from_string('baz') 132 g2 = urigen(key_uri) 133 self._RunRemoveCommandAndCheck( 134 ['-m', 'rm', '-a', suri(key_uri)], 135 objects_to_remove=['%s#%s' % (suri(key_uri), g1), 136 '%s#%s' % (suri(key_uri), g2)]) 137 self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True) 138 139 def test_fails_for_missing_obj(self): 140 bucket_uri = self.CreateVersionedBucket() 141 stderr = self.RunGsUtil(['rm', '-a', '%s' % suri(bucket_uri, 'foo')], 142 return_stderr=True, expected_status=1) 143 self.assertIn('No URLs matched', stderr) 144 145 def test_remove_all_versions_recursive_on_bucket(self): 146 """Test that 'rm -r' works on bucket.""" 147 bucket_uri = self.CreateVersionedBucket() 148 k1_uri = bucket_uri.clone_replace_name('foo') 149 k2_uri = bucket_uri.clone_replace_name('foo2') 150 k1_uri.set_contents_from_string('bar') 151 k2_uri.set_contents_from_string('bar2') 152 k1g1 = urigen(k1_uri) 153 k2g1 = urigen(k2_uri) 154 k1_uri.set_contents_from_string('baz') 155 k2_uri.set_contents_from_string('baz2') 156 k1g2 = urigen(k1_uri) 157 k2g2 = urigen(k2_uri) 158 159 self.AssertNObjectsInBucket(bucket_uri, 4, versioned=True) 160 161 self._RunRemoveCommandAndCheck( 162 ['rm', '-r', suri(bucket_uri)], 163 objects_to_remove=['%s#%s' % (suri(k1_uri), k1g1), 164 '%s#%s' % (suri(k1_uri), k1g2), 165 '%s#%s' % (suri(k2_uri), k2g1), 166 '%s#%s' % (suri(k2_uri), k2g2)], 167 buckets_to_remove=[suri(bucket_uri)]) 168 169 # Use @Retry as hedge against bucket listing eventual consistency. 170 @Retry(AssertionError, tries=3, timeout_secs=1) 171 def _Check(): 172 # Bucket should no longer exist. 173 stderr = self.RunGsUtil(['ls', '-a', suri(bucket_uri)], 174 return_stderr=True, expected_status=1) 175 self.assertIn('bucket does not exist', stderr) 176 _Check() 177 178 def test_remove_all_versions_recursive_on_subdir(self): 179 """Test that 'rm -r' works on subdir.""" 180 bucket_uri = self.CreateVersionedBucket() 181 k1_uri = bucket_uri.clone_replace_name('dir/foo') 182 k2_uri = bucket_uri.clone_replace_name('dir/foo2') 183 k1_uri.set_contents_from_string('bar') 184 k2_uri.set_contents_from_string('bar2') 185 k1g1 = urigen(k1_uri) 186 k2g1 = urigen(k2_uri) 187 k1_uri.set_contents_from_string('baz') 188 k2_uri.set_contents_from_string('baz2') 189 k1g2 = urigen(k1_uri) 190 k2g2 = urigen(k2_uri) 191 192 self.AssertNObjectsInBucket(bucket_uri, 4, versioned=True) 193 194 self._RunRemoveCommandAndCheck( 195 ['rm', '-r', '%s' % suri(bucket_uri, 'dir')], 196 objects_to_remove=['%s#%s' % (suri(k1_uri), k1g1), 197 '%s#%s' % (suri(k1_uri), k1g2), 198 '%s#%s' % (suri(k2_uri), k2g1), 199 '%s#%s' % (suri(k2_uri), k2g2)]) 200 self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True) 201 202 def test_missing_first_force(self): 203 bucket_uri = self.CreateBucket() 204 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='present', 205 contents='foo') 206 self.AssertNObjectsInBucket(bucket_uri, 1) 207 self.RunGsUtil(['rm', '%s' % suri(bucket_uri, 'missing'), 208 suri(object_uri)], expected_status=1) 209 stderr = self.RunGsUtil( 210 ['rm', '-f', '%s' % suri(bucket_uri, 'missing'), suri(object_uri)], 211 return_stderr=True, expected_status=1) 212 self.assertEqual(stderr.count('Removing %s://' % self.default_provider), 1) 213 self.RunGsUtil(['stat', suri(object_uri)], expected_status=1) 214 215 def test_some_missing(self): 216 """Test that 'rm -a' fails when some but not all uris don't exist.""" 217 bucket_uri = self.CreateVersionedBucket() 218 key_uri = bucket_uri.clone_replace_name('foo') 219 key_uri.set_contents_from_string('bar') 220 self.AssertNObjectsInBucket(bucket_uri, 1, versioned=True) 221 stderr = self.RunGsUtil(['rm', '-a', suri(key_uri), 222 '%s' % suri(bucket_uri, 'missing')], 223 return_stderr=True, expected_status=1) 224 self.assertEqual(stderr.count('Removing %s://' % self.default_provider), 1) 225 self.assertIn('No URLs matched', stderr) 226 227 def test_some_missing_force(self): 228 """Test that 'rm -af' succeeds despite hidden first uri.""" 229 bucket_uri = self.CreateVersionedBucket() 230 key_uri = bucket_uri.clone_replace_name('foo') 231 key_uri.set_contents_from_string('bar') 232 self.AssertNObjectsInBucket(bucket_uri, 1, versioned=True) 233 stderr = self.RunGsUtil( 234 ['rm', '-af', suri(key_uri), '%s' % suri(bucket_uri, 'missing')], 235 return_stderr=True, expected_status=1) 236 self.assertEqual(stderr.count('Removing %s://' % self.default_provider), 1) 237 self.AssertNObjectsInBucket(bucket_uri, 0) 238 239 def test_folder_objects_deleted(self): 240 """Test for 'rm -r' of a folder with a dir_$folder$ marker.""" 241 bucket_uri = self.CreateVersionedBucket() 242 key_uri = bucket_uri.clone_replace_name('abc/o1') 243 key_uri.set_contents_from_string('foobar') 244 folder_uri = bucket_uri.clone_replace_name('abc_$folder$') 245 folder_uri.set_contents_from_string('') 246 self.AssertNObjectsInBucket(bucket_uri, 2, versioned=True) 247 # This could fail due to eventual listing consistency, so use retry and 248 # expected_status=None to guard against No URLs matched exceptions. 249 @Retry(AssertionError, tries=3, timeout_secs=1) 250 def _RemoveAndCheck(): 251 self.RunGsUtil(['rm', '-r', '%s' % suri(bucket_uri, 'abc')], 252 expected_status=None) 253 self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True) 254 _RemoveAndCheck() 255 # Bucket should not be deleted (Should not get ServiceException). 256 bucket_uri.get_location(validate=False) 257 258 def test_folder_objects_deleted_with_wildcard(self): 259 """Test for 'rm -r' of a folder with a dir_$folder$ marker.""" 260 bucket_uri = self.CreateVersionedBucket() 261 key_uri = bucket_uri.clone_replace_name('abc/o1') 262 key_uri.set_contents_from_string('foobar') 263 folder_uri = bucket_uri.clone_replace_name('abc_$folder$') 264 folder_uri.set_contents_from_string('') 265 266 self.AssertNObjectsInBucket(bucket_uri, 2, versioned=True) 267 self._RunRemoveCommandAndCheck( 268 ['rm', '-r', '%s' % suri(bucket_uri, '**')], 269 objects_to_remove=['%s#%s' % (suri(key_uri), urigen(key_uri)), 270 '%s#%s' % (suri(folder_uri), urigen(folder_uri))]) 271 self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True) 272 # Bucket should not be deleted (Should not get ServiceException). 273 bucket_uri.get_location(validate=False) 274 275 @SkipForS3('Listing/removing S3 DeleteMarkers is not supported') 276 def test_recursive_bucket_rm(self): 277 """Test for 'rm -r' of a bucket.""" 278 bucket_uri = self.CreateBucket() 279 object_uri = self.CreateObject(bucket_uri, contents='foo') 280 self.AssertNObjectsInBucket(bucket_uri, 1) 281 self._RunRemoveCommandAndCheck( 282 ['rm', '-r', suri(bucket_uri)], 283 objects_to_remove=['%s#%s' % (suri(object_uri), urigen(object_uri))], 284 buckets_to_remove=[suri(bucket_uri)]) 285 286 # Use @Retry as hedge against bucket listing eventual consistency. 287 @Retry(AssertionError, tries=3, timeout_secs=1) 288 def _Check1(): 289 # Bucket should be deleted. 290 stderr = self.RunGsUtil(['ls', '-Lb', suri(bucket_uri)], 291 return_stderr=True, expected_status=1) 292 self.assertIn('bucket does not exist', stderr) 293 _Check1() 294 295 # Now try same thing, but for a versioned bucket with multiple versions of 296 # an object present. 297 bucket_uri = self.CreateVersionedBucket() 298 self.CreateObject(bucket_uri, 'obj', 'z') 299 self.CreateObject(bucket_uri, 'obj', 'z') 300 final_uri = self.CreateObject(bucket_uri, 'obj', 'z') 301 self.AssertNObjectsInBucket(bucket_uri, 3, versioned=True) 302 self._RunRemoveCommandAndCheck(['rm', suri(bucket_uri, '**')], 303 objects_to_remove=['%s' % final_uri]) 304 305 stderr = self.RunGsUtil(['rb', suri(bucket_uri)], 306 return_stderr=True, expected_status=1) 307 self.assertIn('Bucket is not empty', stderr) 308 309 # Now try with rm -r. 310 @Retry(AssertionError, tries=3, timeout_secs=1) 311 def _Check2(): 312 self.RunGsUtil(['rm', '-r', suri(bucket_uri)]) 313 # Bucket should be deleted. 314 stderr = self.RunGsUtil(['ls', '-Lb', suri(bucket_uri)], 315 return_stderr=True, expected_status=1) 316 self.assertIn('bucket does not exist', stderr) 317 _Check2() 318 319 def test_recursive_bucket_rm_with_wildcarding(self): 320 """Tests removing all objects and buckets matching a bucket wildcard.""" 321 buri_base = 'gsutil-test-%s' % self.GetTestMethodName() 322 buri_base = buri_base[:MAX_BUCKET_LENGTH-20] 323 buri_base = '%s-%s' % (buri_base, self.MakeRandomTestString()) 324 buri1 = self.CreateBucket(bucket_name='%s-tbuck1' % buri_base) 325 buri2 = self.CreateBucket(bucket_name='%s-tbuck2' % buri_base) 326 buri3 = self.CreateBucket(bucket_name='%s-tb3' % buri_base) 327 ouri1 = self.CreateObject(bucket_uri=buri1, object_name='o1', contents='z') 328 ouri2 = self.CreateObject(bucket_uri=buri2, object_name='o2', contents='z') 329 self.CreateObject(bucket_uri=buri3, object_name='o3', contents='z') 330 331 self.AssertNObjectsInBucket(buri1, 1) 332 self.AssertNObjectsInBucket(buri2, 1) 333 self.AssertNObjectsInBucket(buri3, 1) 334 335 self._RunRemoveCommandAndCheck( 336 ['rm', '-r', '%s://%s-tbu*' % (self.default_provider, buri_base)], 337 objects_to_remove=['%s#%s' % (suri(ouri1), urigen(ouri1)), 338 '%s#%s' % (suri(ouri2), urigen(ouri2))], 339 buckets_to_remove=[suri(buri1), suri(buri2)]) 340 341 self.AssertNObjectsInBucket(buri3, 1) 342 343 def test_rm_quiet(self): 344 """Test that 'rm -q' outputs no progress indications.""" 345 bucket_uri = self.CreateBucket() 346 key_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo') 347 self.AssertNObjectsInBucket(bucket_uri, 1) 348 self._RunRemoveCommandAndCheck(['-q', 'rm', suri(key_uri)], []) 349 self.AssertNObjectsInBucket(bucket_uri, 0) 350 351 def test_rm_object_with_slash(self): 352 """Tests removing a bucket that has an object with a slash in it.""" 353 bucket_uri = self.CreateVersionedBucket() 354 ouri1 = self.CreateObject(bucket_uri=bucket_uri, 355 object_name='/dirwithslash/foo', contents='z') 356 ouri2 = self.CreateObject(bucket_uri=bucket_uri, 357 object_name='dirnoslash/foo', contents='z') 358 ouri3 = self.CreateObject(bucket_uri=bucket_uri, 359 object_name='dirnoslash/foo2', contents='z') 360 361 self.AssertNObjectsInBucket(bucket_uri, 3, versioned=True) 362 363 self._RunRemoveCommandAndCheck( 364 ['rm', '-r', suri(bucket_uri)], 365 objects_to_remove=['%s#%s' % (suri(ouri1), urigen(ouri1)), 366 '%s#%s' % (suri(ouri2), urigen(ouri2)), 367 '%s#%s' % (suri(ouri3), urigen(ouri3))], 368 buckets_to_remove=[suri(bucket_uri)]) 369 370 def test_slasher_horror_film(self): 371 """Tests removing a bucket with objects that are filled with slashes.""" 372 bucket_uri = self.CreateVersionedBucket() 373 ouri1 = self.CreateObject(bucket_uri=bucket_uri, 374 object_name='h/e/l//lo', 375 contents='Halloween') 376 ouri2 = self.CreateObject(bucket_uri=bucket_uri, 377 object_name='/h/e/l/l/o', 378 contents='A Nightmare on Elm Street') 379 ouri3 = self.CreateObject(bucket_uri=bucket_uri, 380 object_name='//h//e/l//l/o', 381 contents='Friday the 13th') 382 ouri4 = self.CreateObject(bucket_uri=bucket_uri, 383 object_name='//h//e//l//l//o', 384 contents='I Know What You Did Last Summer') 385 ouri5 = self.CreateObject(bucket_uri=bucket_uri, 386 object_name='/', 387 contents='Scream') 388 ouri6 = self.CreateObject(bucket_uri=bucket_uri, 389 object_name='//', 390 contents='Child\'s Play') 391 ouri7 = self.CreateObject(bucket_uri=bucket_uri, 392 object_name='///', 393 contents='The Prowler') 394 ouri8 = self.CreateObject(bucket_uri=bucket_uri, 395 object_name='////', 396 contents='Black Christmas') 397 ouri9 = self.CreateObject( 398 bucket_uri=bucket_uri, 399 object_name='everything/is/better/with/slashes///////', 400 contents='Maniac') 401 402 self.AssertNObjectsInBucket(bucket_uri, 9, versioned=True) 403 404 # We add a slash to URIs with a trailing slash, 405 # because ObjectToURI (suri) removes one trailing slash. 406 objects_to_remove = [ 407 '%s#%s' % (suri(ouri1), urigen(ouri1)), 408 '%s#%s' % (suri(ouri2), urigen(ouri2)), 409 '%s#%s' % (suri(ouri3), urigen(ouri3)), 410 '%s#%s' % (suri(ouri4), urigen(ouri4)), 411 '%s#%s' % (suri(ouri5) + '/', urigen(ouri5)), 412 '%s#%s' % (suri(ouri6) + '/', urigen(ouri6)), 413 '%s#%s' % (suri(ouri7) + '/', urigen(ouri7)), 414 '%s#%s' % (suri(ouri8) + '/', urigen(ouri8)), 415 '%s#%s' % (suri(ouri9) + '/', urigen(ouri9))] 416 417 self._RunRemoveCommandAndCheck(['-m', 'rm', '-r', suri(bucket_uri)], 418 objects_to_remove=objects_to_remove, 419 buckets_to_remove=[suri(bucket_uri)]) 420 421 @SkipForS3('GCS versioning headers not supported by S3') 422 def test_rm_failing_precondition(self): 423 """Test for '-h x-goog-if-generation-match:value rm' of an object.""" 424 bucket_uri = self.CreateBucket() 425 object_uri = self.CreateObject(bucket_uri, contents='foo') 426 stderr = self.RunGsUtil(['-h', 'x-goog-if-generation-match:12345', 'rm', 427 suri(object_uri)], return_stderr=True, 428 expected_status=1) 429 self.assertRegexpMatches( 430 stderr, r'PreconditionException: 412 Precondition\s*Failed') 431 432 def test_stdin_args(self): 433 """Tests rm with the -I option.""" 434 buri1 = self.CreateVersionedBucket() 435 ouri1 = self.CreateObject(bucket_uri=buri1, 436 object_name='foo', 437 contents='foocontents') 438 self.CreateObject(bucket_uri=buri1, object_name='bar', 439 contents='barcontents') 440 ouri3 = self.CreateObject(bucket_uri=buri1, 441 object_name='baz', 442 contents='bazcontents') 443 buri2 = self.CreateVersionedBucket() 444 ouri4 = self.CreateObject(bucket_uri=buri2, 445 object_name='moo', 446 contents='moocontents') 447 self.AssertNObjectsInBucket(buri1, 3, versioned=True) 448 self.AssertNObjectsInBucket(buri2, 1, versioned=True) 449 450 objects_to_remove = ['%s#%s' % (suri(ouri1), urigen(ouri1)), 451 '%s#%s' % (suri(ouri3), urigen(ouri3)), 452 '%s#%s' % (suri(ouri4), urigen(ouri4))] 453 stdin = '\n'.join(objects_to_remove) 454 self._RunRemoveCommandAndCheck(['rm', '-I'], 455 objects_to_remove=objects_to_remove, 456 stdin=stdin) 457 self.AssertNObjectsInBucket(buri1, 1, versioned=True) 458 self.AssertNObjectsInBucket(buri2, 0, versioned=True) 459 460 def test_rm_nonexistent_bucket_recursive(self): 461 stderr = self.RunGsUtil( 462 ['rm', '-rf', '%s://%s' % (self.default_provider, 463 self.nonexistent_bucket_name)], 464 return_stderr=True, expected_status=1) 465 self.assertIn('Encountered non-existent bucket', stderr) 466 467 def test_rm_multiple_nonexistent_objects(self): 468 bucket_uri = self.CreateBucket() 469 nonexistent_object1 = suri(bucket_uri, 'nonexistent1') 470 nonexistent_object2 = suri(bucket_uri, 'nonexistent1') 471 stderr = self.RunGsUtil( 472 ['rm', '-rf', nonexistent_object1, nonexistent_object2], 473 return_stderr=True, expected_status=1) 474 self.assertIn('2 files/objects could not be removed.', stderr) 475