1# -*- coding: utf-8 -*-
2# Copyright 2013 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Integration tests for rm command."""
16
17from __future__ import absolute_import
18
19import re
20
21import gslib.tests.testcase as testcase
22from gslib.tests.testcase.base import MAX_BUCKET_LENGTH
23from gslib.tests.testcase.integration_testcase import SkipForS3
24from gslib.tests.util import GenerationFromURI as urigen
25from gslib.tests.util import ObjectToURI as suri
26from gslib.util import Retry
27
28
29class TestRm(testcase.GsUtilIntegrationTestCase):
30  """Integration tests for rm command."""
31
32  def _RunRemoveCommandAndCheck(self, command_and_args, objects_to_remove=None,
33                                buckets_to_remove=None, stdin=None):
34    """Tests a remove command in the presence of eventual listing consistency.
35
36    Eventual listing consistency means that a remove command may not see all
37    of the objects to be removed at once. When removing multiple objects
38    (or buckets via -r), some calls may return no matches and multiple calls
39    to the rm command may be necessary to reach the desired state. This function
40    retries the rm command, incrementally tracking what has been removed and
41    ensuring that the exact set of objects/buckets are removed across all
42    retried calls.
43
44    The caller is responsible for confirming the existence of buckets/objects
45    prior to calling this function.
46
47    Args:
48      command_and_args: List of strings representing the rm command+args to run.
49      objects_to_remove: List of object URL strings (optionally including
50          generation) that should be removed by the command, if any.
51      buckets_to_remove: List of bucket URL strings that should be removed by
52         the command, if any.
53      stdin: String of data to pipe to the process as standard input (for
54         testing -I option).
55    """
56    cumulative_stderr_lines = set()
57    bucket_strings = []
58    for bucket_to_remove in buckets_to_remove or []:
59      bucket_strings.append('Removing %s/...' % bucket_to_remove)
60    object_strings = []
61    for object_to_remove in objects_to_remove or []:
62      object_strings.append('Removing %s...' % object_to_remove)
63    expected_stderr_lines = set(object_strings + bucket_strings)
64
65    @Retry(AssertionError, tries=5, timeout_secs=1)
66    def _RunRmCommandAndCheck():
67      """Runs the command with retries, updating+checking cumulative output."""
68      stderr = self.RunGsUtil(command_and_args, return_stderr=True,
69                              expected_status=None, stdin=stdin)
70      update_lines = True
71      # Retry 404's and 409's due to eventual listing consistency, but don't add
72      # the output to the set.
73      if ('No URLs matched' in stderr or
74          '409 BucketNotEmpty' in stderr or
75          '409 VersionedBucketNotEmpty' in stderr):
76        update_lines = False
77
78      # For recursive deletes of buckets, it is possible that the bucket is
79      # deleted before the objects are all present in the listing, in which case
80      # we will never see all of the expected "Removing object..." messages.
81      # Since this is still a successful outcome, just return successfully.
82      if '-r' in command_and_args and 'bucket does not exist' in stderr:
83        for bucket_to_remove in buckets_to_remove:
84          matching_bucket = re.match(r'.*404\s+%s\s+bucket does not exist' %
85                                     re.escape(bucket_to_remove), stderr)
86          if matching_bucket:
87            for line in cumulative_stderr_lines:
88              if 'Removing %s/...' % bucket_to_remove in line:
89                return
90            if 'Removing %s/...' % bucket_to_remove in stderr:
91              return
92
93      if update_lines:
94        cumulative_stderr_lines.update(set(stderr.splitlines()))
95
96      # Ensure all of the expected strings are present.
97      self.assertEqual(cumulative_stderr_lines, expected_stderr_lines)
98
99    _RunRmCommandAndCheck()
100
101  def test_all_versions_current(self):
102    """Test that 'rm -a' for an object with a current version works."""
103    bucket_uri = self.CreateVersionedBucket()
104    key_uri = bucket_uri.clone_replace_name('foo')
105    key_uri.set_contents_from_string('bar')
106    g1 = urigen(key_uri)
107    key_uri.set_contents_from_string('baz')
108    g2 = urigen(key_uri)
109    self.AssertNObjectsInBucket(bucket_uri, 2, versioned=True)
110    # Use @Retry as hedge against bucket listing eventual consistency.
111    @Retry(AssertionError, tries=3, timeout_secs=1)
112    def _Check1(stderr_lines):
113      stderr = self.RunGsUtil(['-m', 'rm', '-a', suri(key_uri)],
114                              return_stderr=True)
115      stderr_lines.update(set(stderr.splitlines()))
116      stderr = '\n'.join(stderr_lines)
117      self.assertEqual(stderr.count('Removing %s://' % self.default_provider),
118                       2)
119      self.assertIn('Removing %s#%s...' % (suri(key_uri), g1), stderr)
120      self.assertIn('Removing %s#%s...' % (suri(key_uri), g2), stderr)
121    all_stderr_lines = set()
122    _Check1(all_stderr_lines)
123    self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True)
124
125  def test_all_versions_no_current(self):
126    """Test that 'rm -a' for an object without a current version works."""
127    bucket_uri = self.CreateVersionedBucket()
128    key_uri = bucket_uri.clone_replace_name('foo')
129    key_uri.set_contents_from_string('bar')
130    g1 = urigen(key_uri)
131    key_uri.set_contents_from_string('baz')
132    g2 = urigen(key_uri)
133    self._RunRemoveCommandAndCheck(
134        ['-m', 'rm', '-a', suri(key_uri)],
135        objects_to_remove=['%s#%s' % (suri(key_uri), g1),
136                           '%s#%s' % (suri(key_uri), g2)])
137    self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True)
138
139  def test_fails_for_missing_obj(self):
140    bucket_uri = self.CreateVersionedBucket()
141    stderr = self.RunGsUtil(['rm', '-a', '%s' % suri(bucket_uri, 'foo')],
142                            return_stderr=True, expected_status=1)
143    self.assertIn('No URLs matched', stderr)
144
145  def test_remove_all_versions_recursive_on_bucket(self):
146    """Test that 'rm -r' works on bucket."""
147    bucket_uri = self.CreateVersionedBucket()
148    k1_uri = bucket_uri.clone_replace_name('foo')
149    k2_uri = bucket_uri.clone_replace_name('foo2')
150    k1_uri.set_contents_from_string('bar')
151    k2_uri.set_contents_from_string('bar2')
152    k1g1 = urigen(k1_uri)
153    k2g1 = urigen(k2_uri)
154    k1_uri.set_contents_from_string('baz')
155    k2_uri.set_contents_from_string('baz2')
156    k1g2 = urigen(k1_uri)
157    k2g2 = urigen(k2_uri)
158
159    self.AssertNObjectsInBucket(bucket_uri, 4, versioned=True)
160
161    self._RunRemoveCommandAndCheck(
162        ['rm', '-r', suri(bucket_uri)],
163        objects_to_remove=['%s#%s' % (suri(k1_uri), k1g1),
164                           '%s#%s' % (suri(k1_uri), k1g2),
165                           '%s#%s' % (suri(k2_uri), k2g1),
166                           '%s#%s' % (suri(k2_uri), k2g2)],
167        buckets_to_remove=[suri(bucket_uri)])
168
169    # Use @Retry as hedge against bucket listing eventual consistency.
170    @Retry(AssertionError, tries=3, timeout_secs=1)
171    def _Check():
172      # Bucket should no longer exist.
173      stderr = self.RunGsUtil(['ls', '-a', suri(bucket_uri)],
174                              return_stderr=True, expected_status=1)
175      self.assertIn('bucket does not exist', stderr)
176    _Check()
177
178  def test_remove_all_versions_recursive_on_subdir(self):
179    """Test that 'rm -r' works on subdir."""
180    bucket_uri = self.CreateVersionedBucket()
181    k1_uri = bucket_uri.clone_replace_name('dir/foo')
182    k2_uri = bucket_uri.clone_replace_name('dir/foo2')
183    k1_uri.set_contents_from_string('bar')
184    k2_uri.set_contents_from_string('bar2')
185    k1g1 = urigen(k1_uri)
186    k2g1 = urigen(k2_uri)
187    k1_uri.set_contents_from_string('baz')
188    k2_uri.set_contents_from_string('baz2')
189    k1g2 = urigen(k1_uri)
190    k2g2 = urigen(k2_uri)
191
192    self.AssertNObjectsInBucket(bucket_uri, 4, versioned=True)
193
194    self._RunRemoveCommandAndCheck(
195        ['rm', '-r', '%s' % suri(bucket_uri, 'dir')],
196        objects_to_remove=['%s#%s' % (suri(k1_uri), k1g1),
197                           '%s#%s' % (suri(k1_uri), k1g2),
198                           '%s#%s' % (suri(k2_uri), k2g1),
199                           '%s#%s' % (suri(k2_uri), k2g2)])
200    self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True)
201
202  def test_missing_first_force(self):
203    bucket_uri = self.CreateBucket()
204    object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='present',
205                                   contents='foo')
206    self.AssertNObjectsInBucket(bucket_uri, 1)
207    self.RunGsUtil(['rm', '%s' % suri(bucket_uri, 'missing'),
208                    suri(object_uri)], expected_status=1)
209    stderr = self.RunGsUtil(
210        ['rm', '-f', '%s' % suri(bucket_uri, 'missing'), suri(object_uri)],
211        return_stderr=True, expected_status=1)
212    self.assertEqual(stderr.count('Removing %s://' % self.default_provider), 1)
213    self.RunGsUtil(['stat', suri(object_uri)], expected_status=1)
214
215  def test_some_missing(self):
216    """Test that 'rm -a' fails when some but not all uris don't exist."""
217    bucket_uri = self.CreateVersionedBucket()
218    key_uri = bucket_uri.clone_replace_name('foo')
219    key_uri.set_contents_from_string('bar')
220    self.AssertNObjectsInBucket(bucket_uri, 1, versioned=True)
221    stderr = self.RunGsUtil(['rm', '-a', suri(key_uri),
222                             '%s' % suri(bucket_uri, 'missing')],
223                            return_stderr=True, expected_status=1)
224    self.assertEqual(stderr.count('Removing %s://' % self.default_provider), 1)
225    self.assertIn('No URLs matched', stderr)
226
227  def test_some_missing_force(self):
228    """Test that 'rm -af' succeeds despite hidden first uri."""
229    bucket_uri = self.CreateVersionedBucket()
230    key_uri = bucket_uri.clone_replace_name('foo')
231    key_uri.set_contents_from_string('bar')
232    self.AssertNObjectsInBucket(bucket_uri, 1, versioned=True)
233    stderr = self.RunGsUtil(
234        ['rm', '-af', suri(key_uri), '%s' % suri(bucket_uri, 'missing')],
235        return_stderr=True, expected_status=1)
236    self.assertEqual(stderr.count('Removing %s://' % self.default_provider), 1)
237    self.AssertNObjectsInBucket(bucket_uri, 0)
238
239  def test_folder_objects_deleted(self):
240    """Test for 'rm -r' of a folder with a dir_$folder$ marker."""
241    bucket_uri = self.CreateVersionedBucket()
242    key_uri = bucket_uri.clone_replace_name('abc/o1')
243    key_uri.set_contents_from_string('foobar')
244    folder_uri = bucket_uri.clone_replace_name('abc_$folder$')
245    folder_uri.set_contents_from_string('')
246    self.AssertNObjectsInBucket(bucket_uri, 2, versioned=True)
247    # This could fail due to eventual listing consistency, so use retry and
248    # expected_status=None to guard against No URLs matched exceptions.
249    @Retry(AssertionError, tries=3, timeout_secs=1)
250    def _RemoveAndCheck():
251      self.RunGsUtil(['rm', '-r', '%s' % suri(bucket_uri, 'abc')],
252                     expected_status=None)
253      self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True)
254    _RemoveAndCheck()
255    # Bucket should not be deleted (Should not get ServiceException).
256    bucket_uri.get_location(validate=False)
257
258  def test_folder_objects_deleted_with_wildcard(self):
259    """Test for 'rm -r' of a folder with a dir_$folder$ marker."""
260    bucket_uri = self.CreateVersionedBucket()
261    key_uri = bucket_uri.clone_replace_name('abc/o1')
262    key_uri.set_contents_from_string('foobar')
263    folder_uri = bucket_uri.clone_replace_name('abc_$folder$')
264    folder_uri.set_contents_from_string('')
265
266    self.AssertNObjectsInBucket(bucket_uri, 2, versioned=True)
267    self._RunRemoveCommandAndCheck(
268        ['rm', '-r', '%s' % suri(bucket_uri, '**')],
269        objects_to_remove=['%s#%s' % (suri(key_uri), urigen(key_uri)),
270                           '%s#%s' % (suri(folder_uri), urigen(folder_uri))])
271    self.AssertNObjectsInBucket(bucket_uri, 0, versioned=True)
272    # Bucket should not be deleted (Should not get ServiceException).
273    bucket_uri.get_location(validate=False)
274
275  @SkipForS3('Listing/removing S3 DeleteMarkers is not supported')
276  def test_recursive_bucket_rm(self):
277    """Test for 'rm -r' of a bucket."""
278    bucket_uri = self.CreateBucket()
279    object_uri = self.CreateObject(bucket_uri, contents='foo')
280    self.AssertNObjectsInBucket(bucket_uri, 1)
281    self._RunRemoveCommandAndCheck(
282        ['rm', '-r', suri(bucket_uri)],
283        objects_to_remove=['%s#%s' % (suri(object_uri), urigen(object_uri))],
284        buckets_to_remove=[suri(bucket_uri)])
285
286    # Use @Retry as hedge against bucket listing eventual consistency.
287    @Retry(AssertionError, tries=3, timeout_secs=1)
288    def _Check1():
289      # Bucket should be deleted.
290      stderr = self.RunGsUtil(['ls', '-Lb', suri(bucket_uri)],
291                              return_stderr=True, expected_status=1)
292      self.assertIn('bucket does not exist', stderr)
293    _Check1()
294
295    # Now try same thing, but for a versioned bucket with multiple versions of
296    # an object present.
297    bucket_uri = self.CreateVersionedBucket()
298    self.CreateObject(bucket_uri, 'obj', 'z')
299    self.CreateObject(bucket_uri, 'obj', 'z')
300    final_uri = self.CreateObject(bucket_uri, 'obj', 'z')
301    self.AssertNObjectsInBucket(bucket_uri, 3, versioned=True)
302    self._RunRemoveCommandAndCheck(['rm', suri(bucket_uri, '**')],
303                                   objects_to_remove=['%s' % final_uri])
304
305    stderr = self.RunGsUtil(['rb', suri(bucket_uri)],
306                            return_stderr=True, expected_status=1)
307    self.assertIn('Bucket is not empty', stderr)
308
309    # Now try with rm -r.
310    @Retry(AssertionError, tries=3, timeout_secs=1)
311    def _Check2():
312      self.RunGsUtil(['rm', '-r', suri(bucket_uri)])
313      # Bucket should be deleted.
314      stderr = self.RunGsUtil(['ls', '-Lb', suri(bucket_uri)],
315                              return_stderr=True, expected_status=1)
316      self.assertIn('bucket does not exist', stderr)
317    _Check2()
318
319  def test_recursive_bucket_rm_with_wildcarding(self):
320    """Tests removing all objects and buckets matching a bucket wildcard."""
321    buri_base = 'gsutil-test-%s' % self.GetTestMethodName()
322    buri_base = buri_base[:MAX_BUCKET_LENGTH-20]
323    buri_base = '%s-%s' % (buri_base, self.MakeRandomTestString())
324    buri1 = self.CreateBucket(bucket_name='%s-tbuck1' % buri_base)
325    buri2 = self.CreateBucket(bucket_name='%s-tbuck2' % buri_base)
326    buri3 = self.CreateBucket(bucket_name='%s-tb3' % buri_base)
327    ouri1 = self.CreateObject(bucket_uri=buri1, object_name='o1', contents='z')
328    ouri2 = self.CreateObject(bucket_uri=buri2, object_name='o2', contents='z')
329    self.CreateObject(bucket_uri=buri3, object_name='o3', contents='z')
330
331    self.AssertNObjectsInBucket(buri1, 1)
332    self.AssertNObjectsInBucket(buri2, 1)
333    self.AssertNObjectsInBucket(buri3, 1)
334
335    self._RunRemoveCommandAndCheck(
336        ['rm', '-r', '%s://%s-tbu*' % (self.default_provider, buri_base)],
337        objects_to_remove=['%s#%s' % (suri(ouri1), urigen(ouri1)),
338                           '%s#%s' % (suri(ouri2), urigen(ouri2))],
339        buckets_to_remove=[suri(buri1), suri(buri2)])
340
341    self.AssertNObjectsInBucket(buri3, 1)
342
343  def test_rm_quiet(self):
344    """Test that 'rm -q' outputs no progress indications."""
345    bucket_uri = self.CreateBucket()
346    key_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo')
347    self.AssertNObjectsInBucket(bucket_uri, 1)
348    self._RunRemoveCommandAndCheck(['-q', 'rm', suri(key_uri)], [])
349    self.AssertNObjectsInBucket(bucket_uri, 0)
350
351  def test_rm_object_with_slash(self):
352    """Tests removing a bucket that has an object with a slash in it."""
353    bucket_uri = self.CreateVersionedBucket()
354    ouri1 = self.CreateObject(bucket_uri=bucket_uri,
355                              object_name='/dirwithslash/foo', contents='z')
356    ouri2 = self.CreateObject(bucket_uri=bucket_uri,
357                              object_name='dirnoslash/foo', contents='z')
358    ouri3 = self.CreateObject(bucket_uri=bucket_uri,
359                              object_name='dirnoslash/foo2', contents='z')
360
361    self.AssertNObjectsInBucket(bucket_uri, 3, versioned=True)
362
363    self._RunRemoveCommandAndCheck(
364        ['rm', '-r', suri(bucket_uri)],
365        objects_to_remove=['%s#%s' % (suri(ouri1), urigen(ouri1)),
366                           '%s#%s' % (suri(ouri2), urigen(ouri2)),
367                           '%s#%s' % (suri(ouri3), urigen(ouri3))],
368        buckets_to_remove=[suri(bucket_uri)])
369
370  def test_slasher_horror_film(self):
371    """Tests removing a bucket with objects that are filled with slashes."""
372    bucket_uri = self.CreateVersionedBucket()
373    ouri1 = self.CreateObject(bucket_uri=bucket_uri,
374                              object_name='h/e/l//lo',
375                              contents='Halloween')
376    ouri2 = self.CreateObject(bucket_uri=bucket_uri,
377                              object_name='/h/e/l/l/o',
378                              contents='A Nightmare on Elm Street')
379    ouri3 = self.CreateObject(bucket_uri=bucket_uri,
380                              object_name='//h//e/l//l/o',
381                              contents='Friday the 13th')
382    ouri4 = self.CreateObject(bucket_uri=bucket_uri,
383                              object_name='//h//e//l//l//o',
384                              contents='I Know What You Did Last Summer')
385    ouri5 = self.CreateObject(bucket_uri=bucket_uri,
386                              object_name='/',
387                              contents='Scream')
388    ouri6 = self.CreateObject(bucket_uri=bucket_uri,
389                              object_name='//',
390                              contents='Child\'s Play')
391    ouri7 = self.CreateObject(bucket_uri=bucket_uri,
392                              object_name='///',
393                              contents='The Prowler')
394    ouri8 = self.CreateObject(bucket_uri=bucket_uri,
395                              object_name='////',
396                              contents='Black Christmas')
397    ouri9 = self.CreateObject(
398        bucket_uri=bucket_uri,
399        object_name='everything/is/better/with/slashes///////',
400        contents='Maniac')
401
402    self.AssertNObjectsInBucket(bucket_uri, 9, versioned=True)
403
404    # We add a slash to URIs with a trailing slash,
405    # because ObjectToURI (suri) removes one trailing slash.
406    objects_to_remove = [
407        '%s#%s' % (suri(ouri1), urigen(ouri1)),
408        '%s#%s' % (suri(ouri2), urigen(ouri2)),
409        '%s#%s' % (suri(ouri3), urigen(ouri3)),
410        '%s#%s' % (suri(ouri4), urigen(ouri4)),
411        '%s#%s' % (suri(ouri5) + '/', urigen(ouri5)),
412        '%s#%s' % (suri(ouri6) + '/', urigen(ouri6)),
413        '%s#%s' % (suri(ouri7) + '/', urigen(ouri7)),
414        '%s#%s' % (suri(ouri8) + '/', urigen(ouri8)),
415        '%s#%s' % (suri(ouri9) + '/', urigen(ouri9))]
416
417    self._RunRemoveCommandAndCheck(['-m', 'rm', '-r', suri(bucket_uri)],
418                                   objects_to_remove=objects_to_remove,
419                                   buckets_to_remove=[suri(bucket_uri)])
420
421  @SkipForS3('GCS versioning headers not supported by S3')
422  def test_rm_failing_precondition(self):
423    """Test for '-h x-goog-if-generation-match:value rm' of an object."""
424    bucket_uri = self.CreateBucket()
425    object_uri = self.CreateObject(bucket_uri, contents='foo')
426    stderr = self.RunGsUtil(['-h', 'x-goog-if-generation-match:12345', 'rm',
427                             suri(object_uri)], return_stderr=True,
428                            expected_status=1)
429    self.assertRegexpMatches(
430        stderr, r'PreconditionException: 412 Precondition\s*Failed')
431
432  def test_stdin_args(self):
433    """Tests rm with the -I option."""
434    buri1 = self.CreateVersionedBucket()
435    ouri1 = self.CreateObject(bucket_uri=buri1,
436                              object_name='foo',
437                              contents='foocontents')
438    self.CreateObject(bucket_uri=buri1, object_name='bar',
439                      contents='barcontents')
440    ouri3 = self.CreateObject(bucket_uri=buri1,
441                              object_name='baz',
442                              contents='bazcontents')
443    buri2 = self.CreateVersionedBucket()
444    ouri4 = self.CreateObject(bucket_uri=buri2,
445                              object_name='moo',
446                              contents='moocontents')
447    self.AssertNObjectsInBucket(buri1, 3, versioned=True)
448    self.AssertNObjectsInBucket(buri2, 1, versioned=True)
449
450    objects_to_remove = ['%s#%s' % (suri(ouri1), urigen(ouri1)),
451                         '%s#%s' % (suri(ouri3), urigen(ouri3)),
452                         '%s#%s' % (suri(ouri4), urigen(ouri4))]
453    stdin = '\n'.join(objects_to_remove)
454    self._RunRemoveCommandAndCheck(['rm', '-I'],
455                                   objects_to_remove=objects_to_remove,
456                                   stdin=stdin)
457    self.AssertNObjectsInBucket(buri1, 1, versioned=True)
458    self.AssertNObjectsInBucket(buri2, 0, versioned=True)
459
460  def test_rm_nonexistent_bucket_recursive(self):
461    stderr = self.RunGsUtil(
462        ['rm', '-rf', '%s://%s' % (self.default_provider,
463                                   self.nonexistent_bucket_name)],
464        return_stderr=True, expected_status=1)
465    self.assertIn('Encountered non-existent bucket', stderr)
466
467  def test_rm_multiple_nonexistent_objects(self):
468    bucket_uri = self.CreateBucket()
469    nonexistent_object1 = suri(bucket_uri, 'nonexistent1')
470    nonexistent_object2 = suri(bucket_uri, 'nonexistent1')
471    stderr = self.RunGsUtil(
472        ['rm', '-rf', nonexistent_object1, nonexistent_object2],
473        return_stderr=True, expected_status=1)
474    self.assertIn('2 files/objects could not be removed.', stderr)
475