1# -*- coding: utf-8 -*-
2# Copyright 2011 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Implementation of Unix-like rm command for cloud storage providers."""
16
17from __future__ import absolute_import
18
19from gslib.cloud_api import BucketNotFoundException
20from gslib.cloud_api import NotEmptyException
21from gslib.cloud_api import NotFoundException
22from gslib.cloud_api import ServiceException
23from gslib.command import Command
24from gslib.command import GetFailureCount
25from gslib.command import ResetFailureCount
26from gslib.command_argument import CommandArgument
27from gslib.cs_api_map import ApiSelector
28from gslib.exception import CommandException
29from gslib.name_expansion import NameExpansionIterator
30from gslib.storage_url import StorageUrlFromString
31from gslib.translation_helper import PreconditionsFromHeaders
32from gslib.util import GetCloudApiInstance
33from gslib.util import NO_MAX
34from gslib.util import Retry
35from gslib.util import StdinIterator
36
37
# Usage synopsis for the rm command. It is passed to the command spec as
# usage_synopsis and is also embedded in _DETAILED_HELP_TEXT below.
_SYNOPSIS = """
  gsutil rm [-f] [-r] url...
  gsutil rm [-f] [-r] -I
"""
42
# Full help text for the rm command, built by splicing _SYNOPSIS into the
# SYNOPSIS section. It is supplied to RmCommand.help_spec as help_text.
# Section titles are wrapped in <B>...</B> markers.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The gsutil rm command removes objects.
  For example, the command:

    gsutil rm gs://bucket/subdir/*

  will remove all objects in gs://bucket/subdir, but not in any of its
  sub-directories. In contrast:

    gsutil rm gs://bucket/subdir/**

  will remove all objects under gs://bucket/subdir or any of its
  subdirectories.

  You can also use the -r option to specify recursive object deletion. Thus, for
  example, either of the following two commands will remove gs://bucket/subdir
  and all objects and subdirectories under it:

    gsutil rm gs://bucket/subdir**
    gsutil rm -r gs://bucket/subdir

  The -r option will also delete all object versions in the subdirectory for
  versioning-enabled buckets, whereas the ** command will only delete the live
  version of each object in the subdirectory.

  Running gsutil rm -r on a bucket will delete all versions of all objects in
  the bucket, and then delete the bucket:

    gsutil rm -r gs://bucket

  If you want to delete all objects in the bucket, but not the bucket itself,
  this command will work:

    gsutil rm gs://bucket/**

  If you have a large number of objects to remove you might want to use the
  gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
  removes:

    gsutil -m rm -r gs://my_bucket/subdir

  You can pass a list of URLs (one per line) to remove on stdin instead of as
  command line arguments by using the -I option. This allows you to use gsutil
  in a pipeline to remove objects identified by a program, such as:

    some_program | gsutil -m rm -I

  The contents of stdin can name cloud URLs and wildcards of cloud URLs.

  Note that gsutil rm will refuse to remove files from the local
  file system. For example this will fail:

    gsutil rm *.txt

  WARNING: Object removal cannot be undone. Google Cloud Storage is designed
  to give developers a high amount of flexibility and control over their data,
  and Google maintains strict controls over the processing and purging of
  deleted data. To protect yourself from mistakes, you can configure object
  versioning on your bucket(s). See 'gsutil help versions' for details.


<B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B>
Google Cloud Storage does not provide support for restoring data lost
or overwritten due to customer errors. If you have concerns that your
application software (or your users) may at some point erroneously delete or
overwrite data, you can protect yourself from that risk by enabling Object
Versioning (see "gsutil help versioning"). Doing so increases storage costs,
which can be partially mitigated by configuring Lifecycle Management to delete
older object versions (see "gsutil help lifecycle").


<B>OPTIONS</B>
  -f          Continues silently (without printing error messages) despite
              errors when removing multiple objects. If some of the objects
              could not be removed, gsutil's exit status will be non-zero even
              if this flag is set. This option is implicitly set when running
              "gsutil -m rm ...".

  -I          Causes gsutil to read the list of objects to remove from stdin.
              This allows you to run a program that generates the list of
              objects to remove.

  -R, -r      Causes bucket or bucket subdirectory contents (all objects and
              subdirectories that it contains) to be removed recursively. If
              used with a bucket-only URL (like gs://bucket), after deleting
              objects and subdirectories gsutil will delete the bucket.  The -r
              flag implies the -a flag and will delete all object versions.

  -a          Delete all versions of an object.
""")
138
139
140def _RemoveExceptionHandler(cls, e):
141  """Simple exception handler to allow post-completion status."""
142  if not cls.continue_on_error:
143    cls.logger.error(str(e))
144  # TODO: Use shared state to track missing bucket names when we get a
145  # BucketNotFoundException. Then improve bucket removal logic and exception
146  # messages.
147  if isinstance(e, BucketNotFoundException):
148    cls.bucket_not_found_count += 1
149    cls.logger.error(str(e))
150  else:
151    cls.op_failure_count += 1
152
153
154# pylint: disable=unused-argument
155def _RemoveFoldersExceptionHandler(cls, e):
156  """When removing folders, we don't mind if none exist."""
157  if (isinstance(e, CommandException.__class__) and
158      'No URLs matched' in e.message) or isinstance(e, NotFoundException):
159    pass
160  else:
161    raise e
162
163
164def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None):
165  cls.RemoveFunc(name_expansion_result, thread_state=thread_state)
166
167
class RmCommand(Command):
  """Implementation of gsutil rm command.

  Removes the cloud objects named by the URL arguments (or read from stdin
  with -I). For recursive removals it also removes folder placeholder
  objects and any bucket named directly by a bucket or provider URL.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Returns:
      0 if the command completed without raising.

    Raises:
      CommandException: if the arguments are invalid, a bucket was not found
          while listing, or one or more objects could not be removed.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = self.parallel_operations
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o in ('-r', '-R'):
          # Recursive removal implies removing every object version.
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    if self.recursion_requested:
      # In recursive mode the URL strings are walked several times below
      # (bucket detection, name expansion, folder placeholder cleanup).
      # Materialize them so that a one-shot source such as stdin is not used
      # up by the first pass. The bucket detection loop reads every URL up
      # front anyway, so nothing is read earlier than before.
      url_strs = list(url_strs)

    # Tracks if any deletes failed.
    self.op_failure_count = 0

    # Tracks if any buckets were missing.
    self.bucket_not_found_count = 0

    # Buckets to delete once their contents are gone, plus the URL strings
    # that named them (used below to recognise "empty bucket" errors).
    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          url_strs, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error),
                 shared_attrs=['op_failure_count', 'bucket_not_found_count'])

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
        else:
          raise
    except ServiceException:
      if not self.continue_on_error:
        raise

    if self.bucket_not_found_count:
      raise CommandException('Encountered non-existent bucket during listing')

    if self.op_failure_count and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug,
              self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith('No URLs matched:'):
            raise
        # Failures seen only during folder cleanup should not affect the
        # overall failure count.
        if not had_previous_failures:
          ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Defined and invoked within the same iteration, so closing over the
      # loop variable is safe. NOTE(review): the retry on NotEmptyException
      # presumably covers listings that lag behind the deletes above --
      # confirm against Retry's semantics.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    if self.op_failure_count:
      # Pluralize only for more than one failure; testing mere truthiness
      # here would always yield the plural form.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be removed.' % (
          self.op_failure_count, plural_str, plural_str))

    return 0

  def RemoveFunc(self, name_expansion_result, thread_state=None):
    """Deletes the object identified by a single name expansion result.

    Args:
      name_expansion_result: Result whose expanded_storage_url names the
          object (bucket, object name, generation, scheme) to delete.
      thread_state: Passed to GetCloudApiInstance to obtain the API instance
          used for the delete.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Removing %s...', exp_src_url)
    gsutil_api.DeleteObject(
        exp_src_url.bucket_name, exp_src_url.object_name,
        preconditions=self.preconditions, generation=exp_src_url.generation,
        provider=exp_src_url.scheme)
349
350