# -*- coding: utf-8 -*-
# Copyright 2011 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of Unix-like rm command for cloud storage providers."""

from __future__ import absolute_import

from gslib.cloud_api import BucketNotFoundException
from gslib.cloud_api import NotEmptyException
from gslib.cloud_api import NotFoundException
from gslib.cloud_api import ServiceException
from gslib.command import Command
from gslib.command import GetFailureCount
from gslib.command import ResetFailureCount
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
from gslib.name_expansion import NameExpansionIterator
from gslib.storage_url import StorageUrlFromString
from gslib.translation_helper import PreconditionsFromHeaders
from gslib.util import GetCloudApiInstance
from gslib.util import NO_MAX
from gslib.util import Retry
from gslib.util import StdinIterator


_SYNOPSIS = """
  gsutil rm [-f] [-r] url...
  gsutil rm [-f] [-r] -I
"""

_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The gsutil rm command removes objects.
  For example, the command:

    gsutil rm gs://bucket/subdir/*

  will remove all objects in gs://bucket/subdir, but not in any of its
  sub-directories. In contrast:

    gsutil rm gs://bucket/subdir/**

  will remove all objects under gs://bucket/subdir or any of its
  subdirectories.

  You can also use the -r option to specify recursive object deletion. Thus, for
  example, either of the following two commands will remove gs://bucket/subdir
  and all objects and subdirectories under it:

    gsutil rm gs://bucket/subdir**
    gsutil rm -r gs://bucket/subdir

  The -r option will also delete all object versions in the subdirectory for
  versioning-enabled buckets, whereas the ** command will only delete the live
  version of each object in the subdirectory.

  Running gsutil rm -r on a bucket will delete all versions of all objects in
  the bucket, and then delete the bucket:

    gsutil rm -r gs://bucket

  If you want to delete all objects in the bucket, but not the bucket itself,
  this command will work:

    gsutil rm gs://bucket/**

  If you have a large number of objects to remove you might want to use the
  gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
  removes:

    gsutil -m rm -r gs://my_bucket/subdir

  You can pass a list of URLs (one per line) to remove on stdin instead of as
  command line arguments by using the -I option. This allows you to use gsutil
  in a pipeline to remove objects identified by a program, such as:

    some_program | gsutil -m rm -I

  The contents of stdin can name cloud URLs and wildcards of cloud URLs.

  Note that gsutil rm will refuse to remove files from the local
  file system. For example this will fail:

    gsutil rm *.txt

  WARNING: Object removal cannot be undone. Google Cloud Storage is designed
  to give developers a high amount of flexibility and control over their data,
  and Google maintains strict controls over the processing and purging of
  deleted data. To protect yourself from mistakes, you can configure object
  versioning on your bucket(s). See 'gsutil help versions' for details.


<B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B>
Google Cloud Storage does not provide support for restoring data lost
or overwritten due to customer errors. If you have concerns that your
application software (or your users) may at some point erroneously delete or
overwrite data, you can protect yourself from that risk by enabling Object
Versioning (see "gsutil help versioning"). Doing so increases storage costs,
which can be partially mitigated by configuring Lifecycle Management to delete
older object versions (see "gsutil help lifecycle").


<B>OPTIONS</B>
  -f          Continues silently (without printing error messages) despite
              errors when removing multiple objects. If some of the objects
              could not be removed, gsutil's exit status will be non-zero even
              if this flag is set. This option is implicitly set when running
              "gsutil -m rm ...".

  -I          Causes gsutil to read the list of objects to remove from stdin.
              This allows you to run a program that generates the list of
              objects to remove.

  -R, -r      Causes bucket or bucket subdirectory contents (all objects and
              subdirectories that it contains) to be removed recursively. If
              used with a bucket-only URL (like gs://bucket), after deleting
              objects and subdirectories gsutil will delete the bucket. The -r
              flag implies the -a flag and will delete all object versions.

  -a          Delete all versions of an object.
""")


def _RemoveExceptionHandler(cls, e):
  """Simple exception handler to allow post-completion status.

  Records the failure on the command instance instead of raising, so that
  RunCommand can report an aggregate status once all removals were attempted.

  Args:
    cls: The command instance. Its continue_on_error and logger attributes are
         read, and its bucket_not_found_count / op_failure_count counters are
         incremented.
    e: The exception raised while removing an object.
  """
  if not cls.continue_on_error:
    cls.logger.error(str(e))
  # TODO: Use shared state to track missing bucket names when we get a
  # BucketNotFoundException. Then improve bucket removal logic and exception
  # messages.
  if isinstance(e, BucketNotFoundException):
    cls.bucket_not_found_count += 1
    # NOTE(review): when continue_on_error is false this exception was already
    # logged above, so it is logged twice. Left unchanged on purpose.
    cls.logger.error(str(e))
  else:
    cls.op_failure_count += 1


# pylint: disable=unused-argument
def _RemoveFoldersExceptionHandler(cls, e):
  """When removing folders, we don't mind if none exist.

  Args:
    cls: The command instance (unused).
    e: The exception raised while removing a folder placeholder object.

  Raises:
    The original exception, unless it is a CommandException whose reason
    contains 'No URLs matched' or a NotFoundException.
  """
  # Test against the exception class itself: CommandException.__class__ is the
  # metaclass, which no exception instance is ever an instance of. The text is
  # read from e.reason, the same attribute RunCommand inspects.
  no_urls_matched = (isinstance(e, CommandException) and
                     'No URLs matched' in e.reason)
  if no_urls_matched or isinstance(e, NotFoundException):
    return
  raise e


def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None):
  """Module-level trampoline that forwards to cls.RemoveFunc.

  Args:
    cls: The command instance whose RemoveFunc is invoked.
    name_expansion_result: Passed through to RemoveFunc unchanged.
    thread_state: Passed through to RemoveFunc unchanged.
  """
  cls.RemoveFunc(name_expansion_result, thread_state=thread_state)


class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Parses the sub-options, removes every object the URL arguments expand to,
    then (for -r) removes '_$folder$' placeholder objects and finally any
    bucket that was named directly.

    Returns:
      0 on success.

    Raises:
      CommandException: on invalid arguments, when a bucket was not found
          during listing, or when one or more objects could not be removed.
      ServiceException: re-raised from the removal pass when continue_on_error
          is not set.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = self.parallel_operations
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o in ('-r', '-R'):
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Tracks if any deletes failed.
    self.op_failure_count = 0

    # Tracks if any buckets were missing.
    self.bucket_not_found_count = 0

    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      # NOTE(review): with -I, url_strs is a StdinIterator and is iterated
      # again further down; confirm that it supports more than one pass.
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          url_strs, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error),
                 shared_attrs=['op_failure_count', 'bucket_not_found_count'])

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        # NOTE(review): the 'else: raise' below is paired with this inner
        # check; confirm against the upstream file's indentation.
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
        else:
          raise
    except ServiceException:
      if not self.continue_on_error:
        raise

    if self.bucket_not_found_count:
      raise CommandException('Encountered non-existent bucket during listing')

    if self.op_failure_count and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug,
              self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith('No URLs matched:'):
            raise
        # Failures counted only during the folder pass are not real failures.
        if not had_previous_failures:
          ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Defined per iteration and called immediately, so the closure always
      # sees the current url. Retried on NotEmptyException.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    if self.op_failure_count:
      # Pluralize only for more than one failure; the count is already known
      # to be non-zero inside this branch.
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be removed.' % (
          self.op_failure_count, plural_str, plural_str))

    return 0

  def RemoveFunc(self, name_expansion_result, thread_state=None):
    """Removes the single object described by a name expansion result.

    Args:
      name_expansion_result: Result whose expanded_storage_url supplies the
          bucket name, object name, generation and scheme to delete.
      thread_state: Passed to GetCloudApiInstance to obtain the API instance
          used for the delete.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Removing %s...', exp_src_url)
    gsutil_api.DeleteObject(
        exp_src_url.bucket_name, exp_src_url.object_name,
        preconditions=self.preconditions, generation=exp_src_url.generation,
        provider=exp_src_url.scheme)
