1# -*- coding: utf-8 -*-
2# Copyright 2012 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Additional help about wildcards."""
16
17from __future__ import absolute_import
18
19from gslib.help_provider import HelpProvider
20
# User-facing text for the "wildcards" additional-help topic. It is handed to
# HelpProvider.HelpSpec as `help_text` by CommandOptions below. Section
# headings are wrapped in <B>...</B> tags (presumably rendered as bold by the
# help framework -- confirm in help_provider.py). Keep lines within 80 columns
# and keep example commands indented four spaces, matching the existing layout.
_DETAILED_HELP_TEXT = ("""
<B>DESCRIPTION</B>
  gsutil supports URI wildcards. For example, the command:

    gsutil cp gs://bucket/data/abc* .

  will copy all objects that start with gs://bucket/data/abc followed by any
  number of characters within that subdirectory.


<B>DIRECTORY BY DIRECTORY VS RECURSIVE WILDCARDS</B>
  The "*" wildcard only matches up to the end of a path within
  a subdirectory. For example, if bucket contains objects
  named gs://bucket/data/abcd, gs://bucket/data/abcdef,
  and gs://bucket/data/abcxyx, as well as an object in a sub-directory
  (gs://bucket/data/abc/def) the above gsutil cp command would match the
  first 3 object names but not the last one.

  If you want matches to span directory boundaries, use a '**' wildcard:

    gsutil cp gs://bucket/data/abc** .

  will match all four objects above.

  Note that gsutil supports the same wildcards for both objects and file names.
  Thus, for example:

    gsutil cp data/abc* gs://bucket

  will match all names in the local file system. Most command shells also
  support wildcarding, so if you run the above command your shell is probably
  expanding the matches before running gsutil. However, most shells do not
  support recursive wildcards ('**'), and you can cause gsutil's wildcarding
  support to work for such shells by single-quoting the arguments so they
  don't get interpreted by the shell before being passed to gsutil:

    gsutil cp 'data/abc**' gs://bucket


<B>BUCKET WILDCARDS</B>
  You can specify wildcards for bucket names within a single project. For
  example:

    gsutil ls gs://data*.example.com

  will list the contents of all buckets whose name starts with "data" and
  ends with ".example.com" in the default project. The -p option can be used
  to specify a project other than the default.  For example:

    gsutil ls -p other-project gs://data*.example.com

  You can also combine bucket and object name wildcards. For example this
  command will remove all ".txt" files in any of your Google Cloud Storage
  buckets in the default project:

    gsutil rm gs://*/**.txt


<B>OTHER WILDCARD CHARACTERS</B>
  In addition to '*', you can use these wildcards:

  ?
    Matches a single character. For example "gs://bucket/??.txt"
    only matches objects with two characters followed by .txt.

  [chars]
    Match any of the specified characters. For example
    "gs://bucket/[aeiou].txt" matches objects that contain a single vowel
    character followed by .txt.

  [char range]
    Match any of the range of characters. For example
    "gs://bucket/[a-m].txt" matches objects that contain letters
    a, b, c, ... or m, and end with .txt.

  You can combine wildcards to provide more powerful matches, for example:

    gs://bucket/[a-m]??.j*g


<B>DIFFERENT BEHAVIOR FOR "DOT" FILES IN LOCAL FILE SYSTEM</B>
  Per standard Unix behavior, the wildcard "*" only matches files that don't
  start with a "." character (to avoid confusion with the "." and ".."
  directories present in all Unix directories). gsutil provides this same
  behavior when using wildcards over a file system URI, but does not provide
  this behavior over cloud URIs. For example, the following command will copy
  all objects from gs://bucket1 to gs://bucket2:

    gsutil cp gs://bucket1/* gs://bucket2

  but the following command will copy only files that don't start with a "."
  from the directory "dir" to gs://bucket1:

    gsutil cp dir/* gs://bucket1


<B>EFFICIENCY CONSIDERATION: USING WILDCARDS OVER MANY OBJECTS</B>
  It is more efficient, faster, and less network traffic-intensive
  to use wildcards that have a non-wildcard object-name prefix, like:

    gs://bucket/abc*.txt

  than it is to use wildcards as the first part of the object name, like:

    gs://bucket/*abc.txt

  This is because the request for "gs://bucket/abc*.txt" asks the server to send
  back the subset of results whose object names start with "abc" at the bucket
  root, and then gsutil filters the result list for objects whose name ends with
  ".txt".  In contrast, "gs://bucket/*abc.txt" asks the server for the complete
  list of objects in the bucket root, and then filters for those objects whose
  name ends with "abc.txt". This efficiency consideration becomes increasingly
  noticeable when you use buckets containing thousands or more objects. It is
  sometimes possible to set up the names of your objects to fit with expected
  wildcard matching patterns, to take advantage of the efficiency of doing
  server-side prefix requests. See, for example, "gsutil help prod" for a
  concrete use case example.


<B>EFFICIENCY CONSIDERATION: USING MID-PATH WILDCARDS</B>
  Suppose you have a bucket with these objects:

    gs://bucket/obj1
    gs://bucket/obj2
    gs://bucket/obj3
    gs://bucket/obj4
    gs://bucket/dir1/obj5
    gs://bucket/dir2/obj6

  If you run the command:

    gsutil ls gs://bucket/*/obj5

  gsutil will perform a /-delimited top-level bucket listing and then one bucket
  listing for each subdirectory, for a total of 3 bucket listings:

    GET /bucket/?delimiter=/
    GET /bucket/?prefix=dir1/obj5&delimiter=/
    GET /bucket/?prefix=dir2/obj5&delimiter=/

  The more bucket listings your wildcard requires, the slower and more expensive
  it will be. The number of bucket listings required grows as:

  - the number of wildcard components (e.g., "gs://bucket/a??b/c*/*/d"
    has 3 wildcard components);
  - the number of subdirectories that match each component; and
  - the number of results (pagination is implemented using one GET
    request per 1000 results, specifying markers for each).

  If you want to use a mid-path wildcard, you might try instead using a
  recursive wildcard, for example:

    gsutil ls gs://bucket/**/obj5

  This will match more objects than "gs://bucket/*/obj5" (since it spans
  directories), but is implemented using a delimiter-less bucket listing
  request (which means fewer bucket requests, though it will list the entire
  bucket and filter locally, so that could require a non-trivial amount of
  network traffic).
""")
181
182
class CommandOptions(HelpProvider):
  """Help provider for the "wildcards" additional-help topic.

  The class carries no logic of its own; it only declares `help_spec`, which
  names the topic, lists its aliases, and supplies the help text.
  """

  # The meaning of each HelpSpec field is documented in help_provider.py.
  help_spec = HelpProvider.HelpSpec(
      help_type='additional_help',
      help_name='wildcards',
      help_name_aliases=[
          'wildcard',
          '*',
          '**',
      ],
      help_one_line_summary='Wildcard Names',
      subcommand_help_text={},
      help_text=_DETAILED_HELP_TEXT,
  )
195