1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/file_util_icu.h"
6
7 #include <stddef.h>
8
9 #include "base/files/file_util.h"
10 #include "base/macros.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "build/build_config.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "testing/platform_test.h"
15
16 namespace base {
17 namespace i18n {
18
19 // file_util winds up using autoreleased objects on the Mac, so this needs
20 // to be a PlatformTest
21 class FileUtilICUTest : public PlatformTest {
22 };
23
24 #if defined(OS_POSIX) && !defined(OS_MACOSX)
25
// On Linux, a file path is parsed and filtered as UTF-8. Each entry pairs a
// raw name with the name expected once ReplaceIllegalCharactersInPath() has
// been applied with '-' as the replacement character. In every entry here one
// byte is replaced by one byte, so both strings of an entry have equal length.
static const struct GoodBadPairLinux {
  const char* bad_name;   // Name handed to ReplaceIllegalCharactersInPath().
  const char* good_name;  // Name expected afterwards.
} kLinuxIllegalCharacterCases[] = {
    {"bad*\\/file:name?.jpg", "bad---file-name-.jpg"},
    {"**********::::.txt", "--------------.txt"},
    // This name is expected to come back unchanged.
    {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"},
    {" _ ", "-_-"},
    {".", "-"},
    {" .( ). ", "-.( ).-"},
    // Five spaces: only the leading and the trailing one are replaced, the
    // three in the middle are kept.
    {"     ", "-   -"},
};
39
// Runs every entry of kLinuxIllegalCharacterCases through
// ReplaceIllegalCharactersInPath() with '-' as the replacement character and
// compares the outcome with the expected name.
TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathLinuxTest) {
  for (const auto& test_case : kLinuxIllegalCharacterCases) {
    std::string path(test_case.bad_name);
    ReplaceIllegalCharactersInPath(&path, '-');
    EXPECT_EQ(test_case.good_name, path);
  }
}
47
48 #endif
49
// For Mac & Windows, which both do Unicode validation on filenames. These
// characters are given as wide strings since it's more convenient to specify
// unicode characters. For Mac they should be converted to UTF-8.
//
// Each entry pairs a raw name with the name expected after illegal characters
// have been replaced by '-'. A given bad_name must appear with only one
// good_name, otherwise one of the two expectations can never hold.
static const struct goodbad_pair {
  const wchar_t* bad_name;   // Name before replacement.
  const wchar_t* good_name;  // Name expected after replacement.
} kIllegalCharacterCases[] = {
    {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
    {L"**********::::.txt", L"--------------.txt"},
    // We can't use UCNs (universal character names) for C0/C1 characters and
    // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
    {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"},
    {L"bad*file\\?name.jpg", L"bad-file--name.jpg"},
    {L"\t bad*file\\name/.jpg", L"- bad-file-name-.jpg"},
    {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"},
    {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"},
    {L"\u0635\u200C\u0644.mp3", L"\u0635-\u0644.mp3"},
    {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"},
    // Unassigned codepoints are ok.
    {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"},
    // Non-characters are not allowed.
    {L"bad\uFFFFfile\U0010FFFEname.jpg", L"bad-file-name.jpg"},
    {L"bad\uFDD0file\uFDEFname.jpg", L"bad-file-name.jpg"},
    // CVE-2014-9390
    {L"(\u200C.\u200D.\u200E.\u200F.\u202A.\u202B.\u202C.\u202D.\u202E.\u206A."
     L"\u206B.\u206C.\u206D.\u206F.\uFEFF)",
     L"(-.-.-.-.-.-.-.-.-.-.-.-.-.-.-)"},
    {L"config~1", L"config-1"},
    {L" _ ", L"-_-"},
    // A single space is replaced as a whole.
    {L" ", L"-"},
    {L"\u2008.(\u2007).\u3000", L"-.(\u2007).-"},
    // Five spaces: only the leading and the trailing one are replaced.
    {L"     ", L"-   -"},
    // A leading dot followed by four spaces: the dot and the last space are
    // replaced, the three spaces in between are kept.
    {L".    ", L"-   -"}
};
84
85 #if defined(OS_WIN) || defined(OS_MACOSX) || defined(OS_POSIX)
86
// Feeds each entry of kIllegalCharacterCases to
// ReplaceIllegalCharactersInPath() with '-' as the replacement character.
// On Windows the wide strings are used as they are; everywhere else both the
// input and the expected name are converted to UTF-8 first.
TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) {
  for (const auto& test_case : kIllegalCharacterCases) {
#if defined(OS_WIN)
    std::wstring path(test_case.bad_name);
    ReplaceIllegalCharactersInPath(&path, '-');
    EXPECT_EQ(test_case.good_name, path);
#else
    std::string path(WideToUTF8(test_case.bad_name));
    ReplaceIllegalCharactersInPath(&path, '-');
    EXPECT_EQ(WideToUTF8(test_case.good_name), path);
#endif
  }
}
100
101 #endif
102
// Checks IsFilenameLegal() against kIllegalCharacterCases: the empty name and
// every expected name must be accepted, and an original name must be rejected
// whenever it differs from its expected name.
TEST_F(FileUtilICUTest, IsFilenameLegalTest) {
  EXPECT_TRUE(IsFilenameLegal(string16()));

  for (const auto& entry : kIllegalCharacterCases) {
    const string16 original = WideToUTF16(entry.bad_name);
    const string16 expected = WideToUTF16(entry.good_name);

    EXPECT_TRUE(IsFilenameLegal(expected)) << expected;
    if (expected == original) {
      // Nothing is replaced for this entry, so there is no rejected form.
      continue;
    }
    EXPECT_FALSE(IsFilenameLegal(original)) << original;
  }
}
115
116 #if defined(OS_CHROMEOS)
// Inputs and expected results for NormalizeFileNameEncoding(). The inputs
// spell the umlaut as 'a' followed by the bytes \xcc\x88; the expected paths
// carry the two bytes \xc3\xa4 in its place.
static const struct normalize_name_encoding_test_cases {
  const char* original_path;    // Path handed to NormalizeFileNameEncoding().
  const char* normalized_path;  // Path expected afterwards.
} kNormalizeFileNameEncodingTestCases[] = {
    // A bare file name.
    {"foo_na\xcc\x88me.foo",
     "foo_n\xc3\xa4me.foo"},
    // Directory plus file name: only the final component is expected to
    // change.
    {"foo_dir_na\xcc\x88me/foo_na\xcc\x88me.foo",
     "foo_dir_na\xcc\x88me/foo_n\xc3\xa4me.foo"},
    // The empty path stays empty.
    {"",
     ""},
    // A directory with a trailing separator: the separator is not part of the
    // expected result.
    {"foo_dir_na\xcc\x88me/",
     "foo_dir_n\xc3\xa4me"}
};
127
// Applies NormalizeFileNameEncoding() to each original path in
// kNormalizeFileNameEncodingTestCases and compares the result with the
// expected normalized path.
TEST_F(FileUtilICUTest, NormalizeFileNameEncoding) {
  for (const auto& test_case : kNormalizeFileNameEncodingTestCases) {
    FilePath path(test_case.original_path);
    NormalizeFileNameEncoding(&path);
    const FilePath expected(test_case.normalized_path);
    EXPECT_EQ(expected, path);
  }
}
136
137 #endif
138
139 } // namespace i18n
140 } // namespace base
141