1 //===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/Support/Regex.h"
10 #include "llvm/ADT/SmallVector.h"
11 #include "gtest/gtest.h"
12 #include <cstring>
13
14 using namespace llvm;
15 namespace {
16
17 class RegexTest : public ::testing::Test {
18 };
19
/// Covers plain matching, capture extraction, and subjects/patterns that
/// carry an embedded NUL byte.
TEST_F(RegexTest, Basics) {
  // Anchored at both ends: the whole subject has to be digits.
  Regex AllDigits("^[0-9]+$");
  EXPECT_TRUE(AllDigits.match("916"));
  EXPECT_TRUE(AllDigits.match("9"));
  EXPECT_FALSE(AllDigits.match("9a"));

  // Unanchored: Matches[0] is the text that matched.
  SmallVector<StringRef, 1> Matches;
  Regex DigitRun("[0-9]+");
  EXPECT_TRUE(DigitRun.match("aa216b", &Matches));
  EXPECT_EQ(1u, Matches.size());
  EXPECT_EQ("216", Matches[0].str());

  // Two capture groups, the first of which is optional.
  Regex Captures("[0-9]+([a-f])?:([0-9]+)");
  EXPECT_TRUE(Captures.match("9a:513b", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("9a:513", Matches[0].str());
  EXPECT_EQ("a", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // When the optional group does not participate, its slot is still present
  // and compares equal to the empty string.
  EXPECT_TRUE(Captures.match("9:513b", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("9:513", Matches[0].str());
  EXPECT_EQ("", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // Subject with a NUL byte in the middle ("ax\0b").
  Regex NotB("a[^b]+b");
  std::string NulSubject = "axxb";
  NulSubject[2] = '\0';
  EXPECT_FALSE(NotB.match("abb"));
  EXPECT_TRUE(NotB.match(NulSubject, &Matches));
  EXPECT_EQ(1u, Matches.size());
  EXPECT_EQ(NulSubject, Matches[0].str());

  // Pattern with a NUL byte: the second 'X' of the pattern is overwritten, so
  // the subject only matches once it has a NUL in the corresponding place.
  std::string NulPattern = "X[0-9]+X([a-f])?:([0-9]+)";
  std::string Subject = "YX99a:513b";
  NulPattern[7] = '\0';
  Regex WithNul(NulPattern);
  EXPECT_FALSE(WithNul.match(Subject));
  EXPECT_FALSE(WithNul.match("X9"));
  Subject[3] = '\0';
  EXPECT_TRUE(WithNul.match(Subject));
}
62
/// Checks that \N inside a pattern must repeat the text captured by group N.
TEST_F(RegexTest, Backreferences) {
  // One group, referenced once.
  Regex Repeated("([a-z]+)_\\1");
  SmallVector<StringRef, 4> Matches;
  EXPECT_TRUE(Repeated.match("abc_abc", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_FALSE(Repeated.match("abc_ab", &Matches));

  // One group, referenced twice.
  Regex SameDigit("a([0-9])b\\1c\\1");
  EXPECT_TRUE(SameDigit.match("a4b4c4", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_EQ("4", Matches[1].str());
  EXPECT_FALSE(SameDigit.match("a2b2c3"));

  // Two groups, each referenced once; a mismatch in either one is a failure.
  Regex TwoGroups("a([0-9])([a-z])b\\1\\2");
  EXPECT_TRUE(TwoGroups.match("a6zb6z", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("6", Matches[1].str());
  EXPECT_EQ("z", Matches[2].str());
  EXPECT_FALSE(TwoGroups.match("a6zb6y"));
  EXPECT_FALSE(TwoGroups.match("a6zb7z"));
}
84
/// Exercises Regex::sub: plain replacement text, backslash escapes in the
/// replacement, backreferences to the match, and the error strings reported
/// for malformed replacements.
TEST_F(RegexTest, Substitution) {
  std::string Error;

  EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));

  // Standard Escapes
  EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  // An escape with no special meaning yields the escaped character itself.
  EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // A lone trailing backslash contributes nothing and is reported via Error.
  // Expected value goes first, matching every other EXPECT_EQ in this file.
  EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error));
  EXPECT_EQ("replacement string contained trailing backslash", Error);

  // Backreferences
  // \0 stands for the whole match.
  EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // \1 stands for the first capture group.
  EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // A reference to a group that does not exist is dropped and reported.
  EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
  EXPECT_EQ("invalid backreference string '100'", Error);
}
113
/// Regex::isLiteralERE should accept a plain string and reject any string that
/// contains an ERE metacharacter or a backslash escape.
TEST_F(RegexTest, IsLiteralERE) {
  EXPECT_TRUE(Regex::isLiteralERE("abc"));

  // Each entry contains a construct that must be rejected.
  static const char *const NonLiteralPatterns[] = {
      "a(bc)", "^abc", "abc$",  "a|bc",   "abc*",    "abc+",
      "abc?",  "abc.", "a[bc]", "abc\\1", "abc{1,2}"};
  for (const char *Pattern : NonLiteralPatterns)
    EXPECT_FALSE(Regex::isLiteralERE(Pattern)) << Pattern;
}
128
/// Regex::escape should put a backslash in front of brackets, braces, and
/// backslashes while leaving ordinary characters (including ',') untouched.
TEST_F(RegexTest, Escape) {
  const std::string EscapedBrackets = Regex::escape("a[bc]");
  EXPECT_EQ("a\\[bc\\]", EscapedBrackets);

  const std::string EscapedBraces = Regex::escape("abc{1\\,2}");
  EXPECT_EQ("abc\\{1\\\\,2\\}", EscapedBraces);
}
133
/// Malformed patterns must be reported as invalid along with a descriptive
/// error message.
TEST_F(RegexTest, IsValid) {
  std::string Error;

  // Opening parenthesis without a matching close.
  Regex Unbalanced("(foo");
  EXPECT_FALSE(Unbalanced.isValid(Error));
  EXPECT_EQ("parentheses not balanced", Error);

  // Bracket expression cut off in the middle of a range.
  Regex BadRange("a[b-");
  EXPECT_FALSE(BadRange.isValid(Error));
  EXPECT_EQ("invalid character range", Error);
}
141
/// A Regex built by move construction must match like the one it was moved
/// from.
TEST_F(RegexTest, MoveConstruct) {
  Regex Source("^[0-9]+$");
  Regex Target(std::move(Source));
  EXPECT_TRUE(Target.match("916"));
}
147
/// Move assignment must hand the compiled pattern to the destination and leave
/// the source reporting itself as invalid.
TEST_F(RegexTest, MoveAssign) {
  Regex Source("^[0-9]+$");
  Regex Target("abc");
  Target = std::move(Source);
  EXPECT_TRUE(Target.match("916"));

  // Deliberate use of the moved-from object: the test pins its state.
  std::string Error;
  EXPECT_FALSE(Source.isValid(Error));
}
156
/// A default-constructed Regex is invalid until a real pattern is assigned to
/// it.
TEST_F(RegexTest, NoArgConstructor) {
  std::string Error;
  Regex RE;
  EXPECT_FALSE(RE.isValid(Error));
  EXPECT_EQ("invalid regular expression", Error);

  // Assigning a compiled pattern makes the object usable.
  RE = Regex("abc");
  EXPECT_TRUE(RE.isValid(Error));
}
165
/// Calling match() on an invalid (default-constructed) Regex must simply
/// report no match.
TEST_F(RegexTest, MatchInvalid) {
  Regex Invalid;
  std::string Error;
  EXPECT_FALSE(Invalid.isValid(Error));
  EXPECT_FALSE(Invalid.match("X"));
}
172
173 // https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727
/// Regression test for a fuzzer-found pattern containing an embedded NUL; the
/// pattern is expected to be rejected as invalid.
TEST_F(RegexTest, OssFuzz3727Regression) {
  // Pass an explicit length through StringRef so the NUL byte doesn't
  // terminate the pattern early.
  const StringRef Pattern("[[[=GS\x00[=][", 10);
  Regex Fuzzed(Pattern);
  std::string Error;
  EXPECT_FALSE(Fuzzed.isValid(Error));
}
180
181 }
182