1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Sanjay Ghemawat
31 
32 #include <vector>
33 #include <assert.h>
34 
35 #include "pcrecpp_internal.h"
36 #include "pcre_scanner.h"
37 
38 using std::vector;
39 
40 namespace pcrecpp {
41 
Scanner()42 Scanner::Scanner()
43   : data_(),
44     input_(data_),
45     skip_(NULL),
46     should_skip_(false),
47     skip_repeat_(false),
48     save_comments_(false),
49     comments_(NULL),
50     comments_offset_(0) {
51 }
52 
Scanner(const string & in)53 Scanner::Scanner(const string& in)
54   : data_(in),
55     input_(data_),
56     skip_(NULL),
57     should_skip_(false),
58     skip_repeat_(false),
59     save_comments_(false),
60     comments_(NULL),
61     comments_offset_(0) {
62 }
63 
~Scanner()64 Scanner::~Scanner() {
65   delete skip_;
66   delete comments_;
67 }
68 
SetSkipExpression(const char * re)69 void Scanner::SetSkipExpression(const char* re) {
70   delete skip_;
71   if (re != NULL) {
72     skip_ = new RE(re);
73     should_skip_ = true;
74     skip_repeat_ = true;
75     ConsumeSkip();
76   } else {
77     skip_ = NULL;
78     should_skip_ = false;
79     skip_repeat_ = false;
80   }
81 }
82 
Skip(const char * re)83 void Scanner::Skip(const char* re) {
84   delete skip_;
85   if (re != NULL) {
86     skip_ = new RE(re);
87     should_skip_ = true;
88     skip_repeat_ = false;
89     ConsumeSkip();
90   } else {
91     skip_ = NULL;
92     should_skip_ = false;
93     skip_repeat_ = false;
94   }
95 }
96 
DisableSkip()97 void Scanner::DisableSkip() {
98   assert(skip_ != NULL);
99   should_skip_ = false;
100 }
101 
EnableSkip()102 void Scanner::EnableSkip() {
103   assert(skip_ != NULL);
104   should_skip_ = true;
105   ConsumeSkip();
106 }
107 
LineNumber() const108 int Scanner::LineNumber() const {
109   // TODO: Make it more efficient by keeping track of the last point
110   // where we computed line numbers and counting newlines since then.
111   // We could use std:count, but not all systems have it. :-(
112   int count = 1;
113   for (const char* p = data_.data(); p < input_.data(); ++p)
114     if (*p == '\n')
115       ++count;
116   return count;
117 }
118 
Offset() const119 int Scanner::Offset() const {
120   return (int)(input_.data() - data_.c_str());
121 }
122 
LookingAt(const RE & re) const123 bool Scanner::LookingAt(const RE& re) const {
124   int consumed;
125   return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
126 }
127 
128 
129 // helper function to consume *skip_ and honour save_comments_
ConsumeSkip()130 void Scanner::ConsumeSkip() {
131   const char* start_data = input_.data();
132   while (skip_->Consume(&input_)) {
133     if (!skip_repeat_) {
134       // Only one skip allowed.
135       break;
136     }
137   }
138   if (save_comments_) {
139     if (comments_ == NULL) {
140       comments_ = new vector<StringPiece>;
141     }
142     // already pointing one past end, so no need to +1
143     int length = (int)(input_.data() - start_data);
144     if (length > 0) {
145       comments_->push_back(StringPiece(start_data, length));
146     }
147   }
148 }
149 
150 
GetComments(int start,int end,vector<StringPiece> * ranges)151 void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
152   // short circuit out if we've not yet initialized comments_
153   // (e.g., when save_comments is false)
154   if (!comments_) {
155     return;
156   }
157   // TODO: if we guarantee that comments_ will contain StringPieces
158   // that are ordered by their start, then we can do a binary search
159   // for the first StringPiece at or past start and then scan for the
160   // ones contained in the range, quit early (use equal_range or
161   // lower_bound)
162   for (vector<StringPiece>::const_iterator it = comments_->begin();
163        it != comments_->end(); ++it) {
164     if ((it->data() >= data_.c_str() + start &&
165          it->data() + it->size() <= data_.c_str() + end)) {
166       ranges->push_back(*it);
167     }
168   }
169 }
170 
171 
GetNextComments(vector<StringPiece> * ranges)172 void Scanner::GetNextComments(vector<StringPiece> *ranges) {
173   // short circuit out if we've not yet initialized comments_
174   // (e.g., when save_comments is false)
175   if (!comments_) {
176     return;
177   }
178   for (vector<StringPiece>::const_iterator it =
179          comments_->begin() + comments_offset_;
180        it != comments_->end(); ++it) {
181     ranges->push_back(*it);
182     ++comments_offset_;
183   }
184 }
185 
186 }   // namespace pcrecpp
187