1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2020 Red Hat, Inc.
5 
6 /// @file
7 ///
8 /// This file declares types and operations implementing the "O(ND)
9 /// Difference Algorithm" (aka diff2) from Eugene W. Myers, to compute
10 /// the difference between two sequences.
11 ///
12 /// To understand what is going on here, one must read the paper at
13 /// http://www.xmailserver.org/diff2.pdf.  Throughout this file, that
14 /// paper is referred to as "the paper".
15 ///
16 /// The implementations goes as far as calculating the shortest edit
17 /// script (the set of insertions and deletions) for transforming a
18 /// sequence into another.  The main entry point for that is the
19 /// compute_diff() function.
20 
21 #ifndef __ABG_DIFF_UTILS_H__
22 #define __ABG_DIFF_UTILS_H__
23 
24 #include <cassert>
25 #include <cstdlib>
26 #include <memory>
27 #include <ostream>
28 #include <sstream>
29 #include <stdexcept>
30 #include <string>
31 #include <vector>
32 #include "abg-fwd.h"
33 
34 namespace abigail
35 {
36 
37 /// @brief Libabigail's core diffing algorithms
38 ///
39 /// This is the namespace defining the core diffing algorithm used by
40 /// libabigail @ref comparison tools.  This based on the diff
41 /// algorithm from Eugene Myers.
42 namespace diff_utils
43 {
44 
45 using std::shared_ptr;
46 
47 // Inject the names from std:: below into this namespace
48 using std::string;
49 using std::ostream;
50 using std::vector;
51 using std::abs;
52 using std::ostringstream;
53 
54 /// A class representing a vertex in an edit graph, as explained in
55 /// the paper.  A vertex is a basically a pair of coordinates
56 /// (abscissa and ordinate).
57 class point
58 {
59   int x_;
60   int y_;
61   bool empty_;
62 
63 public:
64 
point()65   point()
66     : x_(-1), y_(-1),empty_(true)
67   {}
68 
point(int x,int y)69   point(int x, int y)
70     : x_(x), y_(y), empty_(false)
71   {}
72 
point(const point & p)73   point(const point& p)
74     : x_(p.x()), y_(p.y()), empty_(p.is_empty())
75   {}
76 
77   int
x()78   x() const
79   {return x_;}
80 
81   void
x(int x)82   x(int x)
83   {
84     x_ = x;
85     empty_ = false;
86   }
87 
88   int
y()89   y() const
90   {return y_;}
91 
92   void
y(int y)93   y(int y)
94   {
95     y_ = y;
96     empty_ = false;
97   }
98 
99   void
set(int x,int y)100   set(int x, int y)
101   {
102     x_ = x;
103     y_ = y;
104     empty_ = false;
105   }
106 
107   void
set(int x,int y,bool empty)108   set(int x, int y, bool empty)
109   {
110     x_ = x;
111     y_ = y;
112     empty_ = empty;
113   }
114 
115   void
add(int ax,int ay)116   add(int ax, int ay)
117   {set (x() + ax, y() + ay);}
118 
119   bool
120   operator!=(const point& o) const
121   {return (x() != o.x() || y() != o.y() || is_empty() != o.is_empty());}
122 
123   bool
124   operator==(const point& o) const
125   {return !(operator!=(o));}
126 
127   bool
128   operator<(const point& o) const
129   {return (x() < o.x() && y() < o.y());}
130 
131   bool
132   operator>(const point& o) const
133   {return (x() > o.x() && y() > o.y());}
134 
135   bool
136   operator<=(const point& o) const
137   {return (x() <= o.x() && y() <= o.y());}
138 
139   bool
140   operator>=(const point& o) const
141   {return (x() >= o.x() && y() >= o.y());}
142 
143   point
144   operator+(int val) const
145   {return point(x() + val, y() + val);}
146 
147   point
148   operator-(int val) const
149   {return point(x() - val, y() - val);}
150 
151   point&
152   operator+= (int val)
153   {
154     set(x_ + val, y_ + val);
155     return *this;
156   }
157 
158   point&
159   operator-= (int val)
160   {return (*this) += (-val);}
161 
162   point&
163   operator--()
164   {return (*this) -= 1;}
165 
166   point&
167   operator++()
168   {return (*this) += 1;}
169 
170   point
171   operator--(int)
172   {
173     point tmp(*this);
174     --(*this);
175     return tmp;
176   }
177 
178   point
179   operator++(int)
180   {
181     point tmp(*this);
182     ++(*this);
183     return tmp;
184   }
185 
186   point&
187   operator=(int val)
188   {
189     set(val, val);
190     return *this;
191   }
192 
193   point&
194   operator=(const point& p)
195   {
196     set(p.x(), p.y(), p.is_empty());
197     return *this;
198   }
199 
200   bool
is_empty()201   is_empty() const
202   {return empty_;}
203 
204   operator bool () const
205   {return !is_empty();}
206 
207   bool
208   operator!() const
209   {return is_empty();}
210 
211   void
clear()212   clear()
213   {
214     x_ = -1;
215     y_ = -1;
216     empty_ = true;
217   }
218 };// end point
219 
220 /// The abstraction of the Snake concept, from the paper.
221 ///
222 /// In a given path from the edit graph, a snake is a non-diagonal
223 /// edge followed by zero or more diagonal edges.
224 ///
225 /// The starting poing of the non-diagonal edge is the beginning of
226 /// the snake.  This is given by the snake::begin() method.  This point
227 /// is not explicitely referenced in the paper, but we need it for
228 /// some grunt implementation details of the algorithm.
229 ///
230 /// The end point of the non-diagonal edge is the intermediate point
231 /// of the snake; it's given by the snake::intermediate() method.
232 /// This point is what is referred to as "the begining of the snake"
233 /// in the paper.
234 ///
235 /// The end point of the first diagonal edge is given by the
236 /// snake::diagonal_start() method.
237 ///
238 /// The end point of the last diagonal edge is given by the
239 /// snake::end() method.  Note that when the snake contains no
240 /// diagonal edge, snake::intermediate(), and snake::end() return the
241 /// same point; snake::diagonal_start() contains an empty point (i.e,
242 /// a point for which point::is_empty() returns true).
243 class snake
244 {
245   point begin_, intermediate_, diagonal_start_, end_;
246   bool forward_;
247 
248 public:
249 
250   /// Default constructor for snake.
snake()251   snake()
252     : forward_(false)
253   {}
254 
255   /// Constructor from the beginning, intermediate and end points.
256   ///
257   /// @param b the beginning point of the snake.  That is, the
258   /// starting point of the non-diagonal edge.
259   ///
260   /// @param i the intermediate point of the snake.  That is, the end
261   /// point of the non-diagonal edge.
262   ///
263   /// @param e the end point of the snake.  That is the end point of
264   /// the last diagonal edge.
snake(const point & b,const point & i,const point & e)265   snake(const point& b,
266 	const point& i,
267 	const point& e)
268     : begin_(b), intermediate_(i),
269       end_(e), forward_(false)
270   {}
271 
272   /// Constructor from the beginning, intermediate and end points.
273   ///
274   /// @param b the beginning point of the snake.  That is, the
275   /// starting point of the non-diagonal edge.
276   ///
277   /// @param i the intermediate point of the snake.  That is, the end
278   /// point of the non-diagonal edge.
279   ///
280   /// @param d the beginning of the diagonal edge.  That is the end of
281   /// the first diagonal edge of the snake.
282   ///
283   /// @param e the end point of the snake.  That is the end point of
284   /// the last diagonal edge.
snake(const point & b,const point & i,const point & d,const point & e)285   snake(const point& b,
286 	const point& i,
287 	const point& d,
288 	const point& e)
289     : begin_(b), intermediate_(i),
290       diagonal_start_(d), end_(e),
291       forward_(false)
292   {}
293 
294   /// Getter for the starting point of the non-diagonal edge of the
295   /// snake.
296   ///
297   /// @return the starting point of the non-diagonal edge of the snake
298   const point&
begin()299   begin() const
300   {return begin_;}
301 
302   /// Getter for the starting point of the non-diagonal edge of the
303   /// snake, aka begin point.
304   ///
305   ///@param p the new begin point.
306   void
begin(const point & p)307   begin(const point& p)
308   {begin_ = p;}
309 
310   /// Getter for the end point of the non-diagonal edge of the snake.
311   ///
312   /// @return the end point of the non-diagonal edge of the snake
313   const point&
intermediate()314   intermediate() const
315   {return intermediate_;}
316 
317   /// Setter for the end point of the non-diagonal edge of the snake,
318   /// aka intermediate point.
319   ///
320   /// @param p the new intermediate point.
321   void
intermediate(const point & p)322   intermediate(const point& p)
323   {intermediate_ = p;}
324 
325   /// Getter for the end point of the first diagonal edge, aka
326   /// diagonal start point.  Note that if the snake has no diagonal
327   /// edge, this point is empty.
328   ///
329   /// @return the end point of the first diagonal edge.
330   const point&
diagonal_start()331   diagonal_start() const
332   {return diagonal_start_;}
333 
334   /// Setter for the end point of the first diagonal edge, aka
335   /// diagonal start point.
336   ///
337   /// @param p the new diagonal start.d
338   void
diagonal_start(const point & p)339   diagonal_start(const point& p)
340   {diagonal_start_ = p;}
341 
342   /// Getter for the end point of the last diagonal edge, aka snake
343   /// end point.  Note that if the snake has no diagonal edge, this
344   /// point is equal to the intermediate point.
345   ///
346   /// @return the end point of the last diagonal edge
347   const point&
end()348   end() const
349   {return end_;}
350 
351   /// Setter for the end point of the last diagonal edge, aka snake
352   /// end point.  Note that if the snake has no diagonal edge, this
353   /// point is equal to the intermediate point.
354   void
end(const point & p)355   end(const point& p)
356   {end_ = p;}
357 
358   /// Setter for the begin, intermediate and end points of the snake.
359   ///
360   /// @param b the new snake begin point
361   ///
362   /// @param i the new snake intermediate point
363   ///
364   /// @param e the new snake end point
365   void
set(const point & b,const point & i,const point & e)366   set(const point& b, const point&i, const point&e)
367   {
368     begin(b);
369     intermediate(i);
370     end(e);
371   }
372 
373   /// Setter for the begin, intermediate, diagonal start and end points
374   /// of the snake.
375   ///
376   /// @param b the new snake begin point
377   ///
378   /// @param i the new snake intermediate point
379   ///
380   /// @param d the new diagonal start point
381   ///
382   /// @param e the new snake end point
383   void
set(const point & b,const point & i,const point & d,const point & e)384   set(const point& b, const point&i, const point& d, const point&e)
385   {
386     begin(b);
387     intermediate(i);
388     diagonal_start(d);
389     end(e);
390   }
391 
392   /// @return true iff the snake is a forward snake.  That is, if it
393   /// was built while walking the edit graph going forward (from the
394   /// top left corner to the right bottom corner.
395   bool
is_forward()396   is_forward() const
397   {return forward_;}
398 
399   /// Set to true if the snake is a forward snake; that is, if it was
400   /// built while walking the edit graph going forward (from the top
401   /// left corner to the right bottom corner.  Set to false otherwise.
402   ///
403   /// @param f whether the snake is a forward snake or not.
404   void
set_forward(bool f)405   set_forward(bool f)
406   {forward_ = f;}
407 
408   /// Add an offset to the abscissas of the points of the snake, and
409   /// add another offset to the ordinates of these same points.
410   ///
411   /// @param x_offset the offset to add to the abscissas of all the
412   /// points of the snake.
413   ///
414   /// @param y_offset the offset to add to the ordinates of all the
415   /// points of the snake.
416   void
add(int x_offset,int y_offset)417   add(int x_offset, int y_offset)
418   {
419     if (is_empty())
420       return;
421 
422     begin_.add(x_offset, y_offset);
423     intermediate_.add(x_offset, y_offset);
424     if (diagonal_start_)
425       diagonal_start_.add(x_offset, y_offset);
426     end_.add(x_offset, y_offset);
427   }
428 
429   /// @return true iff the snake has at least one diagonal edge.
430   bool
has_diagonal_edge()431   has_diagonal_edge() const
432   {return !diagonal_start().is_empty();}
433 
434   /// @return true iff the non-diagonal edge is horizontal.
435   bool
has_horizontal_edge()436   has_horizontal_edge() const
437   {return (begin().y() == intermediate().y());}
438 
439   /// @return true iff the non-diagonal edge is vertical.
440   bool
has_vertical_edge()441   has_vertical_edge() const
442   {return (begin().x() == intermediate().x());}
443 
444   /// @return true iff the snake is empty, that is, if all the points
445   /// it contains are empty.
is_empty()446   bool is_empty() const
447   {return begin().is_empty() && intermediate().is_empty() && end().is_empty();}
448 };// end class snake
449 
450 /// The array containing the furthest D-path end-points, for each value
451 /// of K.  MAX_D is the maximum value of the D-Path.  That is, M+N if
452 /// M is the size of the first input string, and N is the size of the
453 /// second.
454 class d_path_vec : public std::vector<int>
455 {
456 private:
457 
458   unsigned a_size_;
459   unsigned b_size_;
460 
461   /// Forbid vector size modifications
462   void
463   push_back(const vector<int>::value_type&);
464 
465   /// Forbid default constructor.
466   d_path_vec();
467 
468   bool
over_bounds(long long index)469   over_bounds(long long index) const
470   {return  (index + offset()) >= (long long) size();}
471 
472   void
check_index_against_bound(int index)473   check_index_against_bound(int index) const
474   {
475     if (over_bounds(index))
476       {
477 	ostringstream o;
478 	o << "index '" << index
479 	  << "' out of range [-" << max_d() << ", " << max_d() << "]";
480 	throw std::out_of_range(o.str());
481       }
482   }
483 
484 public:
485 
486   /// Constructor of the d_path_vec.
487   ///
488   /// For forward vectors, the underlying vector allocates 2 *
489   /// [MAX_D+1].
490   /// space, so that one can address elements in the index range
491   /// [-MAX_D, MAX_D].  And MAX_D is the sum of the two sequence
492   /// sizes. delta is the difference.
493   ///
494   /// For reverse vectors, note that we need to be able to address
495   /// [-MAX_D - delta, MAX_D + delta], with delta being the (signed)
496   /// difference between the size of the two sequences.  We consider
497   /// delta being bounded by MAX_D itself; so we say we need to be
498   /// able to address [-2MAX_D, 2MAX_D].
499   ///
500   /// @param size1 the size of the first sequence we are interested
501   /// in.
502   ///
503   /// @param size2 the size of the second sequence we are interested
504   /// in.
d_path_vec(unsigned size1,unsigned size2)505   d_path_vec(unsigned size1, unsigned size2)
506     : vector<int>(2 * (size1 + size2 + 1 + (size1 + size2)) + 1, 0),
507       a_size_(size1), b_size_(size2)
508   {
509   }
510 
511   std::vector<int>::const_reference
512   operator[](int index) const
513   {return at(index);}
514 
515   std::vector<int>::reference
516   operator[](int index)
517   {return at(index);}
518 
519   std::vector<int>::reference
at(long long index)520   at(long long index)
521   {
522     //check_index_against_bound(index);
523     long long i = index + offset();
524     return vector<int>::operator[](i);
525   }
526 
527   std::vector<int>::const_reference
at(long long index)528   at(long long index) const
529   {
530     check_index_against_bound(index);
531     long long i = offset() + index;
532     return static_cast<const vector<int>* >(this)->at(i);
533   }
534 
535   unsigned
a_size()536   a_size() const
537   {return a_size_;}
538 
539   unsigned
b_size()540   b_size() const
541   {return b_size_;}
542 
543   unsigned
max_d()544   max_d() const
545   {return a_size_ + b_size_;}
546 
547   unsigned
offset()548   offset() const
549   {return max_d() + abs((long long) a_size() - (long long) b_size());}
550 }; // end class d_path_vec
551 
552 /// The abstration of an insertion of elements of a sequence B into a
553 /// sequence A.  This is used to represent the edit script for
554 /// transforming a sequence A into a sequence B.
555 ///
556 /// And insertion mainly encapsulates two components:
557 ///
558 ///   - An insertion point: this is the index (starting at 0) of the
559 ///     element of the sequence A after which the insertion occurs.
560 ///
561 ///   - Inserted elements: this is a vector of indexes of elements of
562 ///     sequence B (starting at 0) that got inserted into sequence A,
563 ///     after the insertion point.
564 class insertion
565 {
566   int			insertion_point_;
567   vector<unsigned>	inserted_;
568 
569 public:
570 
insertion(int insertion_point,const vector<unsigned> & inserted_indexes)571   insertion(int insertion_point,
572 	    const vector<unsigned>& inserted_indexes)
573     : insertion_point_(insertion_point),
574       inserted_(inserted_indexes)
575   {}
576 
577     insertion(int insertion_point = 0)
insertion_point_(insertion_point)578       : insertion_point_(insertion_point)
579   {}
580 
581   int
insertion_point_index()582   insertion_point_index() const
583   {return insertion_point_;}
584 
585   void
insertion_point_index(int i)586   insertion_point_index(int i)
587   {insertion_point_ = i;}
588 
589   const vector<unsigned>&
inserted_indexes()590   inserted_indexes() const
591   {return inserted_;}
592 
593   vector<unsigned>&
inserted_indexes()594   inserted_indexes()
595   {return inserted_;}
596 };// end class insertion
597 
598 /// The abstraction of the deletion of one element of a sequence A.
599 ///
600 /// This encapsulates the index of the element A that got deleted.
601 class deletion
602 {
603   int index_;
604 
605 public:
606 
deletion(int i)607   deletion(int i)
608     : index_(i)
609   {}
610 
611   int
index()612   index() const
613   {return index_;}
614 
615   void
index(int i)616   index(int i)
617   {index_ = i;}
618 };// end class deletion
619 
620 /// The abstraction of an edit script for transforming a sequence A
621 /// into a sequence B.
622 ///
623 /// It encapsulates the insertions and deletions for transforming A
624 /// into B.
625 class edit_script
626 {
627   vector<insertion>	insertions_;
628   vector<deletion>	deletions_;
629 
630 public:
631 
edit_script()632   edit_script()
633   {}
634 
635   const vector<insertion>&
insertions()636   insertions() const
637   {return insertions_;}
638 
639   vector<insertion>&
insertions()640   insertions()
641   {return insertions_;}
642 
643   const vector<deletion>&
deletions()644   deletions() const
645   {return deletions_;}
646 
647   vector<deletion>&
deletions()648   deletions()
649   {return deletions_;}
650 
651   void
append(const edit_script & es)652   append(const edit_script& es)
653   {
654     insertions().insert(insertions().end(),
655 			es.insertions().begin(),
656 			es.insertions().end());
657     deletions().insert(deletions().end(),
658 		       es.deletions().begin(),
659 		       es.deletions().end());
660   }
661 
662   void
prepend(const edit_script & es)663   prepend(const edit_script& es)
664   {
665     insertions().insert(insertions().begin(),
666 			es.insertions().begin(),
667 			es.insertions().end());
668     deletions().insert(deletions().begin(),
669 		       es.deletions().begin(),
670 		       es.deletions().end());
671   }
672 
673   void
clear()674   clear()
675   {
676     insertions().clear();
677     deletions().clear();
678   }
679 
680   bool
is_empty()681   is_empty() const
682   {return insertions().empty() && deletions().empty();}
683 
684   operator bool() const
685   {return !is_empty();}
686 
687   int
num_insertions()688   num_insertions() const
689   {
690     int l = 0;
691     for (vector<insertion>::const_iterator i = insertions().begin();
692 	 i != insertions().end();
693 	 ++i)
694       l += i->inserted_indexes().size();
695     return l;
696   }
697 
698   int
num_deletions()699   num_deletions() const
700   {return deletions().size();}
701 
702   int
length()703   length() const
704   {return num_insertions() + num_deletions();}
705 };//end class edit_script
706 
707 bool
708 point_is_valid_in_graph(point& p,
709 			unsigned a_size,
710 			unsigned b_size);
711 
712 bool
713 ends_of_furthest_d_paths_overlap(const point& forward_d_path_end,
714 				 const point& reverse_d_path_end);
715 
716 /// The default equality functor used by the core diffing algorithms.
717 struct default_eq_functor
718 {
719   /// This equality operator uses the default "==" to compare its
720   /// arguments.
721   ///
722   /// @param a the first comparison argument.
723   ///
724   /// @param b the second comparison argument.
725   ///
726   /// @return true if the two arguments are equal, false otherwise.
727   template<typename T>
728   bool
operatordefault_eq_functor729   operator()(const T a, const T b) const
730   {return a == b;}
731 };
732 
733 
734 /// An equality functor to deeply compare pointers.
735 struct deep_ptr_eq_functor
736 {
737   /// This equality operator compares pointers by comparing the
738   /// pointed-to objects.
739   ///
740   /// @param first the first comparison argument.
741   ///
742   /// @param second the second comparison argument.
743   ///
744   /// @return true if the objects pointed to by the pointers are
745   /// equal, false otherwise.
746   template<typename T>
747   bool
operatordeep_ptr_eq_functor748   operator()(const T* first,
749 	     const T* second) const
750   {
751     if (!!first != !!second)
752       return false;
753 
754     if (!first)
755       return true;
756 
757     return *first == *second;
758   }
759 
760   /// This equality operator compares pointers by comparing the
761   /// pointed-to objects.
762   ///
763   /// @param first the first comparison argument.
764   ///
765   /// @param second the second comparison argument.
766   ///
767   /// @return true if the objects pointed to by the pointers are
768   /// equal, false otherwise.
769   template<typename T>
770   bool
operatordeep_ptr_eq_functor771   operator()(const shared_ptr<T> first,
772 	     const shared_ptr<T> second) const
773   {return operator()(first.get(), second.get());}
774 
775   /// This equality operator compares pointers by comparing the
776   /// pointed-to objects.
777   ///
778   /// @param first the first comparison argument.
779   ///
780   /// @param second the second comparison argument.
781   ///
782   /// @return true if the objects pointed to by the pointers are
783   /// equal, false otherwise.
784   template<typename T>
785   bool
operatordeep_ptr_eq_functor786   operator()(const weak_ptr<T> first,
787 	     const weak_ptr<T> second) const
788   {return operator()(shared_ptr<T>(first), shared_ptr<T>(second));}
789 }; // end struct deep_ptr_eq_functor
790 
791 /// Find the end of the furthest reaching d-path on diagonal k, for
792 /// two sequences.  In the paper This is referred to as "the basic
793 /// algorithm".
794 ///
795 /// Unlike in the paper, the coordinates of the edit graph start at
796 /// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather
797 /// than (M,N).
798 ///
799 /// @tparm RandomAccessOutputIterator the type of iterators passed to
800 /// this function.  It must be a random access output iterator kind.
801 ///
802 /// @tparm EqualityFunctor this must be a class that declares a public
803 /// call operator member returning a boolean and taking two arguments
804 /// that must be of the same type as the one pointed to by the @ref
805 /// RandomAccessOutputIterator template parameter. This functor is
806 /// used to compare the elements referred to by the iterators pased in
807 /// argument to this function.
808 ///
809 /// @param k the number of the diagonal on which we want to find the
810 /// end of the furthest reaching D-path.
811 ///
812 /// @param d the D in D-Path.  That's the number of insertions/deletions
813 /// (the number of changes, in other words) in the changeset.  That is
814 /// also the number of non-diagonals in the D-Path.
815 ///
816 /// @param a_begin an iterator to the beginning of the first sequence
817 ///
818 /// @param a_end an iterator that points right after the last element
819 /// of the second sequence to consider.
820 ///
821 /// @param b_begin an iterator to the beginning of the second sequence.
822 ///
823 /// @param b_end an iterator that points right after the last element
824 /// of the second sequence to consider.
825 ///
826 /// @param v the vector of furthest end points of d_paths, at (d-1).
827 /// It contains the abscissas of the furthest end points for different
828 /// values of k, at (d-1).  That is, for k in [-D + 1, -D + 3, -D + 5,
829 /// ..., D - 1], v[k] is the abscissa of the end of the furthest
830 /// reaching (D-1)-path on diagonal k.
831 ///
832 /// @param snak the last snake of the furthest path found.  The end
833 /// point of the snake is the end point of the furthest path.
834 ///
835 /// @return true if the end of the furthest reaching path that was
836 /// found was inside the boundaries of the edit graph, false
837 /// otherwise.
838 template<typename RandomAccessOutputIterator,
839 	 typename EqualityFunctor>
840 bool
end_of_fr_d_path_in_k(int k,int d,RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_start,RandomAccessOutputIterator b_end,d_path_vec & v,snake & snak)841 end_of_fr_d_path_in_k(int k, int d,
842 		      RandomAccessOutputIterator a_begin,
843 		      RandomAccessOutputIterator a_end,
844 		      RandomAccessOutputIterator b_start,
845 		      RandomAccessOutputIterator b_end,
846 		      d_path_vec& v, snake& snak)
847 {
848   int x = -1, y = -1;
849   point begin, intermediate, diag_start, end;
850   snake s;
851   EqualityFunctor eq;
852 
853   // Let's pick the end point of the furthest reaching
854   // (D-1)-path.  It's either v[k-1] or v[k+1]; the word
855   // "furthest" means we choose the one which abscissa is the
856   // greatest (that is, furthest from abscissa zero).
857   if (k == -d || ((k != d) && (v[k-1] < v[k + 1])))
858     // So, the abscissa of the end point of the furthest
859     // reaching (D-1)-path is v[k+1].  That is a diagonal that
860     // is above the current (k) diagonal, and on the right.
861     // To move to the current k diagonal, one has to move
862     // "down" from the diagonal k+1.  So the abscissa won't
863     // change.  Only the ordinate will.  It will be given by y
864     // = x - k (a bit below); as k has changed from k - 1 (it
865     // has increased), y is going to be the new y that is
866     // 'down' from the previous y in k - 1.
867     {
868       x = v[k+1];
869       begin.set(x, x - (k + 1));
870     }
871   else
872     {
873       // So the abscissa of the end point of the furthest
874       // (D-1)-path is v[k-1].  That is on the left of the
875       // current k diagonal.  To move to the current k diagonal,
876       // one has to move "right" from diagonal k - 1.  That is,
877       // the y stays constant and x is incremented.
878       x = v[k-1];
879       begin.set(x, x - (k - 1));
880       ++x;
881     }
882 
883   // Now get the value of y from the equation k = x -y.
884   // This is the point where we first touch K, when we move
885   // from the end of the furthest reaching (D-1)-path.
886   y = x - k;
887 
888   intermediate.x(x);
889   intermediate.y(y);
890 
891   int last_x_index = a_end - a_begin - 1;
892   int last_y_index = b_end - b_start - 1;
893   // Now, follow the snake (aka, zero or more consecutive
894   // diagonals).  Note that we stay on the k diagonal when we
895   // do this.
896   while ((x < last_x_index) && (y < last_y_index))
897     if (eq(a_begin[x + 1], b_start[y + 1]))
898       {
899 	x = x + 1;
900 	y = y + 1;
901 	if (!diag_start)
902 	  diag_start.set(x, y);
903       }
904     else
905       break;
906 
907   end.x(x);
908   end.y(y);
909 
910   // Note the point that we store in v here might be outside the
911   // bounds of the edit graph.  But we store it at this step (for a
912   // given D) anyway, because out of bound or not, we need this value
913   // at this step to be able to compute the value of the point on the
914   // "next" diagonal for the next D.
915   v[k] = x;
916 
917   if (x >= (int) v.a_size()
918       || y >= (int) v.b_size()
919       || x < -1 || y < -1)
920     return false;
921 
922   s.set(begin, intermediate, diag_start, end);
923   s.set_forward(true);
924   snak = s;
925 
926   return true;
927 }
928 
929 /// Find the end of the furthest reaching reverse d-path on diagonal k
930 /// + delta.  Delta is abs(M - N), with M being the size of a and N
931 /// being the size of b.  This is the "basic algorithm", run backward.
932 /// That is, starting from the point (M,N) of the edit graph.
933 ///
934 /// Unlike in the paper, the coordinates of the edit graph start at
935 /// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather
936 /// than (M,N).
937 ///
938 /// @tparm RandomAccessOutputIterator the type of iterators passed to
939 /// this function.  It must be a random access output iterator kind.
940 ///
941 /// @tparm EqualityFunctor this must be a class that declares a public
942 /// call operator member returning a boolean and taking two arguments
943 /// that must be of the same type as the one pointed to by the @ref
944 /// RandomAccessOutputIterator template parameter. This functor is
945 /// used to compare the elements referred to by the iterators pased in
946 /// argument to this function.
947 ///
948 /// @param k the number of the diagonal on which we want to find the
949 /// end of the furthest reaching reverse D-path.  Actually, we want to
950 /// find the end of the furthest reaching reverse D-path on diagonal (k
951 /// - delta).
952 ///
953 /// @param d the D in D-path.  That's the number of insertions/deletions
954 /// (the number of changes, in other words) in the changeset.  That is
955 /// also the number of non-diagonals in the D-Path.
956 ///
957 /// @param a_begin an iterator to the beginning of the first sequence
958 ///
959 /// @param a_end an iterator that points right after the last element
960 /// of the second sequence to consider.
961 ///
962 /// @param b_begin an iterator to the beginning of the second sequence.
963 ///
964 /// @param b_end an iterator that points right after the last element
965 /// of the second sequence to consider.
966 ///
967 /// @param v the vector of furthest end points of d_paths, at (d-1).
968 /// It contains the abscissae of the furthest end points for different
969 /// values of k - delta, at (d-1).  That is, for k in [-D + 1, -D + 3,
970 /// -D + 5, ..., D - 1], v[k - delta] is the abscissa of the end of the
971 /// furthest reaching (D-1)-path on diagonal k - delta.
972 ///
973 /// @param snak the last snake of the furthest path found.  The end
974 /// point of the snake is the end point of the furthest path.
975 ///
976 /// @return true iff the end of the furthest reaching path that was
977 /// found was inside the boundaries of the edit graph, false
978 /// otherwise.
979 template<typename RandomAccessOutputIterator,
980 	 typename EqualityFunctor>
981 bool
end_of_frr_d_path_in_k_plus_delta(int k,int d,RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,d_path_vec & v,snake & snak)982 end_of_frr_d_path_in_k_plus_delta (int k, int d,
983 				   RandomAccessOutputIterator a_begin,
984 				   RandomAccessOutputIterator a_end,
985 				   RandomAccessOutputIterator b_begin,
986 				   RandomAccessOutputIterator b_end,
987 				   d_path_vec& v, snake& snak)
988 {
989   int a_size = a_end - a_begin;
990   int b_size = b_end - b_begin;
991   int delta = a_size - b_size;
992   int k_plus_delta = k + delta;
993   int x = -1, y = -1;
994   point begin, intermediate, diag_start, end;
995   snake s;
996   EqualityFunctor eq;
997 
998   // Let's pick the end point of the furthest reaching (D-1)-path and
999   // move from there to reach the current k_plus_delta-line.  That end
1000   // point of the furthest reaching (D-1)-path is either on
1001   // v[k_plus_delta-1] or on v[k_plus_delta+1]; the word "furthest"
1002   // means we choose the one which abscissa is the lowest (that is,
1003   // furthest from abscissa M).
1004   if (k_plus_delta == -d + delta
1005       || ((k_plus_delta != d + delta)
1006 	  && (v[k_plus_delta + 1] <= v[k_plus_delta - 1])))
1007     {
1008       // We move left, that means ordinate won't change ...
1009       x = v[k_plus_delta + 1];
1010       y = x - (k_plus_delta + 1);
1011       begin.set(x, y);
1012       // ... and abscissa decreases.
1013       x = x - 1;
1014     }
1015   else
1016     {
1017       // So the furthest end point is on the k_plus_delta - 1
1018       // diagonal.  That is a diagonal that is 'below' the
1019       // k_plus_delta current diagonal.  So to join the current
1020       // diagonal from the k_plus_delta - 1 one, we need to move up.
1021 
1022       // So moving up means abscissa won't change ...
1023       x = v[k_plus_delta - 1];
1024       begin.set(x, x - (k_plus_delta - 1));
1025       // ... and that ordinate decreases.
1026       y = x - (k_plus_delta - 1) - 1;
1027     }
1028 
1029   intermediate.set(x, y);
1030 
1031   // Now, follow the snake.  Note that we stay on the k_plus_delta
1032   // diagonal when we do this.
1033   while (x >= 0 && y >= 0)
1034     if (eq(a_begin[x], b_begin[y]))
1035       {
1036 	if (!diag_start)
1037 	  diag_start.set(x, y);
1038 	x = x - 1;
1039 	y = y - 1;
1040       }
1041     else
1042       break;
1043 
1044   end.set(x, y);
1045 
1046   // Note the point that we store in v here might be outside the
1047   // bounds of the edit graph.  But we store it at this step (for a
1048   // given D) anyway, because out of bound or not, we need this value
1049   // at this step to be able to compute the value of the point on the
1050   // "next" diagonal for the next D.
1051   v[k_plus_delta] = x;
1052 
1053   if (x == -1 && y == -1)
1054     ;
1055   else if (x < -1 || y < -1)
1056     return false;
1057 
1058   s.set(begin, intermediate, diag_start, end);
1059   s.set_forward(false);
1060   snak = s;
1061 
1062   return true;
1063 }
1064 
1065 /// Tests if a given point is a match point in an edit graph.
1066 ///
1067 /// @param a_begin the begin iterator of the first input sequence of
1068 /// the edit graph.
1069 ///
1070 /// @param a_end the end iterator of the first input sequence of the
1071 /// edit graph.  This points to one element passed the end of the
1072 /// sequence.
1073 ///
1074 /// @param b_begin the begin iterator of the second input sequence of
1075 /// the edit graph.
1076 ///
1077 /// @param b_end the end iterator of the second input sequence of the
1078 /// edit graph.  This points the one element passed the end of the
1079 /// sequence.
1080 ///
1081 /// @param point the point to test for being a match point.
1082 ///
1083 /// @return true iff \a point is a match point.
1084 template<typename RandomAccessOutputIterator>
1085 bool
is_match_point(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,const point & point)1086 is_match_point(RandomAccessOutputIterator a_begin,
1087 	       RandomAccessOutputIterator a_end,
1088 	       RandomAccessOutputIterator b_begin,
1089 	       RandomAccessOutputIterator b_end,
1090 	       const point& point)
1091 {
1092   int a_size = a_end - a_begin, b_size = b_end - b_begin;
1093 
1094   if (point.x() < 0
1095       || point.x () >= a_size
1096       || point.y() < 0
1097       || point.y() >= b_size)
1098     return false;
1099 
1100   return (a_begin[point.x()] == b_begin[point.y()]);
1101 }
1102 
1103 /// Returns the middle snake of two sequences A and B, as well as the
1104 /// length of their shortest editing script.
1105 ///
1106 ///  This uses the "linear space refinement" algorithm presented in
1107 /// section 4b in the paper.  As the paper says, "The idea for doing
1108 /// so is to simultaneously run the basic algorithm in both the
1109 /// forward and reverse directions until furthest reaching forward and
1110 /// reverse paths starting at opposing corners 'overlap'."
1111 ///
1112 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1113 /// this function.  It must be a random access output iterator kind.
1114 ///
1115 /// @tparm EqualityFunctor this must be a class that declares a public
1116 /// call operator member returning a boolean and taking two arguments
1117 /// that must be of the same type as the one pointed to by the @ref
1118 /// RandomAccessOutputIterator template parameter. This functor is
1119 /// used to compare the elements referred to by the iterators pased in
1120 /// argument to this function.
1121 ///
1122 /// @param a_begin an iterator pointing to the begining of sequence A.
1123 ///
1124 /// @param a_end an iterator pointing to the end of sequence A.  Note
1125 /// that this points right /after/ the end of vector A.
1126 ///
1127 /// @param b_begin an iterator pointing to the begining of sequence B.
1128 ///
1129 /// @param b_end an iterator pointing to the end of sequence B.  Note
1130 /// that this points right /after/ the end of vector B
1131 ///
1132 /// @param snak out parameter.  This is the snake current when the two
1133 /// paths overlapped.  This is set iff the function returns true;
1134 /// otherwise, this is not touched.
1135 ///
1136 /// @return true is the snake was found, false otherwise.
1137 template<typename RandomAccessOutputIterator,
1138 	 typename EqualityFunctor>
1139 bool
compute_middle_snake(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,snake & snak,int & ses_len)1140 compute_middle_snake(RandomAccessOutputIterator a_begin,
1141 		     RandomAccessOutputIterator a_end,
1142 		     RandomAccessOutputIterator b_begin,
1143 		     RandomAccessOutputIterator b_end,
1144 		     snake& snak, int& ses_len)
1145 {
1146   int a_size = a_end - a_begin;
1147   int N = a_size;
1148   int b_size = b_end - b_begin;
1149   int M = b_size;
1150   int delta = N - M;
1151   d_path_vec forward_d_paths(a_size, b_size);
1152   d_path_vec reverse_d_paths(a_size, b_size);
1153   // These points below are the top leftmost point and bottom
1154   // right-most points of the edit graph.
1155   point first_point(-1, -1), last_point(a_size -1, b_size -1), point_zero(0, 0);
1156 
1157   // We want the initial step (D = 0, k = 0 in the paper) to find a
1158   // furthest reaching point on diagonal k == 0; For that, we need the
1159   // value of x for k == 1; So let's set that value to -1; that is for
1160   // k == 1 (diagonal 1), the point in the edit graph is (-1,-2).
1161   // That way, to get the furthest reaching point on diagonal 0 (k ==
1162   // 0), we go down from (-1,-2) on diagonal 1 and we hit diagonal 0
1163   // on (-1,-1); that is the starting value that the algorithm expects
1164   // for k == 0.
1165   forward_d_paths[1] = -1;
1166 
1167   // Similarly for the reverse paths, for diagonal delta + 1 (note
1168   // that diagonals are centered on delta, unlike for forward paths
1169   // where they are centered on zero), we set the initial point to
1170   // (a_size, b_size - 1).  That way, at step D == 0 and k == delta,
1171   // to reach diagonal delta from the point (a_size, b_size - 1) on
1172   // diagonal delta + 1, we just have to move left, and we hit
1173   // diagonal delta on (a_size - 1, b_size -1); that is the starting
1174   // point value the algorithm expects for k == 0 in the reverse case.
1175   reverse_d_paths[delta + 1] = a_size;
1176 
1177   int d_max = (M + N) / 2 + 1;
1178   for (int d = 0; d <= d_max; ++d)
1179     {
1180       // First build forward paths.
1181       for (int k = -d; k <= d; k += 2)
1182 	{
1183 	  snake s;
1184 	  bool found =
1185 	    end_of_fr_d_path_in_k<RandomAccessOutputIterator,
1186 				  EqualityFunctor>(k, d,
1187 						   a_begin, a_end,
1188 						   b_begin, b_end,
1189 						   forward_d_paths, s);
1190 	  if (!found)
1191 	    continue;
1192 
1193 	  // As the paper says in 4b while explaining the middle snake
1194 	  // algorithm:
1195 	  //
1196 	  // "Thus when delta is odd, check for overlap only while
1197 	  //  extending forward paths ..."
1198 	  if ((delta % 2)
1199 	      && (k >= (delta - (d - 1))) && (k <= (delta + (d - 1))))
1200 	    {
1201 	      point reverse_end;
1202 	      reverse_end.x(reverse_d_paths[k]);
1203 	      reverse_end.y(reverse_end.x() - k);
1204 	      if (ends_of_furthest_d_paths_overlap(s.end(), reverse_end))
1205 		{
1206 		  ses_len = 2 * d - 1;
1207 		  snak = s;
1208 		  return true;
1209 		}
1210 	    }
1211 	}
1212 
1213       // Now build reverse paths.
1214       for (int k = -d; k <= d; k += 2)
1215 	{
1216 	  snake s;
1217 	  bool found =
1218 	    end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,
1219 					      EqualityFunctor>(k, d,
1220 							       a_begin, a_end,
1221 							       b_begin, b_end,
1222 							       reverse_d_paths,
1223 							       s);
1224 
1225 	  if (!found)
1226 	    continue;
1227 
1228 	  // And the paper continues by saying:
1229 	  //
1230 	  // "... and when delta is even, check for overlap only while
1231 	  // extending reverse paths."
1232 	  int k_plus_delta = k + delta;
1233 	  if (!(delta % 2)
1234 	      && (k_plus_delta >= -d) && (k_plus_delta <= d))
1235 	    {
1236 	      point forward_end;
1237 	      forward_end.x(forward_d_paths[k_plus_delta]);
1238 	      forward_end.y(forward_end.x() - k_plus_delta);
1239 	      if (ends_of_furthest_d_paths_overlap(forward_end, s.end()))
1240 		{
1241 		    ses_len = 2 * d;
1242 		    snak = s;
1243 		    return true;
1244 		}
1245 	    }
1246 	}
1247     }
1248   return false;
1249 }
1250 
1251 bool
1252 compute_middle_snake(const char* str1, const char* str2,
1253 		     snake& s, int& ses_len);
1254 
1255 /// This prints the middle snake of two strings.
1256 ///
1257 /// @param a_begin the beginning of the first string.
1258 ///
1259 /// @param b_begin the beginning of the second string.
1260 ///
1261 /// @param s the snake to print.
1262 ///
1263 /// @param out the output stream to print the snake to.
1264 template<typename RandomAccessOutputIterator>
1265 void
print_snake(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator b_begin,const snake & s,ostream & out)1266 print_snake(RandomAccessOutputIterator a_begin,
1267 	    RandomAccessOutputIterator b_begin,
1268 	    const snake &s, ostream& out)
1269 {
1270   if (s.is_empty())
1271     return;
1272 
1273    out << "snake start: ";
1274    out << "(" << s.begin().x() << ", " << s.end().y() << ")\n";
1275 
1276    out << "snake intermediate: ";
1277    out << "(" << s.intermediate().x() << ", " << s.intermediate().y() << ")\n";
1278 
1279    out << "diagonal point(s): ";
1280    if (s.has_diagonal_edge())
1281      for (int x = s.intermediate().x(), y = s.intermediate().y();
1282 	  x <= s.end().x() && y <= s.end().y();
1283 	  ++x, ++y)
1284        {
1285 	 ABG_ASSERT(a_begin[x] == b_begin[y]);
1286 	 out << "(" << x << "," << y << ") ";
1287        }
1288    out << "\n";
1289 
1290    out << "snake end: ";
1291    out << "(" << s.end().x() << ", " << s.end().y() << ")\n";
1292 }
1293 
1294 /// Compute the length of the shortest edit script for two sequences a
1295 /// and b.  This is done using the "Greedy LCS/SES" of figure 2 in the
1296 /// paper.  It can walk the edit graph either foward (when reverse is
1297 /// false) or backward starting from the end (when reverse is true).
1298 ///
1299 /// Here, note that the real content of a and b should start at index
1300 /// 1, for this implementatikon algorithm to match the paper's
1301 /// algorithm in a straightforward manner.  So pleast make sure that
1302 /// at index 0, we just get some non-used value.
1303 ///
1304 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1305 /// this function.  It must be a random access output iterator kind.
1306 ///
1307 /// @tparm EqualityFunctor this must be a class that declares a public
1308 /// call operator member returning a boolean and taking two arguments
1309 /// that must be of the same type as the one pointed to by the @ref
1310 /// RandomAccessOutputIterator template parameter. This functor is
1311 /// used to compare the elements referred to by the iterators pased in
1312 /// argument to this function.
1313 ///
1314 /// @param a the first sequence we care about.
1315 ///
1316 /// @param b the second sequence we care about.
1317 ///
1318 /// @param v the vector that contains the end points of the furthest
1319 /// reaching d-path and (d-1)-path.
1320 template<typename RandomAccessOutputIterator,
1321 	 typename EqualityFunctor>
1322 int
ses_len(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,d_path_vec & v,bool reverse)1323 ses_len(RandomAccessOutputIterator a_begin,
1324 	RandomAccessOutputIterator a_end,
1325 	RandomAccessOutputIterator b_begin,
1326 	RandomAccessOutputIterator b_end,
1327 	d_path_vec& v, bool reverse)
1328 {
1329   unsigned a_size = a_end - a_begin;
1330   unsigned b_size = b_end - b_begin;
1331   snake snak;
1332 
1333   ABG_ASSERT(v.max_d() == a_size + b_size);
1334 
1335   int delta = a_size - b_size;
1336 
1337   if (reverse)
1338     // Set a fictitious (M, N-1) into v[1], to find the furthest
1339     // reaching reverse 0-path (i.e, when we are at d == 0 and k == 0).
1340     v[delta + 1] = a_size - 1;
1341   else
1342     // Set a fictitious (-1,-2) point into v[1], to find the furthest
1343     // reaching forward 0-path (i.e, when we are at d == 0 and k == 0).
1344     v[1] = -1;
1345 
1346   for (unsigned d = 0; d <= v.max_d(); ++d)
1347     {
1348       for (int k = -d; k <= (int) d; k += 2)
1349 	{
1350 	  point end;
1351 	  if (reverse)
1352 	    {
1353 	      bool found =
1354 		end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,
1355 						  EqualityFunctor>(k, d,
1356 								   a_begin, a_end,
1357 								   b_begin, b_end,
1358 								   v, snak);
1359 	      // If we reached the upper left corner of the edit graph then
1360 	      // we are done.
1361 	      if (found && snak.end().x() == -1 && snak.end().y() == -1)
1362 		return d;
1363 	    }
1364 	  else
1365 	    {
1366 	      end_of_fr_d_path_in_k<RandomAccessOutputIterator,
1367 				    EqualityFunctor>(k, d,
1368 						     a_begin, a_end,
1369 						     b_begin, b_end,
1370 						     v, snak);
1371 	      // If we reached the lower right corner of the edit
1372 	      // graph then we are done.
1373 	      if ((snak.end().x() == (int) a_size - 1)
1374 		  && (snak.end().y() == (int) b_size - 1))
1375 		return d;
1376 	    }
1377 	}
1378     }
1379   return 0;
1380 }
1381 
1382 /// Compute the length of the shortest edit script for two sequences a
1383 /// and b.  This is done using the "Greedy LCS/SES" of figure 2 in the
1384 /// paper.  It can walk the edit graph either foward (when reverse is
1385 /// false) or backward starting from the end (when reverse is true).
1386 ///
1387 /// Here, note that the real content of a and b should start at index
1388 /// 1, for this implementatikon algorithm to match the paper's
1389 /// algorithm in a straightforward manner.  So pleast make sure that
1390 /// at index 0, we just get some non-used value.
1391 ///
1392 /// Note that the equality operator used to compare the elements
1393 /// passed in argument to this function is the default "==" operator.
1394 ///
1395 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1396 /// this function.  It must be a random access output iterator kind.
1397 ///
1398 /// @param a the first sequence we care about.
1399 ///
1400 /// @param b the second sequence we care about.
1401 ///
1402 /// @param v the vector that contains the end points of the furthest
1403 /// reaching d-path and (d-1)-path.
1404 template<typename RandomAccessOutputIterator>
1405 int
ses_len(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,d_path_vec & v,bool reverse)1406 ses_len(RandomAccessOutputIterator a_begin,
1407 	RandomAccessOutputIterator a_end,
1408 	RandomAccessOutputIterator b_begin,
1409 	RandomAccessOutputIterator b_end,
1410 	d_path_vec& v, bool reverse)
1411 {
1412   return ses_len<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
1413 								 b_begin, b_end,
1414 								 v, reverse);
1415 }
1416 
1417 int
1418 ses_len(const char* str1,
1419 	const char* str2,
1420 	bool reverse = false);
1421 
1422 bool
1423 snake_end_points(const snake& s, point&, point&);
1424 
1425 /// Compute the longest common subsequence of two (sub-regions of)
1426 /// sequences as well as the shortest edit script from transforming
1427 /// the first (sub-region of) sequence into the second (sub-region of)
1428 /// sequence.
1429 ///
1430 /// A sequence is determined by a base, a beginning offset and an end
1431 /// offset.  The base always points to the container that contains the
1432 /// sequence to consider.  The beginning offset is an iterator that
1433 /// points the beginning of the sub-region of the sequence that we
1434 /// actually want to consider.  The end offset is an iterator that
1435 /// points to the end of the sub-region of the sequence that we
1436 /// actually want to consider.
1437 ///
1438 /// This uses the LCS algorithm of the paper at section 4b.
1439 ///
1440 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1441 /// this function.  It must be a random access output iterator kind.
1442 ///
1443 /// @tparm EqualityFunctor this must be a class that declares a public
1444 /// call operator member returning a boolean and taking two arguments
1445 /// that must be of the same type as the one pointed to by the @ref
1446 /// RandomAccessOutputIterator template parameter. This functor is
1447 /// used to compare the elements referred to by the iterators pased in
1448 /// argument to this function.
1449 ///
1450 /// @param a_base the iterator to the base of the first sequence.
1451 ///
1452 /// @param a_start an iterator to the beginning of the sub-region
1453 /// of the first sequence to actually consider.
1454 ///
1455 /// @param a_end an iterator to the end of the sub-region of the first
1456 /// sequence to consider.
1457 ///
1458 ///@param b_base an iterator to the base of the second sequence to
1459 ///consider.
1460 ///
1461 /// @param b_start an iterator to the beginning of the sub-region
1462 /// of the second sequence to actually consider.
1463 ///
1464 /// @param b_end an iterator to the end of the sub-region of the
1465 /// second sequence to actually consider.
1466 ///
1467 /// @param lcs the resulting lcs.  This is set iff the function
1468 /// returns true.
1469 ///
1470 /// @param ses the resulting shortest editing script.
1471 ///
1472 /// @param ses_len the length of the ses above.  Normally this can be
1473 /// retrieved from ses.length(), but this parameter is here for sanity
1474 /// check purposes.  The function computes the length of the ses in
1475 /// two redundant ways and ensures that both methods lead to the same
1476 /// result.
1477 ///
1478 /// @return true upon successful completion, false otherwise.
1479 template<typename RandomAccessOutputIterator,
1480 	 typename EqualityFunctor>
1481 void
compute_diff(RandomAccessOutputIterator a_base,RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_base,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,vector<point> & lcs,edit_script & ses,int & ses_len)1482 compute_diff(RandomAccessOutputIterator a_base,
1483 	     RandomAccessOutputIterator a_begin,
1484 	     RandomAccessOutputIterator a_end,
1485 	     RandomAccessOutputIterator b_base,
1486 	     RandomAccessOutputIterator b_begin,
1487 	     RandomAccessOutputIterator b_end,
1488 	     vector<point>& lcs,
1489 	     edit_script& ses,
1490 	     int& ses_len)
1491 {
1492   int a_size = a_end - a_begin;
1493   int b_size = b_end - b_begin;
1494   unsigned a_offset = a_begin - a_base, b_offset = b_begin - b_base;
1495 
1496   if (a_size == 0 || b_size == 0)
1497     {
1498       if (a_size > 0 && b_size == 0)
1499 	// All elements of the first sequences have been deleted.  So add
1500 	// the relevant deletions to the edit script.
1501 	for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
1502 	  ses.deletions().push_back(deletion(i - a_base));
1503 
1504       if (b_size > 0 && a_size == 0)
1505 	{
1506 	  // All elements present in the second sequence are part of
1507 	  // an insertion into the first sequence at a_end.  So add
1508 	  // that insertion to the edit script.
1509 	  int a_full_size = a_end - a_base;
1510 	  int insertion_index = a_full_size ? a_full_size - 1 : -1;
1511 	  insertion ins(insertion_index);
1512 	  for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
1513 	    ins.inserted_indexes().push_back(i - b_base);
1514 
1515 	  ses.insertions().push_back(ins);
1516 	}
1517 
1518       ses_len =  a_size + b_size;
1519       return;
1520     }
1521 
1522   int d = 0;
1523   snake snak;
1524   vector<point> trace; // the trace of the edit graph.  Read the paper
1525 		       // to understand what a trace is.
1526   bool has_snake =
1527     compute_middle_snake<RandomAccessOutputIterator,
1528 			 EqualityFunctor>(a_begin, a_end,
1529 					  b_begin, b_end,
1530 					  snak, d);
1531   if (has_snake)
1532     {
1533       // So middle_{begin,end} are expressed wrt a_begin and b_begin.
1534       // Let's express them wrt a_base and b_base.
1535       snak.add(a_offset, b_offset);
1536       ses_len = d;
1537     }
1538 
1539   if (has_snake)
1540     {
1541       if ( snak.has_diagonal_edge())
1542 	for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
1543 	     x <= snak.end().x() && y <= snak.end().y();
1544 	     ++x, ++y)
1545 	  {
1546 	    point p(x, y);
1547 	    trace.push_back(p);
1548 	  }
1549     }
1550   else
1551     {
1552       // So there is no middle snake.  That means there is no lcs, so
1553       // the two sequences are different.
1554 
1555       // In other words, all the elements of the first sequence have
1556       // been deleted ...
1557       for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
1558 	ses.deletions().push_back(deletion(i - a_base));
1559 
1560       // ... and all the elements of the second sequence are insertions
1561       // that happen at the beginning of the first sequence.
1562       insertion ins(a_begin - a_base);
1563       for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
1564 	ins.inserted_indexes().push_back(i - b_base);
1565       ses.insertions().push_back(ins);
1566 
1567       ses_len = a_size + b_size;
1568       ABG_ASSERT(ses_len == ses.length());
1569       return;
1570     }
1571 
1572   if (d > 1)
1573     {
1574       int tmp_ses_len0 = 0;
1575       edit_script tmp_ses0;
1576       point px, pu;
1577       snake_end_points(snak, px, pu);
1578       compute_diff<RandomAccessOutputIterator,
1579 		   EqualityFunctor>(a_base, a_begin, a_base + (px.x() + 1),
1580 				    b_base, b_begin, b_base + (px.y() + 1),
1581 				    lcs, tmp_ses0, tmp_ses_len0);
1582 
1583       lcs.insert(lcs.end(), trace.begin(), trace.end());
1584 
1585       int tmp_ses_len1 = 0;
1586       edit_script tmp_ses1;
1587       compute_diff<RandomAccessOutputIterator,
1588 		   EqualityFunctor>(a_base, a_base + (pu.x() + 1), a_end,
1589 				    b_base, b_base + (pu.y() + 1), b_end,
1590 				    lcs, tmp_ses1, tmp_ses_len1);
1591       ABG_ASSERT(tmp_ses0.length() + tmp_ses1.length() == d);
1592       ABG_ASSERT(tmp_ses_len0 + tmp_ses_len1 == d);
1593       ses.append(tmp_ses0);
1594       ses.append(tmp_ses1);
1595     }
1596   else if (d == 1)
1597     {
1598       if (snak.has_diagonal_edge())
1599 	{
1600 	  for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
1601 	       x <= snak.end().x() && y <= snak.end().y();
1602 	       ++x, ++y)
1603 	    {
1604 	      point p(x, y);
1605 	      trace.push_back(p);
1606 	    }
1607 	}
1608 
1609       if (snak.has_vertical_edge())
1610 	{
1611 	  point p = snak.intermediate();
1612 	  insertion ins(p.x());
1613 	  ins.inserted_indexes().push_back(p.y());
1614 	  ses.insertions().push_back(ins);
1615 	}
1616       else if (snak.has_horizontal_edge())
1617 	{
1618 	  if (snak.is_forward())
1619 	    {
1620 	      deletion del(snak.intermediate().x());
1621 	      ses.deletions().push_back(del);
1622 	    }
1623 	  else
1624 	    {
1625 	      deletion del(snak.begin().x());
1626 	      ses.deletions().push_back(del);
1627 	    }
1628 	}
1629     }
1630   else if (d == 0)
1631     {
1632       // Obviously on the middle snake is part of the solution, as
1633       // there is no edit script; iow, the two sequences are
1634       // identical.
1635       lcs.insert(lcs.end(), trace.begin(), trace.end());
1636       ses_len = 0;
1637     }
1638 
1639   ABG_ASSERT(ses_len == ses.length());
1640 }
1641 
1642 /// Compute the longest common subsequence of two (sub-regions of)
1643 /// sequences as well as the shortest edit script from transforming
1644 /// the first (sub-region of) sequence into the second (sub-region of)
1645 /// sequence.
1646 ///
1647 /// This uses the LCS algorithm of the paper at section 4b.
1648 ///
1649 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1650 /// this function.  It must be a random access output iterator kind.
1651 ///
1652 /// @tparm EqualityFunctor this must be a class that declares a public
1653 /// call operator member returning a boolean and taking two arguments
1654 /// that must be of the same type as the one pointed to by the @ref
1655 /// RandomAccessOutputIterator template parameter. This functor is
1656 /// used to compare the elements referred to by the iterators pased in
1657 /// argument to this function.
1658 ///
1659 /// @param a_start an iterator to the beginning of the first sequence
1660 /// to consider.
1661 ///
1662 /// @param a_end an iterator to the end of the first sequence to
1663 /// consider.
1664 ///
1665 /// @param b_start an iterator to the beginning of the second sequence
1666 /// to consider.
1667 ///
1668 /// @param b_end an iterator to the end of the second sequence to
1669 /// consider.
1670 ///
1671 /// @param lcs the resulting lcs.  This is set iff the function
1672 /// returns true.
1673 ///
1674 /// @param ses the resulting shortest editing script.
1675 ///
1676 /// @param ses_len the length of the ses above.  Normally this can be
1677 /// retrieved from ses.length(), but this parameter is here for sanity
1678 /// check purposes.  The function computes the length of the ses in
1679 /// two redundant ways and ensures that both methods lead to the same
1680 /// result.
1681 ///
1682 /// @return true upon successful completion, false otherwise.
1683 template<typename RandomAccessOutputIterator,
1684 	 typename EqualityFunctor>
1685 void
compute_diff(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,vector<point> & lcs,edit_script & ses,int & ses_len)1686 compute_diff(RandomAccessOutputIterator a_begin,
1687 	     RandomAccessOutputIterator a_end,
1688 	     RandomAccessOutputIterator b_begin,
1689 	     RandomAccessOutputIterator b_end,
1690 	     vector<point>& lcs,
1691 	     edit_script& ses,
1692 	     int& ses_len)
1693 {
1694   compute_diff<RandomAccessOutputIterator,
1695 	       EqualityFunctor>(a_begin, a_begin, a_end,
1696 				b_begin, b_begin, b_end,
1697 				lcs, ses, ses_len);
1698 }
1699 
1700 /// Compute the longest common subsequence of two (sub-regions of)
1701 /// sequences as well as the shortest edit script from transforming
1702 /// the first (sub-region of) sequence into the second (sub-region of)
1703 /// sequence.
1704 ///
1705 /// A sequence is determined by a base, a beginning offset and an end
1706 /// offset.  The base always points to the container that contains the
1707 /// sequence to consider.  The beginning offset is an iterator that
1708 /// points the beginning of the sub-region of the sequence that we
1709 /// actually want to consider.  The end offset is an iterator that
1710 /// points to the end of the sub-region of the sequence that we
1711 /// actually want to consider.
1712 ///
1713 /// This uses the LCS algorithm of the paper at section 4b.
1714 ///
1715 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1716 /// this function.  It must be a random access output iterator kind.
1717 ///
1718 /// @tparm EqualityFunctor this must be a class that declares a public
1719 /// call operator member returning a boolean and taking two arguments
1720 /// that must be of the same type as the one pointed to by the @ref
1721 /// RandomAccessOutputIterator template parameter. This functor is
1722 /// used to compare the elements referred to by the iterators pased in
1723 /// argument to this function.
1724 ///
1725 /// @param a_base the iterator to the base of the first sequence.
1726 ///
1727 /// @param a_start an iterator to the beginning of the sub-region
1728 /// of the first sequence to actually consider.
1729 ///
1730 /// @param a_end an iterator to the end of the sub-region of the first
1731 /// sequence to consider.
1732 ///
1733 ///@param b_base an iterator to the base of the second sequence to
1734 ///consider.
1735 ///
1736 /// @param b_start an iterator to the beginning of the sub-region
1737 /// of the second sequence to actually consider.
1738 ///
1739 /// @param b_end an iterator to the end of the sub-region of the
1740 /// second sequence to actually consider.
1741 ///
1742 /// @param lcs the resulting lcs.  This is set iff the function
1743 /// returns true.
1744 ///
1745 /// @param ses the resulting shortest editing script.
1746 ///
1747 /// @return true upon successful completion, false otherwise.
1748 template<typename RandomAccessOutputIterator,
1749 	 typename EqualityFunctor>
1750 void
compute_diff(RandomAccessOutputIterator a_base,RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_base,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,vector<point> & lcs,edit_script & ses)1751 compute_diff(RandomAccessOutputIterator a_base,
1752 	     RandomAccessOutputIterator a_begin,
1753 	     RandomAccessOutputIterator a_end,
1754 	     RandomAccessOutputIterator b_base,
1755 	     RandomAccessOutputIterator b_begin,
1756 	     RandomAccessOutputIterator b_end,
1757 	     vector<point>& lcs,
1758 	     edit_script& ses)
1759 {
1760   int ses_len = 0;
1761 
1762   compute_diff<RandomAccessOutputIterator,
1763 	       EqualityFunctor>(a_base, a_begin, a_end,
1764 				b_base, b_begin, b_end,
1765 				lcs, ses, ses_len);
1766 }
1767 
1768 /// Compute the longest common subsequence of two (sub-regions of)
1769 /// sequences as well as the shortest edit script from transforming
1770 /// the first (sub-region of) sequence into the second (sub-region of)
1771 /// sequence.
1772 ///
1773 /// This uses the LCS algorithm of the paper at section 4b.
1774 ///
1775 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1776 /// this function.  It must be a random access output iterator kind.
1777 ///
1778 /// @tparm EqualityFunctor this must be a class that declares a public
1779 /// call operator member returning a boolean and taking two arguments
1780 /// that must be of the same type as the one pointed to by the @ref
1781 /// RandomAccessOutputIterator template parameter. This functor is
1782 /// used to compare the elements referred to by the iterators pased in
1783 /// argument to this function.
1784 ///
1785 /// @param a_start an iterator to the beginning of the first sequence
1786 /// to consider.
1787 ///
1788 /// @param a_end an iterator to the end of the first sequence to
1789 /// consider.
1790 ///
1791 /// @param b_start an iterator to the beginning of the sequence to
1792 /// actually consider.
1793 ///
1794 /// @param b_end an iterator to the end of second sequence to
1795 /// consider.
1796 ///
1797 /// @param lcs the resulting lcs.  This is set iff the function
1798 /// returns true.
1799 ///
1800 /// @param ses the resulting shortest editing script.
1801 ///
1802 /// @return true upon successful completion, false otherwise.
1803 template<typename RandomAccessOutputIterator,
1804 	 typename EqualityFunctor>
1805 void
compute_diff(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,vector<point> & lcs,edit_script & ses)1806 compute_diff(RandomAccessOutputIterator a_begin,
1807 	     RandomAccessOutputIterator a_end,
1808 	     RandomAccessOutputIterator b_begin,
1809 	     RandomAccessOutputIterator b_end,
1810 	     vector<point>& lcs,
1811 	     edit_script& ses)
1812 {
1813   compute_diff<RandomAccessOutputIterator,
1814 	       EqualityFunctor>(a_begin, a_begin, a_end,
1815 				b_begin, b_begin, b_end,
1816 				lcs, ses);
1817 }
1818 
1819 /// Compute the longest common subsequence of two (sub-regions of)
1820 /// sequences as well as the shortest edit script from transforming
1821 /// the first (sub-region of) sequence into the second (sub-region of)
1822 /// sequence.
1823 ///
1824 /// This uses the LCS algorithm of the paper at section 4b.
1825 ///
1826 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1827 /// this function.  It must be a random access output iterator kind.
1828 ///
1829 /// @param a_start an iterator to the beginning of the first sequence
1830 /// to consider.
1831 ///
1832 /// @param a_end an iterator to the end of the first sequence to
1833 /// consider.
1834 ///
1835 /// @param b_start an iterator to the beginning of the sequence to
1836 /// actually consider.
1837 ///
1838 /// @param b_end an iterator to the end of second sequence to
1839 /// consider.
1840 ///
1841 /// @param lcs the resulting lcs.  This is set iff the function
1842 /// returns true.
1843 ///
1844 /// @param ses the resulting shortest editing script.
1845 ///
1846 /// @return true upon successful completion, false otherwise.
1847 template<typename RandomAccessOutputIterator>
1848 void
compute_diff(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,vector<point> & lcs,edit_script & ses)1849 compute_diff(RandomAccessOutputIterator a_begin,
1850 	     RandomAccessOutputIterator a_end,
1851 	     RandomAccessOutputIterator b_begin,
1852 	     RandomAccessOutputIterator b_end,
1853 	     vector<point>& lcs,
1854 	     edit_script& ses)
1855 {
1856   compute_diff<RandomAccessOutputIterator,
1857 	       default_eq_functor>(a_begin, a_end, b_begin, b_end, lcs, ses);
1858 }
1859 
1860 /// Compute the longest common subsequence of two (sub-regions of)
1861 /// sequences as well as the shortest edit script from transforming
1862 /// the first (sub-region of) sequence into the second (sub-region of)
1863 /// sequence.
1864 ///
1865 /// A sequence is determined by a base, a beginning offset and an end
1866 /// offset.  The base always points to the container that contains the
1867 /// sequence to consider.  The beginning offset is an iterator that
1868 /// points the beginning of the sub-region of the sequence that we
1869 /// actually want to consider.  The end offset is an iterator that
1870 /// points to the end of the sub-region of the sequence that we
1871 /// actually want to consider.
1872 ///
1873 /// This uses the LCS algorithm of the paper at section 4b.
1874 ///
1875 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1876 /// this function.  It must be a random access output iterator kind.
1877 ///
1878 /// @tparm EqualityFunctor this must be a class that declares a public
1879 /// call operator member returning a boolean and taking two arguments
1880 /// that must be of the same type as the one pointed to by the @ref
1881 /// RandomAccessOutputIterator template parameter. This functor is
1882 /// used to compare the elements referred to by the iterators pased in
1883 /// argument to this function.
1884 ///
1885 /// @param a_base the iterator to the base of the first sequence.
1886 ///
1887 /// @param a_start an iterator to the beginning of the sub-region
1888 /// of the first sequence to actually consider.
1889 ///
1890 /// @param a_end an iterator to the end of the sub-region of the first
1891 /// sequence to consider.
1892 ///
1893 /// @param b_base an iterator to the base of the second sequence to
1894 /// consider.
1895 ///
1896 /// @param b_start an iterator to the beginning of the sub-region
1897 /// of the second sequence to actually consider.
1898 ///
1899 /// @param b_end an iterator to the end of the sub-region of the
1900 /// second sequence to actually consider.
1901 ///
1902 /// @param ses the resulting shortest editing script.
1903 ///
1904 /// @return true upon successful completion, false otherwise.
1905 template<typename RandomAccessOutputIterator,
1906 	 typename EqualityFunctor>
1907 void
compute_diff(RandomAccessOutputIterator a_base,RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_base,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,edit_script & ses)1908 compute_diff(RandomAccessOutputIterator a_base,
1909 	     RandomAccessOutputIterator a_begin,
1910 	     RandomAccessOutputIterator a_end,
1911 	     RandomAccessOutputIterator b_base,
1912 	     RandomAccessOutputIterator b_begin,
1913 	     RandomAccessOutputIterator b_end,
1914 	     edit_script& ses)
1915 {
1916   vector<point> lcs;
1917 
1918   compute_diff<RandomAccessOutputIterator,
1919 	       EqualityFunctor>(a_base, a_begin, a_end,
1920 				b_base, b_begin, b_end,
1921 				lcs, ses);
1922 }
1923 
1924 /// Compute the longest common subsequence of two (sub-regions of)
1925 /// sequences as well as the shortest edit script from transforming
1926 /// the first (sub-region of) sequence into the second (sub-region of)
1927 /// sequence.
1928 ///
1929 /// This uses the LCS algorithm of the paper at section 4b.
1930 ///
1931 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1932 /// this function.  It must be a random access output iterator kind.
1933 ///
1934 /// @tparm EqualityFunctor this must be a class that declares a public
1935 /// call operator member returning a boolean and taking two arguments
1936 /// that must be of the same type as the one pointed to by the @ref
1937 /// RandomAccessOutputIterator template parameter. This functor is
1938 /// used to compare the elements referred to by the iterators pased in
1939 /// argument to this function.
1940 ///
1941 /// @param a_start an iterator to the beginning of the first sequence
1942 /// to consider.
1943 ///
1944 /// @param a_end an iterator to the end of the first sequence to
1945 /// consider.
1946 ///
1947 /// @param b_start an iterator to the beginning of the second sequence
1948 /// to consider.
1949 ///
1950 /// @param b_end an iterator to the end of the second sequence to
1951 /// consider.
1952 ///
1953 /// @param ses the resulting shortest editing script.
1954 ///
1955 /// @return true upon successful completion, false otherwise.
1956 template<typename RandomAccessOutputIterator,
1957 	 typename EqualityFunctor>
1958 void
compute_diff(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,edit_script & ses)1959 compute_diff(RandomAccessOutputIterator a_begin,
1960 	     RandomAccessOutputIterator a_end,
1961 	     RandomAccessOutputIterator b_begin,
1962 	     RandomAccessOutputIterator b_end,
1963 	     edit_script& ses)
1964 {
1965   compute_diff<RandomAccessOutputIterator,
1966 	       EqualityFunctor>(a_begin, a_begin, a_end,
1967 				b_begin, b_begin, b_end,
1968 				ses);
1969 }
1970 
1971 /// Compute the longest common subsequence of two (sub-regions of)
1972 /// sequences as well as the shortest edit script from transforming
1973 /// the first (sub-region of) sequence into the second (sub-region of)
1974 /// sequence.
1975 ///
1976 /// This uses the LCS algorithm of the paper at section 4b.
1977 ///
1978 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1979 /// this function.  It must be a random access output iterator kind.
1980 ///
1981 /// @param a_start an iterator to the beginning of the first sequence
1982 /// to consider.
1983 ///
1984 /// @param a_end an iterator to the end of the first sequence to
1985 /// consider.
1986 ///
1987 /// @param b_start an iterator to the beginning of the second sequence
1988 /// to consider.
1989 ///
1990 /// @param b_end an iterator to the end of the second sequence to
1991 /// consider.
1992 ///
1993 /// @param ses the resulting shortest editing script.
1994 ///
1995 /// @return true upon successful completion, false otherwise.
1996 template<typename RandomAccessOutputIterator>
1997 void
compute_diff(RandomAccessOutputIterator a_begin,RandomAccessOutputIterator a_end,RandomAccessOutputIterator b_begin,RandomAccessOutputIterator b_end,edit_script & ses)1998 compute_diff(RandomAccessOutputIterator a_begin,
1999 	     RandomAccessOutputIterator a_end,
2000 	     RandomAccessOutputIterator b_begin,
2001 	     RandomAccessOutputIterator b_end,
2002 	     edit_script& ses)
2003 {
2004   compute_diff<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
2005 							       b_begin, b_end,
2006 							       ses);
2007 }
2008 
2009 void
2010 compute_lcs(const char* str1, const char* str2, int &ses_len, string& lcs);
2011 
2012 void
2013 compute_ses(const char* str1, const char* str2, edit_script& ses);
2014 
2015 /// Display an edit script on standard output.
2016 ///
2017 /// @param es the edit script to display
2018 ///
2019 /// @param str1_base the first string the edit script is about.
2020 ///
2021 /// @pram str2_base the second string the edit script is about.
2022 template<typename RandomAccessOutputIterator>
2023 void
display_edit_script(const edit_script & es,const RandomAccessOutputIterator str1_base,const RandomAccessOutputIterator str2_base,ostream & out)2024 display_edit_script(const edit_script& es,
2025 		    const RandomAccessOutputIterator str1_base,
2026 		    const RandomAccessOutputIterator str2_base,
2027 		    ostream& out)
2028 {
2029   if (es.num_deletions() == 0)
2030     out << "no deletion:\n";
2031   else if (es.num_deletions() == 1)
2032     {
2033       out << "1 deletion:\n"
2034 	  << "\t happened at index: ";
2035     }
2036   else
2037     {
2038       out << es.num_deletions() << " deletions:\n"
2039 	   << "\t happened at indexes: ";
2040     }
2041 
2042   for (vector<deletion>::const_iterator i = es.deletions().begin();
2043        i != es.deletions().end();
2044        ++i)
2045     {
2046       if (i != es.deletions().begin())
2047 	out << ", ";
2048       out << i->index() << " (" << str1_base[i->index()] << ")";
2049     }
2050   out << "\n\n";
2051 
2052   if (es.num_insertions() == 0)
2053     out << "no insertion\n";
2054   else if (es.num_insertions() == 1)
2055     out << "1 insertion\n";
2056   else
2057       out << es.num_insertions() << " insertions:\n";
2058   for (vector<insertion>::const_iterator i = es.insertions().begin();
2059        i != es.insertions().end();
2060        ++i)
2061     {
2062       int idx = i->insertion_point_index();
2063       if (idx < 0)
2064 	out << "\t before index of first sequence: " << idx + 1
2065 	    << " (" << str1_base[idx + 1] << ")\n";
2066       else
2067 	out << "\t after index of first sequence: " << idx
2068 	    << " (" << str1_base[idx] << ")\n";
2069 
2070       if (!i->inserted_indexes().empty())
2071 	out << "\t\t inserted indexes from second sequence: ";
2072 
2073       for (vector<unsigned>::const_iterator j = i->inserted_indexes().begin();
2074 	   j != i->inserted_indexes().end();
2075 	   ++j)
2076 	{
2077 	  if (j != i->inserted_indexes().begin())
2078 	    out << ", ";
2079 	  out << *j << " (" << str2_base[*j] << ")";
2080 	}
2081       out << "\n";
2082     }
2083   out << "\n\n";
2084 }
2085 
2086 }//end namespace diff_utils
2087 
2088 }//end namespace abigail
2089 #endif // __ABG_DIFF_UTILS_H__
2090