1#!/usr/bin/perl
2# Copyright 2008 The RE2 Authors.  All Rights Reserved.
3# Use of this source code is governed by a BSD-style
4# license that can be found in the LICENSE file.
5
6# Generate table entries giving character ranges
7# for POSIX/Perl character classes.  Rather than
8# figure out what the definition is, it is easier to ask
9# Perl about each letter from 0-128 and write down
10# its answer.
11
# POSIX bracket class names, in the order their tables are emitted.
# Each bare name is wrapped as "[:name:]" here; ComputeClass adds the
# outer brackets when it builds the regular expression.
@posixclasses = map { "[:$_:]" } qw(
	alnum alpha ascii blank cntrl digit graph
	lower print punct space upper word xdigit
);
28
# Perl shorthand classes (\d, \s, \w), stored with a single literal
# backslash in front of each letter.
@perlclasses = map { "\\$_" } qw(d s w);
34
# ComputeClass($class)
#
# Asks Perl which character codes in 0..128 match the bracket expression
# "[$class]" and returns the matches as a list of [first, last] array
# references, one per maximal run of consecutive matching codes.
#
# A run is closed at the first non-matching code.  A run that is still
# open after code 128 has been examined is closed at 0xFF.
sub ComputeClass($) {
  my ($class) = @_;

  # Compile the bracket expression once instead of on every probe.
  my $pattern = "[$class]";
  my $matcher = qr/$pattern/;

  my @ranges;
  my $run_start;    # undef while no run is open

  for my $code (0 .. 128) {
    if (chr($code) =~ $matcher) {
      $run_start = $code unless defined $run_start;
      next;
    }

    # Non-matching code: close the open run, if any.
    next unless defined $run_start;
    push @ranges, [$run_start, $code - 1];
    undef $run_start;
  }

  # Final flush: anything still open extends to the end of the byte range.
  push @ranges, [$run_start, 255] if defined $run_start;

  return @ranges;
}
55
# PrintClass($cname, $name, @ranges)
#
# Prints, on the currently selected output handle, a C array definition
# named "code$cname" with one "{ lo, hi }" initializer per range; $name is
# placed in a trailing comment on the first line.
#
#   $cname   suffix appended to "code" to form the array identifier
#   $name    class name as written in a regexp, e.g. "\d" or "[:alpha:]"
#   @ranges  array references of the form [lo, hi]
#
# Returns two strings, each a UGroup initializer that refers to the array:
# the first for the class itself (sign +1), the second for its negation
# (sign -1).  The negated name is formed by inserting "^" after the first
# colon when the name contains one, and by upper-casing a-z otherwise.
sub PrintClass($$@) {
  my ($cname, $name, @ranges) = @_;

  print "static URange16 code${cname}[] = {  /* $name */\n";
  foreach my $range (@ranges) {
    my ($lo, $hi) = @$range;
    printf "\t{ 0x%x, 0x%x },\n", $lo, $hi;
  }
  print "};\n";

  my $n = @ranges;

  # Double every backslash so the name is a valid C string literal.
  my $escname = $name;
  $escname =~ s/\\/\\\\/g;

  # Declared lexically so that no package-level $negname is created or
  # overwritten as a side effect of calling this sub.
  my $negname = $escname;
  if ($negname =~ /:/) {
    $negname =~ s/:/:^/;
  } else {
    $negname =~ y/a-z/A-Z/;
  }

  return "{ \"$escname\", +1, code$cname, $n }", "{ \"$negname\", -1, code$cname, $n }";
}
75
# Running counter shared by all PrintClasses calls; its value forms the
# numeric suffix of each generated "codeN" array name.
my $gen = 0;

# PrintClasses($cname, @classes)
#
# For every class in @classes, computes its ranges and prints its codeN
# array, then prints the "${cname}_groups" table listing the entries
# returned by PrintClass (two per class), followed by an int holding the
# number of entries in that table.
sub PrintClasses($@) {
  my ($cname, @classes) = @_;

  my @entries;
  for my $class (@classes) {
    $gen++;
    push @entries, PrintClass($gen, $class, ComputeClass($class));
  }

  print "UGroup ${cname}_groups[] = {\n";
  print "\t$_,\n" for @entries;
  print "};\n";

  my $total = scalar @entries;
  print "int num_${cname}_groups = $total;\n";
}
93
# Write the generated C++ source to standard output: a fixed preamble,
# the tables for the Perl and POSIX groups (in that order), and the
# closing brace of the namespace.
print <<'HEADER';
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

HEADER

foreach my $group (["perl", \@perlclasses], ["posix", \@posixclasses]) {
  my ($cname, $classes) = @$group;
  PrintClasses($cname, @$classes);
}

print <<'FOOTER';

}  // namespace re2
FOOTER
111