common/escape/UnicodeEscaperTest.java

/*
 * Copyright (C) 2008 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.escape;

import com.google.common.annotations.GwtCompatible;

import junit.framework.TestCase;

/**
 * Tests for {@link UnicodeEscaper}.
 *
 * @author David Beaumont
 */
@GwtCompatible
public class UnicodeEscaperTest extends TestCase {

  private static final String SMALLEST_SURROGATE =
      "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE;
  private static final String LARGEST_SURROGATE =
      "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE;

  private static final String TEST_STRING =
      "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" +
      SMALLEST_SURROGATE + "0189" +  LARGEST_SURROGATE;

  // Escapes nothing
  private static final UnicodeEscaper NOP_ESCAPER = new UnicodeEscaper() {
    @Override
    protected char[] escape(int c) {
      return null;
    }
  };

  // Escapes everything except [a-zA-Z0-9]
  private static final UnicodeEscaper SIMPLE_ESCAPER = new UnicodeEscaper() {
    @Override
    protected char[] escape(int cp) {
      return ('a' <= cp && cp <= 'z') ||
             ('A' <= cp && cp <= 'Z') ||
             ('0' <= cp && cp <= '9') ? null :
          ("[" + String.valueOf(cp) + "]").toCharArray();
    }
  };

  public void testNopEscaper() {
    UnicodeEscaper e = NOP_ESCAPER;
    assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING));
  }

  public void testSimpleEscaper() {
    UnicodeEscaper e = SIMPLE_ESCAPER;
    String expected =
        "[0]abyz[128][256][2048][4096]ABYZ[65535]" +
        "[" + Character.MIN_SUPPLEMENTARY_CODE_POINT + "]" +
        "0189[" + Character.MAX_CODE_POINT + "]";
    assertEquals(expected, escapeAsString(e, TEST_STRING));
  }

  public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer
    StringBuffer input = new StringBuffer();
    StringBuffer expected = new StringBuffer();
    for (int i = 256; i < 1024; i++) {
      input.append((char) i);
      expected.append("[" + i + "]");
    }
    assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString()));
  }

  public void testSurrogatePairs() {
    UnicodeEscaper e = SIMPLE_ESCAPER;

    // Build up a range of surrogate pair characters to test
    final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT;
    final int max = Character.MAX_CODE_POINT;
    final int range = max - min;
    final int s1 = min + (1 * range) / 4;
    final int s2 = min + (2 * range) / 4;
    final int s3 = min + (3 * range) / 4;
    final char[] dst = new char[12];

    // Put surrogate pairs at odd indices so they can be split easily
    dst[0] = 'x';
    Character.toChars(min, dst, 1);
    Character.toChars(s1, dst, 3);
    Character.toChars(s2, dst, 5);
    Character.toChars(s3, dst, 7);
    Character.toChars(max, dst, 9);
    dst[11] = 'x';
    String test = new String(dst);

    // Get the expected result string
    String expected =
        "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x";
    assertEquals(expected, escapeAsString(e, test));
  }

  public void testTrailingHighSurrogate() {
    String test = "abc" + Character.MIN_HIGH_SURROGATE;
    try {
      escapeAsString(NOP_ESCAPER, test);
      fail("Trailing high surrogate should cause exception");
    } catch (IllegalArgumentException expected) {
      // Pass
    }
    try {
      escapeAsString(SIMPLE_ESCAPER, test);
      fail("Trailing high surrogate should cause exception");
    } catch (IllegalArgumentException expected) {
      // Pass
    }
  }

  public void testNullInput() {
    UnicodeEscaper e = SIMPLE_ESCAPER;
    try {
      e.escape((String) null);
      fail("Null string should cause exception");
    } catch (NullPointerException expected) {
      // Pass
    }
  }

  public void testBadStrings() {
    UnicodeEscaper e = SIMPLE_ESCAPER;
    String[] BAD_STRINGS = {
        String.valueOf(Character.MIN_LOW_SURROGATE),
        Character.MIN_LOW_SURROGATE + "xyz",
        "abc" + Character.MIN_LOW_SURROGATE,
        "abc" + Character.MIN_LOW_SURROGATE + "xyz",
        String.valueOf(Character.MAX_LOW_SURROGATE),
        Character.MAX_LOW_SURROGATE + "xyz",
        "abc" + Character.MAX_LOW_SURROGATE,
        "abc" + Character.MAX_LOW_SURROGATE + "xyz",
    };
    for (String s : BAD_STRINGS) {
      try {
        escapeAsString(e, s);
        fail("Isolated low surrogate should cause exception [" + s + "]");
      } catch (IllegalArgumentException expected) {
        // Pass
      }
    }
  }

  public void testFalsePositivesForNextEscapedIndex() {
    UnicodeEscaper e = new UnicodeEscaper() {
      // Canonical escaper method that only escapes lower case ASCII letters.
      @Override
      protected char[] escape(int cp) {
        return ('a' <= cp && cp <= 'z') ?
            new char[] { Character.toUpperCase((char) cp) } : null;
      }
      // Inefficient implementation that defines all letters as escapable.
      @Override
      protected int nextEscapeIndex(CharSequence csq, int index, int end) {
        while (index < end && !Character.isLetter(csq.charAt(index))) {
          index++;
        }
        return index;
      }
    };
    assertEquals("\0HELLO \uD800\uDC00 WORLD!\n",
        e.escape("\0HeLLo \uD800\uDC00 WorlD!\n"));
  }

  public void testCodePointAt_IndexOutOfBoundsException() {
    try {
      UnicodeEscaper.codePointAt("Testing...", 4, 2);
      fail();
    } catch (IndexOutOfBoundsException expected) {
    }
  }

  private String escapeAsString(Escaper e, String s) {
    return e.escape(s);
  }
}