UriUtil.java

/*
 * Copyright 2012, Ryan J. McDonough
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.damnhandy.uri.template;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * <p>
 * A light-weight utility class that percent-encodes the values substituted into
 * URI template expressions.
 * </p>
 * <p>
 * Values are converted to UTF-8 and every octet that is not allowed to appear
 * literally is written as a {@code %XX} triplet using upper-case hexadecimal digits.
 * The class holds no mutable state of its own and cannot be instantiated.
 * </p>
 *
 * @author <a href="ryan@damnhandy.com">Ryan J. McDonough</a>
 * @version $Revision: 1.1 $
 */
public final class UriUtil
{
    /**
     * Matches a single percent-encoded triplet: {@code %} followed by two hex digits.
     * The misspelled name is kept because the field is package-visible.
     */
    static final Pattern PCT_ENCODDED_STRING = Pattern.compile("%[0-9A-Fa-f]{2}");

    /**
     * Delimiter characters that {@link #encode(String)} percent-encodes.
     * Note that {@code ','} is also listed in {@link #SUB_DELIMS_CHARS}; the
     * duplication is harmless because both arrays feed the same {@link BitSet}.
     */
    static final char[] GENERAL_DELIM_CHARS = {':', '/', ',', '?', '#', '[', ']', '@'};

    /**
     * Further delimiter and bracket characters that {@link #encode(String)} percent-encodes.
     */
    static final char[] SUB_DELIMS_CHARS = {'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '<', '>', '{', '}'};

    /** Charset used to turn source values into octets before encoding. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Upper-case hexadecimal digits used when writing a {@code %XX} triplet. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /** ASCII characters percent-encoded by {@link #encode(String)}. */
    private static final BitSet RESERVED;

    /** ASCII characters percent-encoded by {@link #encodeFragment(String)}. */
    private static final BitSet ESCAPE_CHARS;

    static
    {
        RESERVED = new BitSet();
        for (char delim : GENERAL_DELIM_CHARS)
        {
            RESERVED.set(delim);
        }
        for (char delim : SUB_DELIMS_CHARS)
        {
            RESERVED.set(delim);
        }
        RESERVED.set(' ');
        RESERVED.set('%');
        RESERVED.set('|');
        RESERVED.set('\\');
        RESERVED.set('`');
        RESERVED.set('"');
        RESERVED.set('^');

        ESCAPE_CHARS = new BitSet();
        ESCAPE_CHARS.set('<');
        ESCAPE_CHARS.set('>');
        ESCAPE_CHARS.set('%');
        ESCAPE_CHARS.set('\"');
        ESCAPE_CHARS.set('{');
        ESCAPE_CHARS.set('}');
        ESCAPE_CHARS.set('|');
        ESCAPE_CHARS.set('\\');
        ESCAPE_CHARS.set('^');
        ESCAPE_CHARS.set('[');
        ESCAPE_CHARS.set(']');
        ESCAPE_CHARS.set('`');
    }

    /** Not instantiable: this class only exposes static helpers. */
    private UriUtil()
    {

    }

    /**
     * Percent-encodes a value while leaving delimiter characters such as
     * {@code /}, {@code ?} and {@code #} untouched. Only the characters in the
     * escape set ({@code < > % " { } | \ ^ [ ] `}), control characters, space and
     * non-ASCII characters are encoded.
     * <p>
     * Any {@code %XX} triplet already present in the value is copied through
     * verbatim so it is not encoded a second time; a {@code %} that is not followed
     * by two hexadecimal digits is encoded as {@code %25}.
     * </p>
     *
     * @param sourceValue the value to encode; must not be {@code null}
     * @return the encoded string
     * @throws UnsupportedEncodingException declared for backward compatibility with
     *         existing callers; this implementation does not raise it
     */
    public static String encodeFragment(String sourceValue) throws UnsupportedEncodingException
    {
        Matcher triplets = PCT_ENCODDED_STRING.matcher(sourceValue);
        StringBuilder result = new StringBuilder(sourceValue.length());
        int offset = 0;
        while (triplets.find())
        {
            // encode the text that precedes the triplet ...
            result.append(encode(sourceValue.substring(offset, triplets.start()), ESCAPE_CHARS));
            // ... and copy the already-encoded triplet as-is
            result.append(triplets.group());
            offset = triplets.end();
        }
        // whatever follows the last triplet (the whole value when there was none)
        result.append(encode(sourceValue.substring(offset), ESCAPE_CHARS));
        return result.toString();
    }

    /**
     * Percent-encodes a value, escaping every delimiter listed in
     * {@link #GENERAL_DELIM_CHARS} and {@link #SUB_DELIMS_CHARS}, the characters
     * {@code % | \ ` " ^}, control characters, space and all non-ASCII characters.
     * Existing {@code %XX} triplets are <em>not</em> preserved: their {@code %} is
     * encoded as {@code %25}.
     *
     * @param sourceValue the value to encode; must not be {@code null}
     * @return the encoded string
     * @throws UnsupportedEncodingException declared for backward compatibility with
     *         existing callers; this implementation does not raise it
     */
    public static String encode(String sourceValue) throws UnsupportedEncodingException
    {
        return encode(sourceValue, RESERVED);
    }

    /**
     * Converts {@code sourceValue} to UTF-8 and percent-encodes each octet that is
     * a control character or space ({@code <= 0x20}), DEL or non-ASCII
     * ({@code >= 0x7F}), or a member of {@code chars}. All remaining octets are
     * printable ASCII and are copied unchanged.
     *
     * @param sourceValue the value to encode
     * @param chars the ASCII characters that must be encoded in addition to the
     *        always-encoded ranges
     * @return the encoded string, consisting solely of ASCII characters
     */
    private static String encode(String sourceValue, BitSet chars)
    {
        byte[] source = sourceValue.getBytes(UTF_8);
        StringBuilder out = new StringBuilder(source.length);
        for (byte raw : source)
        {
            // Java bytes are signed; mask so the range checks work on the real octet value.
            int octet = raw & 0xff;
            if (octet <= 0x20 || octet >= 0x7F || chars.get(octet))
            {
                out.append('%');
                out.append(HEX_DIGITS[octet >> 4]);
                out.append(HEX_DIGITS[octet & 0xF]);
            }
            else
            {
                out.append((char) octet);
            }
        }
        return out.toString();
    }

}