// ==========================================================================
// JavaScript Tool for URL Encoding/Decoding
// Copyright (C) 2006 Netzreport (netzreport.googlepages.com)
//
// Website: http://netzreport.googlepages.com/online_tool_for_url_en_decoding.html
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
//
// The GNU General Public License is also available from:
// http://www.gnu.org/copyleft/gpl.html
//
// A local copy of the GNU General Public License is available here:
// http://netzreport.googlepages.com/gpl.txt
// ==========================================================================
//
// --------------------------------------------------------------------------
// 2006-12-18: Changed character encoding. Now, one can choose between URL
//             encoding/decoding strings that are character encoded as ASCII
//             or UTF-8.
// 2006-11-19: First release
// --------------------------------------------------------------------------

// According to RFC 3986, only characters from a set of reserved and a set
// of unreserved characters are allowed in a URL:
var unreserved = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.~";
var reserved = "!*'();:@&=+$,/?%#[]";
var allowed = unreserved + reserved;
var hexchars = "0123456789ABCDEFabcdef";

// --------------------------------- Encoding -------------------------------

// This function returns a percent sign followed by two hexadecimal digits.
// Input is a decimal value not greater than 255.
function gethex(decimal) {
  return "%" + hexchars.charAt(decimal >> 4) + hexchars.charAt(decimal & 0xF);
  }

function utf8encode(input_string){
  var encoded = '';                 
  decoded = input_string;
  for (var i = 0; i < decoded.length; i++ ) {
      var ch = decoded.charAt(i);
      // Check if character is an unreserved character:
      if (unreserved.indexOf(ch) != -1) {
        encoded = encoded + ch;
      } else {

        // The position in the Unicode table tells us how many bytes are needed.
        // Note that if we talk about first, second, etc. in the following, we are
        // counting from left to right:
        //
        //   Position in   |  Bytes needed   | Binary representation
        //  Unicode table  |   for UTF-8     |       of UTF-8
        // ----------------------------------------------------------
        //     0 -     127 |    1 byte       | 0XXX.XXXX
        //   128 -    2047 |    2 bytes      | 110X.XXXX 10XX.XXXX
        //  2048 -   65535 |    3 bytes      | 1110.XXXX 10XX.XXXX 10XX.XXXX
        // 65536 - 2097151 |    4 bytes      | 1111.0XXX 10XX.XXXX 10XX.XXXX 10XX.XXXX

        var charcode = decoded.charCodeAt(i);

        // Position 0 - 127 is equal to percent-encoding with an ASCII character encoding:
        if (charcode < 128) {
          encoded = encoded + gethex(charcode);
        }

        // Position 128 - 2047: two bytes for UTF-8 character encoding.
        if (charcode > 127 && charcode < 2048) {
          // First UTF byte: Mask the first five bits of charcode with binary 110X.XXXX:
          encoded = encoded + gethex((charcode >> 6) | 0xC0);
          // Second UTF byte: Get last six bits of charcode and mask them with binary 10XX.XXXX:
          encoded = encoded + gethex((charcode & 0x3F) | 0x80);
        }

        // Position 2048 - 65535: three bytes for UTF-8 character encoding.
        if (charcode > 2047 && charcode < 65536) {
          // First UTF byte: Mask the first four bits of charcode with binary 1110.XXXX:
          encoded = encoded + gethex((charcode >> 12) | 0xE0);
          // Second UTF byte: Get the next six bits of charcode and mask them binary 10XX.XXXX:
          encoded = encoded + gethex(((charcode >> 6) & 0x3F) | 0x80);
          // Third UTF byte: Get the last six bits of charcode and mask them binary 10XX.XXXX:
          encoded = encoded + gethex((charcode & 0x3F) | 0x80);
        }

        // Position 65536 - : four bytes for UTF-8 character encoding.
        if (charcode > 65535) {
          // First UTF byte: Mask the first three bits of charcode with binary 1111.0XXX:
          encoded = encoded + gethex((charcode >> 18) | 0xF0);
          // Second UTF byte: Get the next six bits of charcode and mask them binary 10XX.XXXX:
          encoded = encoded + gethex(((charcode >> 12) & 0x3F) | 0x80);
          // Third UTF byte: Get the last six bits of charcode and mask them binary 10XX.XXXX:
          encoded = encoded + gethex(((charcode >> 6) & 0x3F) | 0x80);
          // Fourth UTF byte: Get the last six bits of charcode and mask them binary 10XX.XXXX:
          encoded = encoded + gethex((charcode & 0x3F) | 0x80);
        }

      }

    }  // end of for ...

    // Write result:
  return  encoded;
}
