Вторник
16-04-2024
14:58
Главная страница
Каталог статей
Информист Приветствую Вас Гость | RSS
Регистрация
Вход
Меню сайта

Категории каталога
YUI - Yahoo! User Interface [1]
Решение проблем, связанных с библиотекой web-интерфейса от Yahoo
Web интерфейсы [18]
советы по оформлению web страниц
Протоколы [1]
MVC Visual Studio [1]

Наш опрос
Что Вы выберете
Всего ответов: 230

Копилка
Если вам понравился наш сайт или вы нашли полезную информацию, то у вас есть возможность отблагодарить администрацию.

Начало » Статьи » Web » Web интерфейсы

JavaScript UTF-8 ASCII

Кода много, разбираться сложно, но стопудово работает. Здесь приведены две функции - кодирование и декодирование текста между двумя кодировками.

Код был выдран из страницы одного забугорного сайта (ссылка указана в конце), переделывать и приводить к универсальному виду было влом, так что кому помог - с того "спасибо", кому нет - ну что ж, найдите лучше и киньте ссылку.

Если кто-то решится привести этот код в людской вид (т.е. чтоб можно было скопировать отсюда функцию к себе на страницу и просто вызывать, удалить всё лишнее и т.п.), перевести комменты и т.д., то многие web-разработчики и верстальщики, начинающие и не очень, и в частности администрация данного сайта, будут Вам очень признательны.

<script type="text/javascript"><!--

// ==========================================================================
// JavaScript Tool for URL Encoding/Decoding
// Copyright (C) 2006 Netzreport (netzreport.googlepages.com)
//
// Website: http://netzreport.googlepages.com/online_tool_for_url_en_decoding.html
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// The GNU General Public License is also available from:
// http://www.gnu.org/copyleft/gpl.html
//
// A local copy of the GNU General Public License is available here:
// http://netzreport.googlepages.com/gpl.txt
// ==========================================================================
//
// --------------------------------------------------------------------------
// 2006-12-18: Changed character encoding. Now, one can choose between URL
// encoding/decoding strings that are character encoded as ASCII
// or UTF-8.
// 2006-11-19: First release
// --------------------------------------------------------------------------

// Character sets used by the encoder and the decoder.
//
// unreserved: the RFC 3986 characters that never need percent-encoding
//             (digits, letters, "-", "_", "." and "~").
// reserved:   characters that carry a special meaning inside a URL, plus the
//             percent sign that introduces a percent-encoded byte.
// allowed:    every character that may appear literally in an encoded URL,
//             i.e. the two sets above joined together.
var unreserved = "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "-_.~";
var reserved = "!*'();:@&=+$,/?%#[]";
var allowed = [unreserved, reserved].join("");
// Hexadecimal digits. The upper-case digits come first, so the indexes 0-15
// select upper-case digits; the lower-case digits are listed as well so that
// the string can also be used to recognise lower-case input.
var hexchars = "0123456789" + "ABCDEF" + "abcdef";

// --------------------------------- Encoding -------------------------------

// Builds the percent-encoded form of one byte: a percent sign followed by
// the two hexadecimal digits of the value.
// Input is a decimal value not greater than 255; the digits are looked up
// in the file-level string hexchars.
function gethex(decimal) {
  var highdigit = hexchars.charAt(decimal >> 4);  // upper four bits
  var lowdigit = hexchars.charAt(decimal & 0xF);  // lower four bits
  return ["%", highdigit, lowdigit].join("");
}

// Percent-encodes the text of the "decoded" field of the document.converter
// form and writes the result into its "encoded" field. The "charset" field
// of the same form selects the character encoding:
//
//   "ascii" - unreserved characters are copied; every other ASCII character
//             becomes %XX. Non-ASCII characters are copied unencoded and
//             listed in a warning dialog.
//   "utf8"  - unreserved characters are copied; every other character is
//             converted to its UTF-8 bytes and each byte is written as %XX.
//
// Any other charset value only clears the output field.
//
// JavaScript strings consist of UTF-16 code units, so a character above
// U+FFFF arrives as two units (a surrogate pair). The pair is combined into
// one code point before it is encoded; otherwise each half would be written
// as its own three-byte sequence, which is not valid UTF-8. A surrogate
// without its partner cannot be represented in UTF-8 at all and is encoded
// as U+FFFD (the replacement character) in "utf8" mode.
function encode() {
  // Clear output field:
  document.converter.encoded.value = "";

  var decoded = document.converter.decoded.value;
  var charset = document.converter.charset.value;
  var encoded = "";
  // Non-ASCII characters that the "ascii" mode had to leave unencoded:
  var notascii = "";
  var i = 0;
  var ch, charcode, low;

  if (charset != "ascii" && charset != "utf8") return;

  while (i < decoded.length) {
    ch = decoded.charAt(i);
    charcode = decoded.charCodeAt(i);
    i++;

    // A high surrogate followed by a low surrogate forms one character:
    if (charcode >= 0xD800 && charcode <= 0xDBFF && i < decoded.length) {
      low = decoded.charCodeAt(i);
      if (low >= 0xDC00 && low <= 0xDFFF) {
        ch = ch + decoded.charAt(i);
        charcode = 0x10000 + ((charcode - 0xD800) << 10) + (low - 0xDC00);
        i++;
      }
    }

    if (unreserved.indexOf(ch) != -1) {
      // Unreserved characters never need encoding:
      encoded = encoded + ch;
    } else if (charcode < 128) {
      // ASCII: one byte, identical in both character encodings:
      encoded = encoded + gethex(charcode);
    } else if (charset == "ascii") {
      // Not representable in ASCII: keep the character and remember it:
      encoded = encoded + ch;
      notascii = notascii + ch + " ";
    } else {
      // Code point    | Bytes | Binary representation of the UTF-8 bytes
      // --------------------------------------------------------------------
      // 128 - 2047    |   2   | 110X.XXXX 10XX.XXXX
      // 2048 - 65535  |   3   | 1110.XXXX 10XX.XXXX 10XX.XXXX
      // 65536 -       |   4   | 1111.0XXX 10XX.XXXX 10XX.XXXX 10XX.XXXX

      // Only an unpaired surrogate can still be in this range here:
      if (charcode >= 0xD800 && charcode <= 0xDFFF) charcode = 0xFFFD;

      if (charcode < 2048) {
        // Top five bits behind the 110 prefix, then the low six bits:
        encoded = encoded + gethex((charcode >> 6) | 0xC0);
        encoded = encoded + gethex((charcode & 0x3F) | 0x80);
      } else if (charcode < 65536) {
        // Top four bits behind the 1110 prefix, then two groups of six bits:
        encoded = encoded + gethex((charcode >> 12) | 0xE0);
        encoded = encoded + gethex(((charcode >> 6) & 0x3F) | 0x80);
        encoded = encoded + gethex((charcode & 0x3F) | 0x80);
      } else {
        // Top three bits behind the 1111.0 prefix, then three groups of six bits:
        encoded = encoded + gethex((charcode >> 18) | 0xF0);
        encoded = encoded + gethex(((charcode >> 12) & 0x3F) | 0x80);
        encoded = encoded + gethex(((charcode >> 6) & 0x3F) | 0x80);
        encoded = encoded + gethex((charcode & 0x3F) | 0x80);
      }
    }
  }

  // Write result:
  document.converter.encoded.value = encoded;

  // Display warning message if necessary (can only happen in "ascii" mode):
  if (notascii != "") alert("Warning: Non-ASCII characters in decoded text!\n\nThus, these characters have not been encoded:\n" + notascii);
}

// --------------------------------- Decoding -------------------------------

// Converts one percent-encoded byte to its decimal value.
// Input is expected to be a percent sign followed by two hexadecimal digits
// (checked against the file-level string hexchars). For anything else - a
// string that is not exactly three characters long, a missing percent sign
// or a character that is not a hexadecimal digit - the out-of-range value
// 256 is returned:
function getdec(hexencoded) {
  var wellformed = hexencoded.length == 3 &&
    hexencoded.charAt(0) == "%" &&
    hexchars.indexOf(hexencoded.charAt(1)) != -1 &&
    hexchars.indexOf(hexencoded.charAt(2)) != -1;

  return wellformed ? parseInt(hexencoded.substring(1), 16) : 256;
}

// Decodes the percent-encoded text of the "encoded" field of the
// document.converter form and writes the result into its "decoded" field.
// The "charset" field of the same form selects how the bytes are read:
//
//   "ascii" - every %XX below 128 becomes the corresponding character; any
//             other percent-encoding is copied unchanged and reported.
//   "utf8"  - consecutive %XX bytes are read as UTF-8 sequences of one to
//             four bytes; sequences that are not valid UTF-8 are copied
//             unchanged and reported.
//
// Any other charset value only clears the output field.
//
// Characters that are neither percent-encoded nor in the file-level string
// allowed are copied but reported as well. All findings are shown in one
// warning dialog after the result has been written.
//
// Notes on the UTF-8 handling:
// - A code point above U+FFFF is written as a surrogate pair, because
//   String.fromCharCode only keeps the low 16 bits of each argument.
// - When a continuation byte is missing or wrong, only the bytes that were
//   read successfully are reported as illegal; decoding resumes directly
//   behind them, so following text is neither skipped nor left undecoded.
// - Overlong forms, encoded surrogates (U+D800 - U+DFFF) and values above
//   U+10FFFF are rejected.
function decode() {
  // Clear output field:
  document.converter.decoded.value = "";

  var encoded = document.converter.encoded.value;
  var charset = document.converter.charset.value;
  var utf8 = (charset == "utf8");
  var decoded = "";
  // Remember characters that are not allowed in a URL:
  var notallowed = "";
  // Remember illegal percent encoding:
  var illegalencoding = "";
  var warning = "";
  // mincode[n] is the smallest code point that really needs n UTF-8 bytes;
  // a smaller value in an n-byte sequence is an overlong form:
  var mincode = [0, 0, 0x80, 0x800, 0x10000];
  var i = 0;
  var ch, lead, next, needed, got, consumed, codepoint, sequence;

  if (charset != "ascii" && !utf8) return;

  while (i < encoded.length) {
    ch = encoded.charAt(i);

    // The character is not the start of a percent-encoding:
    if (ch != "%") {
      if (allowed.indexOf(ch) == -1) notallowed = notallowed + ch + " ";
      decoded = decoded + ch;
      i++;
      continue;
    }

    // The first byte tells how many bytes the sequence needs. A value of 0
    // marks a byte that cannot start a sequence: a malformed %XX (getdec
    // returns 256), a continuation byte, 0xF8 and above, or - in "ascii"
    // mode - any byte of 128 and above:
    lead = getdec(encoded.substr(i, 3));
    codepoint = lead;
    if (lead < 0x80) {
      needed = 1;
    } else if (utf8 && lead >= 0xC0 && lead <= 0xDF) {
      needed = 2;
      codepoint = lead & 0x1F;
    } else if (utf8 && lead >= 0xE0 && lead <= 0xEF) {
      needed = 3;
      codepoint = lead & 0x0F;
    } else if (utf8 && lead >= 0xF0 && lead <= 0xF7) {
      needed = 4;
      codepoint = lead & 0x07;
    } else {
      needed = 0;
    }

    // Append the six payload bits of every continuation byte (10XX.XXXX);
    // stop at the first %XX that is missing or not a continuation byte:
    consumed = 3;
    got = 1;
    while (got < needed) {
      next = getdec(encoded.substr(i + consumed, 3));
      if (next < 0x80 || next > 0xBF) break;
      codepoint = (codepoint << 6) | (next & 0x3F);
      consumed = consumed + 3;
      got++;
    }

    sequence = encoded.substr(i, consumed);

    if (needed > 0 && got == needed &&
        codepoint >= mincode[needed] && codepoint <= 0x10FFFF &&
        (codepoint < 0xD800 || codepoint > 0xDFFF)) {
      if (codepoint > 0xFFFF) {
        // Split into high and low surrogate:
        codepoint = codepoint - 0x10000;
        decoded = decoded + String.fromCharCode(0xD800 + (codepoint >> 10), 0xDC00 + (codepoint & 0x3FF));
      } else {
        decoded = decoded + String.fromCharCode(codepoint);
      }
    } else {
      decoded = decoded + sequence;
      illegalencoding = illegalencoding + sequence + " ";
    }

    i = i + consumed;
  } // end of while ...

  // Write result:
  document.converter.decoded.value = decoded;

  // Display warning message if necessary:
  if (notallowed != "") warning = warning + "Characters not allowed in a URL:\n" + notallowed + "\n\n";
  if (illegalencoding != "") {
    if (utf8) {
      warning = warning + "Illegal percent-encoding (for UTF-8):\n" + illegalencoding + "\n\n";
    } else {
      warning = warning + "Illegal percent-encoding (for ASCII):\n" + illegalencoding + "\n\n";
    }
  }
  if (warning != "") alert("Warning: Illegal characters/strings in encoded text!\n\n" + warning);
}
//-->

</script>


Источник: http://netzreport.googlepages.com/online_tool_for_url_en_decoding.html
Категория: Web интерфейсы | Добавил: Judge (28-08-2009)
Просмотров: 3152 | Рейтинг: 5.0 |

Всего комментариев: 0
Имя *:
Email *:
Код *:
Форма входа

Поиск по каталогу
Яндекс


Поиск по Информисту

Наша кнопка


Друзья сайта

Статистика


Copyright MyCorp © 2006 Сайт создан в системе uCoz