Subversion Repositories JSX

Compare Revisions

Last modification

Ignore whitespace Rev 528 → Rev 529

/trunk/string/unicode.js
1,4 → 1,3
/* vim:set fileencoding=utf-8 tabstop=2 shiftwidth=2 softtabstop=2 expandtab: */
/**
* @fileOverview <title>PointedEars' JSX: String Library -- Unicode Support</title>
* @file $Id$
24,11 → 23,11
* and the value of the <code>length</code> property is
* <code>2</code>. Retrieving the (code point of the) first
* character from a string with
* <code>{@link String.prototype#charAt}(0)</code>,
* <code>{@link String.prototype.charAt}(0)</code>,
* where the character has a code point beyond the Basic Multilingual
* Plane (BMP), returns the (code point of the) (non-)character for its
* <em>lead surrogate</em>. Retrieving with
* <code>{@link String.prototype#charAt}(1)</code>
* <code>{@link String.prototype.charAt}(1)</code>
* the (code point of) the second character from a string that
* contains a first non-BMP character returns the first character's
* <em>trail surrogate</em> instead.
38,8 → 37,8
* === "\uDD1E"</code>).
*
* This library provides a new object type, <code>WideString</code>,
* whose prototype object inherits from the <code>String</code>
* prototype object, that is intended to be used as a drop-in
* whose prototype inherits from the <code>String</code>
* prototype, that is intended to be used as a drop-in
* replacement for <code>String</code> values that may contain
* Unicode characters from both inside and beyond the BMP, from
* code points U+0000 to U+10FFFF inclusive.
46,8 → 45,8
*
* It also replaces by default the values of certain built-in
* properties of the <code>String</code> constructor and
* <code>String</code> prototype object such as the ones mentioned
* above, and adds others to the prototype object, so as to
* <code>String</code> prototype such as the ones mentioned
* above, and adds others to the String prototype, so as to
* support non-BMP characters as well in implementations of those
* ECMAScript Editions almost seamlessly. However, non-callable
* built-in own and inherited properties of <code>String</code>
55,7 → 54,7
* (In case of <code>length</code>, you would need to use
* <code>getLength()</code> instead.) Whether and to what extent
* built-in objects are modified can be controlled with
* {@link jsx#options}. (See {@link "object.js"} for details.)
* {@link jsx.options}. (See {@link "object.js"} for details.)
*
* Your feedback is appreciated.
*
64,7 → 63,7
*
* @section {Copyright & Disclaimer}
*
* @author (C) 2013 Thomas Lahn &lt;js@PointedEars.de&gt;
* @author (C) 2013, 2014 Thomas Lahn &lt;js@PointedEars.de&gt;
*
* @partof PointedEars' JavaScript Extensions (JSX)
*
82,14 → 81,6
* along with JSX. If not, see <http://www.gnu.org/licenses/>.
*/
 
if (typeof jsx == "undefined")
{
/**
* @namespace
*/
var jsx = {};
}
 
if (typeof jsx.string == "undefined")
{
/**
99,18 → 90,17
}
 
/**
* @type jsx.string.unicode
* @memberOf __jsx.string.unicode
* @namespace
*/
jsx.string.unicode = (/** @constructor */ function () {
/* Imports */
var String_prototype_charCodeAt = "".charCodeAt;
 
var _isArray = jsx.object.isArray;
 
/* Handles characters in UTF-16 encoding from U+0000 to U+10FFFF */
var _rxWideString = /[\uD800-\uDBFF][\uDC00-\uDFFF]/;
var _rxString = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\S\s]/g;
var _rx_wide_string = /[\uD800-\uDBFF][\uDC00-\uDFFF]/;
var _rx_string = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\S\s]/g;
 
/**
* @private
120,7 → 110,7
*/
function _toCharArray (s)
{
return (s ? (String(s).match(_rxString) || []) : []);
return (s ? (String(s).match(_rx_string) || []) : []);
}
 
/**
137,7 → 127,7
* the augmentation instead.
* </p><p>
* You may call a <code>WideString</code>'s
* {@link WideString#prototype.toString toString()} method
* {@link WideString.prototype.toString toString()} method
* (implicitly called in string context, like concatenation)
* to convert it back to a <code>String</code> value.
* </p><p>
146,7 → 136,6
* 2³²−1 Unicode characters. Also note that normalization
* is <strong>not</strong> performed at the moment.</em>
* </p>
* @type jsx.string.unicode.WideString
* @extends String
* @constructor
*/
155,11 → 144,11
* @param {String} s
* @return {jsx.string.unicode.WideString}
*/
function _jsx_string_unicode_WideString (s) {
function jsx_string_unicode_WideString (s) {
/* Factory support */
if (!(this instanceof _jsx_string_unicode_WideString))
if (!(this instanceof jsx_string_unicode_WideString))
{
return new _jsx_string_unicode_WideString(s);
return new jsx_string_unicode_WideString(s);
}
 
/**
169,7 → 158,7
var _chars =
_isArray(s)
? s
: (s instanceof _jsx_string_unicode_WideString
: (s instanceof jsx_string_unicode_WideString
? s.getChars()
: _toCharArray(s));
 
200,7 → 189,7
* <em>NOTE: A single Unicode character may be composed out of
* several other characters. Normalization is not performed.</em>
*
* @memberOf jsx.string.unicode.WideString#prototype
* @memberOf jsx.string.unicode.WideString.prototype
* @param {int} position
* If not an integer, replaced with the closest integer.
* @return {string}
234,7 → 223,7
* <em>NOTE: A single Unicode character may be composed out of
* several other characters. Normalization is not performed.</em>
*
* @memberOf jsx.string.unicode.WideString#prototype
* @memberOf jsx.string.unicode.WideString.prototype
* @param {Number} position
* If not an integer, replaced with the closest integer.
* @return {number}
387,7 → 376,7
* Position of the first Unicode character that should not be
* included in the slice.
* @return {jsx.string.unicode.WideString}
* @see String.prototype#slice()
* @see String.prototype.slice()
*/
slice: function (start, end) {
return new this.constructor(
406,7 → 395,7
* to the end of this string.
* @return {jsx.string.unicode.WideString}
* @see #slice()
* @see String.prototype#substr()
* @see String.prototype.substr()
*/
substr: function (start, length) {
return this.slice(start,
425,7 → 414,7
* included in the slice. If <code><var>start</var></code>
* is larger than <code><var>end</var></code>, they are swapped.
* @return {jsx.string.unicode.WideString}
* @see String.prototype#substring()
* @see String.prototype.substring()
*/
substring: function (start, end) {
if (start > end)
460,7 → 449,7
/**
* @name length
* @type int
* @memberOf jsx.string.unicode.WideString#prototype
* @memberOf jsx.string.unicode.WideString.prototype
* @see #getLength()
*/
jsx.object.defineProperty(_WideString.prototype, "length", {
497,7 → 486,7
return jsx.throwThis(jsx.InvalidArgumentError,
["Unsupported Unicode code point value",
"0x" + arg.toString(16).toUpperCase(),
"0x0000..0x10FFFF"]);
"0..0x10FFFF"]);
}
 
if (arg > 0xFFFF)
564,7 → 553,7
* Returns the character in this string
* at the specified position.
* <p>
* Differs from the built-in {@link String.prototype#charAt}
* Differs from the built-in {@link String.prototype.charAt}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units,
* and that the position may be negative, counting from
573,8 → 562,8
* @memberOf String.prototype
* @param {Number} position
* @return {string}
* @see String.prototype#charAt()
* @see jsx.string.unicode.WideString.prototype#charAt()
* @see String.prototype.charAt()
* @see jsx.string.unicode.WideString.prototype.charAt()
*/
charAt: function (position) {
return _createWideString(this).charAt(position);
584,7 → 573,7
* Returns the Unicode code point value of the character
* in this string at the specified position.
* <p>
* Differs from the built-in {@link String.prototype#charCodeAt}
* Differs from the built-in {@link String.prototype.charCodeAt}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units,
* and that the position may be negative, counting from
592,8 → 581,8
* </p>
* @param {Number} position
* @return {number}
* @see String.prototype#charCodeAt()
* @see jsx.string.unicode.WideString.prototype#charCodeAt()
* @see String.prototype.charCodeAt()
* @see jsx.string.unicode.WideString.prototype.charCodeAt()
*/
charCodeAt: function (position) {
return _createWideString(this).charCodeAt(position);
602,12 → 591,12
/**
* Returns the number of Unicode characters in this string.
* <p>
* Differs from the built-in {@link string#length length}
* Differs from the built-in {@link string.length length}
* property in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
* @return {number}
* @see string#length
* @see string.length
*/
getLength: function () {
return _createWideString(this).getLength();
617,7 → 606,7
* Returns the index of the first position of a substring
* in this string.
* <p>
* Differs from the built-in {@link String.prototype#indexOf}
* Differs from the built-in {@link String.prototype.indexOf}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
627,7 → 616,7
* Position from where to start searching. The default is
* <code>0</code>.
* @return {number}
* @see String.prototype#indexOf()
* @see String.prototype.indexOf()
*/
indexOf: function (searchString, position) {
return _createWideString(this).indexOf(searchString, position);
637,7 → 626,7
* Returns the index of the last position of a substring
* in this string.
* <p>
* Differs from the built-in {@link String.prototype#lastIndexOf}
* Differs from the built-in {@link String.prototype.lastIndexOf}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
647,7 → 636,7
* Position from where to start searching backwards.
* The default is the position of the last character.
* @return {number}
* @see String.prototype#lastIndexOf()
* @see String.prototype.lastIndexOf()
*/
lastIndexOf: function (searchString, position) {
return _createWideString(this).lastIndexOf(searchString, position);
656,11 → 645,11
/**
* Returns a slice (substring) of this string.
* <p>
* Differs from the built-in {@link String.prototype#slice}
* Differs from the built-in {@link String.prototype.slice}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
* @see String.prototype#slice()
* @see String.prototype.slice()
*/
slice: (function () {
var String_prototype_slice = "".slice;
674,7 → 663,7
* @return {string}
*/
return function (start, end) {
return _rxWideString.test(this)
return _rx_wide_string.test(this)
? _createWideString(this).slice(start, end).toString()
: String_prototype_slice.apply(this, arguments);
};
684,7 → 673,7
* Returns a substring of this string.
* <p>
* Differs from the built-in (non-standard)
* {@link String.prototype#substr} method in that it
* {@link String.prototype.substr} method in that it
* considers characters with code points beyond U+FFFF
* that require several UTF-16 code units.
* </p>
696,8 → 685,8
* contains all characters from <code>start</code>
* to the end of this string.
* @return {string}
* @see String.prototype#substr()
* @see jsx.string.unicode.WideString.prototype#substr()
* @see String.prototype.substr()
* @see jsx.string.unicode.WideString.prototype.substr()
*/
substr: function (start, length) {
return _createWideString(this).substr(start, length).toString();
706,11 → 695,12
/**
* Returns a substring of this string.
* <p>
* Differs from the built-in {@link String.prototype#substring}
* Differs from the built-in {@link String.prototype.substring}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
* @see String.prototype#substring()
* @function
* @see String.prototype.substring()
*/
substring: (function () {
var String_prototype_substring = "".substring;
725,7 → 715,7
* @return {string}
*/
return function (start, end) {
return _rxWideString.test(this)
return _rx_wide_string.test(this)
? _createWideString(this).substring(start, end).toString()
: String_prototype_substring.apply(this, arguments);
};
742,7 → 732,7
* Returns a {@link String} containing the Unicode characters
* specified by the code point arguments.
* <p>
* Differs from the built-in {@link String#fromCharCode}
* Differs from the built-in {@link String.fromCharCode}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
764,4 → 754,6
*/
WideString: _WideString
};
}());
}());
 
/* vim:set fileencoding=utf-8 tabstop=2 shiftwidth=2 softtabstop=2 expandtab: */