Subversion Repositories JSX

Compare Revisions

Last modification

Ignore whitespace Rev 430 → Rev 431

/trunk/test/unicode.js
292,7 → 292,7
},
 
{
feature: 'wideString("…").substr(…)',
feature: 'wideString.substr(…)',
description: 'Return the correct value',
code: function () {
var s = new WideString("x\uD834\uDD1Ey").substr(1);
363,11 → 363,99
},
 
/*
 * Test case: charAt() called on string values treats a surrogate pair
 * as one character and accepts negative positions.
 */
{
feature: 'string.charAt(…)',
description: 'Return the correct value',
code: function () {
/* Non-negative positions; "\uD834\uDD1E" is a single character here */
assert("x\uD834\uDD1E".charAt(1) === "\uD834\uDD1E");
assert("\uD834\uDD1Ex".charAt(1) === "x");
/* Position past the last character: the result is undefined */
assert(typeof "\uD834\uDD1Ex".charAt(2) == "undefined");
 
/* Negative positions count from the end of the string */
assert("xy\uD834\uDD1E".charAt(-1) === "\uD834\uDD1E");
assert("x\uD834\uDD1Ey".charAt(-1) === "y");
/* Position before the first character: the result is undefined */
assert(typeof "x\uD834\uDD1Ey".charAt(-4) == "undefined");
}
},
 
/*
 * Test case: charCodeAt() called on string values returns the code point
 * of a whole character (0x1D11E for the surrogate pair "\uD834\uDD1E")
 * and accepts negative positions.
 */
{
feature: 'string.charCodeAt(…)',
description: 'Return the correct value',
code: function () {
/* Non-negative positions */
assert("x\uD834\uDD1E".charCodeAt(1) === 0x1D11E);
assert("\uD834\uDD1EA".charCodeAt(1) === 65);
 
/* Position past the last character: expects the number NaN */
var result = "\uD834\uDD1EA".charCodeAt(2);
assert(typeof result == "number" && isNaN(result));
 
/* Negative positions count from the end of the string */
assert("xy\uD834\uDD1E".charCodeAt(-1) === 0x1D11E);
assert("A\uD834\uDD1EB".charCodeAt(-1) === 66);
 
/* Position before the first character: expects the number NaN */
result = "A\uD834\uDD1EB".charCodeAt(-4);
assert(typeof result == "number" && isNaN(result));
}
},
 
/*
 * Test case: getLength() called on a string value counts characters,
 * not UTF-16 code units.
 */
{
feature: 'string.getLength()',
description: 'Return the correct value (augmented prototype)',
code: function () {
/* "x", the surrogate pair, and "y": 3 characters in 4 code units */
assert("x\uD834\uDD1Ey".getLength() === 3);
}
},
 
/*
 * Test case: slice() called on string values, first without and then
 * with a surrogate pair; positions count whole characters.
 */
{
feature: 'string.slice(…)',
description: 'Return the correct value',
code: function () {
/* Strings without surrogate pairs */
assert("xyz".slice(1) === "yz");
assert("xyz".slice(0, 1) === "x");
assert("xyz".slice(0, 2) === "xy");
assert("xyz".slice(1, 1) === "");
assert("xyz".slice(1, 3) === "yz");
 
/* Same positions, with the surrogate pair in place of "y" */
assert("x\uD834\uDD1Ey".slice(1) === "\uD834\uDD1Ey");
assert("x\uD834\uDD1Ey".slice(0, 1) === "x");
assert("x\uD834\uDD1Ey".slice(0, 2) === "x\uD834\uDD1E");
assert("x\uD834\uDD1Ey".slice(1, 1) === "");
assert("x\uD834\uDD1Ey".slice(1, 3) === "\uD834\uDD1Ey");
}
},
 
/*
 * Test case: substr() called on string values, first without and then
 * with a surrogate pair; start and length count whole characters.
 */
{
feature: 'string.substr(…)',
description: 'Return the correct value',
code: function () {
/* Strings without surrogate pairs */
assert("xyz".substr(1) === "yz");
assert("xyz".substr(0, 1) === "x");
assert("xyz".substr(0, 2) === "xy");
assert("xyz".substr(1, 1) === "y");
/* A length reaching past the end yields the rest of the string */
assert("xyz".substr(1, 3) === "yz");
 
/* Same arguments, with the surrogate pair in place of "y" */
assert("x\uD834\uDD1Ey".substr(1) === "\uD834\uDD1Ey");
assert("x\uD834\uDD1Ey".substr(0, 1) === "x");
assert("x\uD834\uDD1Ey".substr(0, 2) === "x\uD834\uDD1E");
assert("x\uD834\uDD1Ey".substr(1, 1) === "\uD834\uDD1E");
assert("x\uD834\uDD1Ey".substr(1, 3) === "\uD834\uDD1Ey");
}
},
 
/*
 * Test case: substring() called on string values, first without and then
 * with a surrogate pair; positions count whole characters.
 */
{
feature: 'string.substring(…)',
description: 'Return the correct value',
code: function () {
/* Strings without surrogate pairs */
assert("xyz".substring(1) === "yz");
assert("xyz".substring(0, 1) === "x");
assert("xyz".substring(0, 2) === "xy");
assert("xyz".substring(1, 1) === "");
assert("xyz".substring(1, 3) === "yz");
/* start > end: expects the same result as substring(1, 3) */
assert("xyz".substring(3, 1) === "yz");
 
/* Same positions, with the surrogate pair in place of "y" */
assert("x\uD834\uDD1Ey".substring(1) === "\uD834\uDD1Ey");
assert("x\uD834\uDD1Ey".substring(0, 1) === "x");
assert("x\uD834\uDD1Ey".substring(0, 2) === "x\uD834\uDD1E");
assert("x\uD834\uDD1Ey".substring(1, 1) === "");
assert("x\uD834\uDD1Ey".substring(1, 3) === "\uD834\uDD1Ey");
/* start > end: expects the same result as substring(1, 3) */
assert("x\uD834\uDD1Ey".substring(3, 1) === "\uD834\uDD1Ey");
}
}
]
});
/trunk/string/unicode.js
104,9 → 104,12
* @namespace
*/
jsx.string.unicode = (/** @constructor */ function () {
var String_prototype_charCodeAt = "".charCodeAt;
 
var _isArray = jsx.object.isArray;
 
/* Handles characters in UTF-16 encoding from U+0000 to U+10FFFF */
var _rxWideString = /[\uD800-\uDBFF][\uDC00-\uDFFF]/;
var _rxString = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\S\s]/g;
 
/**
152,11 → 155,11
* @param {String} s
* @return {jsx.string.unicode.WideString}
*/
function jsx_string_unicode_WideString (s) {
function _jsx_string_unicode_WideString (s) {
/* Factory support */
if (!(this instanceof jsx_string_unicode_WideString))
if (!(this instanceof _jsx_string_unicode_WideString))
{
return new jsx_string_unicode_WideString(s);
return new _jsx_string_unicode_WideString(s);
}
 
/**
166,7 → 169,7
var _chars =
_isArray(s)
? s
: (s instanceof jsx_string_unicode_WideString
: (s instanceof _jsx_string_unicode_WideString
? s.getChars()
: _toCharArray(s));
 
182,10 → 185,11
},
 
/**
* @field
* @name chars
* @type Array
* @memberOf jsx.string.unicode.WideString
* @field
* @type Array
* @see #getChars()
*/
jsx.object.defineProperty(this, "chars", {"get": this.getChars});
}
231,55 → 235,46
* several other characters. Normalization is not performed.</em>
*
* @memberOf jsx.string.unicode.WideString#prototype
* @param {Number} position
* If not an integer, replaced with the closest integer.
* @return {number}
* If <code><var>position</var></code> is greater than or
* equal to zero, the code point value of the character
* at that position, counted from zero.
* If <code><var>position</var></code> is less than zero,
* it is treated as the number of characters in this object
* + <code><var>position</var></code>; that is,
* <code><var>position</var> === -1</code>
* returns the code point value of the last character.
* If <code><var>position</var></code> is replaceable with
* an integer value out of this range, <code>NaN</code>
* is returned.
* By contrast, if <code><var>position</var></code> is
* not a <code>Number</code>, the return value is
* <strong>not defined</strong>.
* @see #charAt()
*/
charCodeAt: (function () {
var String_prototype_charCodeAt = "".charCodeAt;
charCodeAt: function (position) {
var ch = this.charAt(position);
if (typeof ch == "undefined")
{
return NaN;
}
 
/**
* @param {Number} position
* If not an integer, replaced with the closest integer.
* @return {number}
* If <code><var>position</var></code> is greater than or
* equal to zero, the code point value of the character
* at that position, counted from zero.
* If <code><var>position</var></code> is less than zero,
* it is treated as the number of characters in this object
* + <code><var>position</var></code>; that is,
* <code><var>position</var> === -1</code>
* returns the code point value of the last character.
* If <code><var>position</var></code> is replaceable with
* an integer value out of this range, <code>NaN</code>
* is returned.
* By contrast, if <code><var>position</var></code> is
* not a <code>Number</code>, the return value is
* <strong>not defined</strong>.
*/
function _charCodeAt (position)
if (/^[\uD800-\uDBFF]/.test(ch))
{
var ch = this.charAt(position);
if (typeof ch == "undefined")
{
return NaN;
}
var leadSurrogate = String_prototype_charCodeAt.call(ch, 0);
var trailSurrogate = String_prototype_charCodeAt.call(ch, 1);
 
if (/^[\uD800-\uDBFF]/.test(ch))
{
var leadSurrogate = String_prototype_charCodeAt.call(ch, 0);
var trailSurrogate = String_prototype_charCodeAt.call(ch, 1);
var leadBits = (leadSurrogate - 0xD800) << 10;
var trailBits = trailSurrogate - 0xDC00;
var bmpOffset = 0x10000;
 
var leadBits = (leadSurrogate - 0xD800) << 10;
var trailBits = trailSurrogate - 0xDC00;
var bmpOffset = 0x10000;
 
return leadBits + trailBits + bmpOffset;
}
 
return String_prototype_charCodeAt.call(ch, 0);
return leadBits + trailBits + bmpOffset;
}
 
return _charCodeAt;
}()),
return String_prototype_charCodeAt.call(ch, 0);
},
 
/**
* Concatenates this string with other strings
421,7 → 416,7
},
 
/**
* Returns a slice (substring) of this string.
* Returns a substring of this string.
*
* @param {int} start
* Position of the Unicode character from where to start slicing.
531,21 → 526,212
{
if (jsx.options.augmentPrototypes)
{
jsx.object.setProperties(String.prototype, {
jsx.object.setProperties(String.prototype, (function () {
/**
* Returns the number of Unicode characters in this string.
*
* Differs from the built-in {@link string#length length}
* property in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
*
* @memberOf String.prototype
* @return {number}
* @type jsx.string.unicode.WideString
*/
getLength: function () {
return (new _WideString(this)).getLength();
var _prevWideString = null;
 
/**
 * Returns a {@link jsx.string.unicode.WideString WideString} for
 * a string value, reusing the most recently created one if it
 * represents the same string.
 * <p>
 * Only the last <code>WideString</code> is kept (in
 * <code><var>_prevWideString</var></code>). This speeds up
 * consecutive method calls on the same string without preventing
 * older <code>WideString</code>s from being garbage-collected,
 * which a cache of <em>all</em> previous values would do.
 * </p><p>
 * The cached object is compared through its <code>toString()</code>
 * result so that string values are compared; comparing object
 * references would never match the wrapper <code>String</code>
 * object created for a primitive string value.
 * </p>
 * @param {String} s
 * @return {jsx.string.unicode.WideString}
 */
function _createWideString (s)
{
  /* Cache miss: nothing cached yet, or cached for a different string */
  if (!_prevWideString || _prevWideString.toString() != s)
  {
    _prevWideString = new _WideString(s);
  }

  return _prevWideString;
}
});
 
return {
/**
* Returns the character in this string
* at the specified position.
* <p>
* Differs from the built-in {@link String.prototype#charAt}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units,
* and that the position may be negative, counting from
* the end of the string.
* </p>
* @memberOf String.prototype
* @param {Number} position
* @return {string}
* @see String.prototype#charAt()
* @see jsx.string.unicode.WideString.prototype#charAt()
*/
charAt: function (position) {
return _createWideString(this).charAt(position);
},
 
/**
* Returns the Unicode code point value of the character
* in this string at the specified position.
* <p>
* Differs from the built-in {@link String.prototype#charCodeAt}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units,
* and that the position may be negative, counting from
* the end of the string.
* </p>
* @param {Number} position
* @return {number}
* @see String.prototype#charCodeAt()
* @see jsx.string.unicode.WideString.prototype#charCodeAt()
*/
charCodeAt: function (position) {
return _createWideString(this).charCodeAt(position);
},
 
/**
* Returns the number of Unicode characters in this string.
* <p>
* Differs from the built-in {@link string#length length}
* property in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
* @return {number}
* @see string#length
*/
getLength: function () {
return _createWideString(this).getLength();
},
 
/**
* Returns the index of the first position of a substring
* in this string.
* <p>
* Differs from the built-in {@link String.prototype#indexOf}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
* @param {jsx.string.unicode.WideString|String} searchString
* Substring to look for.
* @param {int} position
* Position from where to start searching. The default is
* <code>0</code>.
* @return {number}
* @see String.prototype#indexOf()
*/
indexOf: function (searchString, position) {
return _createWideString(this).indexOf(searchString, position);
},
 
/**
* Returns the index of the last position of a substring
* in this string.
* <p>
* Differs from the built-in {@link String.prototype#lastIndexOf}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
* @param {jsx.string.unicode.WideString|String} searchString
* Substring to look for.
* @param {int} position
* Position from where to start searching backwards.
* The default is the position of the last character.
* @return {number}
* @see String.prototype#lastIndexOf()
*/
lastIndexOf: function (searchString, position) {
return _createWideString(this).lastIndexOf(searchString, position);
},
 
/**
* Returns a slice (substring) of this string.
* <p>
* Differs from the built-in {@link String.prototype#slice}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
* @see String.prototype#slice()
*/
slice: (function () {
var String_prototype_slice = "".slice;
 
/**
* @param {int} start
* Position of the Unicode character from where to start slicing.
* @param {int} end
* Position of the first Unicode character that should not be
* included in the slice.
* @return {string}
*/
return function (start, end) {
return _rxWideString.test(this)
? _createWideString(this).slice(start, end).toString()
: String_prototype_slice.apply(this, arguments);
};
}()),
 
/**
* Returns a substring of this string.
* <p>
* Differs from the built-in (non-standard)
* {@link String.prototype#substr} method in that it
* considers characters with code points beyond U+FFFF
* that require several UTF-16 code units.
* </p>
* @param {int} start
* Position of the Unicode character from where to start slicing.
* @param {int} length
* Number of Unicode characters in the substring.
* If omitted or <code>undefined</code>, the substring
* contains all characters from <code>start</code>
* to the end of this string.
* @return {string}
* @see String.prototype#substr()
* @see jsx.string.unicode.WideString.prototype#substr()
*/
substr: function (start, length) {
return _createWideString(this).substr(start, length).toString();
},
 
/**
 * Returns a substring of this string, given a start and
 * an end position.
 * <p>
 * Unlike the built-in {@link String.prototype#substring} method,
 * a character with a code point beyond U+FFFF (which takes
 * several UTF-16 code units) counts as one character.
 * </p>
 * @see String.prototype#substring()
 */
substring: (function () {
  /* Keep a reference to the built-in method for the fallback below */
  var _builtInSubstring = "".substring;

  /**
   * @param {int} start
   *   Position of the Unicode character from where to start slicing.
   * @param {int} end (optional)
   *   Position of the first Unicode character that should not be
   *   included in the slice. If <code><var>start</var></code>
   *   is larger than <code><var>end</var></code>, they are swapped.
   * @return {string}
   */
  return function (start, end) {
    if (!_rxWideString.test(this))
    {
      /* No surrogate pair in this string: use the built-in method */
      return _builtInSubstring.apply(this, arguments);
    }

    return _createWideString(this).substring(start, end).toString();
  };
}())
};
}()), jsx.object.ADD_OVERWRITE);
}
}
 
555,7 → 741,11
/**
* Returns a {@link String} containing the Unicode characters
* specified by the code point arguments.
*
* <p>
* Differs from the built-in {@link String#fromCharCode}
* method in that it considers characters with code points
* beyond U+FFFF that require several UTF-16 code units.
* </p>
* @memberOf String
* @params {Number}
* Code points of the characters from <code>0</code> to