TextDecoder von Emil, 07.08.2019 08:30

Beitrag lesen

TextDecoder

Emil 07.08.2019 08:30

javascript

TextDecoder, -Encoder ist eine gute Idee. Es entspricht dem Perlmodul Encode, das genauso gehandhabt wird. Somit erfolgt die Vermittlung zwischen Zeichenorientierung und Byteorientierung in JS nach denselben Prinzipien, wie es in Perl seit Jahrzehnten üblich ist.

In PlainJS sieht der Algorithmus zur UTF-8-Dekodierung so aus:

/**
 * Decodes a sequence of UTF-8 byte values into code points.
 *
 * The decoder is lenient: it accepts the legacy 5- and 6-byte forms, does not
 * validate continuation bytes, and emits a lead byte whose sequence is
 * truncated at the end of the input as a single-byte value.
 *
 * All state is local, so repeated calls are independent of each other and the
 * function works in strict mode / ES modules. Decoding is iterative, so large
 * inputs cannot exhaust the call stack.
 *
 * @param {ArrayLike<number>} aChars - Byte values (0-255) to decode.
 * @param {number} [nIdx=0] - Index of the first byte to decode.
 * @returns {string[]} One upper-case hexadecimal string per decoded code point;
 *     empty when there is nothing to decode at or after nIdx.
 */
function decode_utf8( aChars, nIdx ){
    if( nIdx == null ) nIdx = 0;
    var nLen = aChars.length;
    var aCps = [];
    var nOffs = nIdx;

    while( nOffs < nLen ){
        var nPart = aChars[nOffs];
        // nExtra: number of continuation bytes announced by the lead byte.
        // nMarker: length-marker bits to strip from the lead byte.
        var nExtra = 0;
        var nMarker = 0;
        if( nPart > 251 && nPart < 254 ){ nExtra = 5; nMarker = 252; }
        else if( nPart > 247 && nPart < 252 ){ nExtra = 4; nMarker = 248; }
        else if( nPart > 239 && nPart < 248 ){ nExtra = 3; nMarker = 240; }
        else if( nPart > 223 && nPart < 240 ){ nExtra = 2; nMarker = 224; }
        else if( nPart > 191 && nPart < 224 ){ nExtra = 1; nMarker = 192; }

        // A sequence that would run past the end of the input is not decoded;
        // its lead byte is emitted on its own instead.
        if( nOffs + nExtra >= nLen ){
            nExtra = 0;
            nMarker = 0;
        }

        // Multiplying by 64 instead of shifting keeps the arithmetic exact for
        // the 6-byte form, whose lead bits sit above the signed 32-bit range.
        var cp = nPart - nMarker;
        for( var i = 1; i <= nExtra; i++ ){
            cp = cp * 64 + (aChars[nOffs + i] - 128);
        }

        aCps.push(cp.toString(16).toUpperCase());
        nOffs += nExtra + 1;
    }

    return aCps;
}

Beitrag melden

– Informationen zu den Bewertungsregeln

SELFHTML Forum - Ergänzung zur Dokumentation Übersicht

Emil: TextDecoder

Beitrag lesen

TextDecoder

JavaScript, Bytes und Zeichen

Schwimmen und Schlaganfall

TextDecoder

JavaScript, Bytes, Zeichen und Kontext

Antwort nicht angezeigt?