PHP's unserialize in JavaScript

Here’s what our current JavaScript equivalent to PHP's unserialize looks like.

/**
 * Creates the store that back-references (`r:` / `R:`) are resolved against.
 *
 * The returned function takes a parser result tuple `[value, consumedLength]`,
 * records `value` and returns the tuple unchanged, so it can wrap parser calls.
 * Its `get(index)` method returns the value recorded at the zero-based `index`.
 *
 * @returns {Function} cache function with an attached `get(index)` method
 * @throws {RangeError} from `get` when `index` is past the last recorded value
 */
function initCache () {
  const store = []

  // cache only first element, second is length to jump ahead for the parser
  function cache (value) {
    store.push(value[0])
    return value
  }

  cache.get = (index) => {
    if (index >= store.length) {
      // references are 1-based in the serialized form, hence the + 1 in the message
      throw new RangeError(`Can't resolve reference ${index + 1}`)
    }
    return store[index]
  }

  return cache
}
/**
 * Parses the single serialized value that starts at the beginning of `str`,
 * dispatching on its type tag.
 *
 * Scalar results are recorded in `cache` here; the array and object parsers
 * receive `cache` and record their own result, and references are not recorded.
 *
 * @param {string} str - serialized input, positioned at the start of a value
 * @param {Function} cache - reference cache as created by `initCache`
 * @returns {Array} tuple `[value, consumedLength]`
 * @throws {SyntaxError} when no type tag is found or the tag is not supported
 */
function expectType (str, cache) {
  // `N` is terminated by `;`, every other tag is followed by `:`.
  // The last alternative captures unknown tags so they can be reported by name.
  const types = /^(?:N(?=;)|[bidsSaOCrR](?=:)|[^:]+(?=:))/
  const type = (types.exec(str) || [])[0]
  if (!type) {
    throw new SyntaxError('Invalid input: ' + str)
  }

  switch (type) {
    case 'N':
      // "N;" is always two characters long
      return cache([null, 2])
    case 'b':
      return cache(expectBool(str))
    case 'i':
      return cache(expectInt(str))
    case 'd':
      return cache(expectFloat(str))
    case 's':
      return cache(expectString(str))
    case 'S':
      return cache(expectEscapedString(str))
    case 'a':
      return expectArray(str, cache)
    case 'O':
      return expectObject(str, cache)
    case 'C':
      return expectClass(str, cache)
    case 'r':
    case 'R':
      return expectReference(str, cache)
    default:
      throw new SyntaxError(`Invalid or unsupported data type: ${type}`)
  }
}
/**
 * Parses a serialized boolean (`b:0;` or `b:1;`) at the start of `str`.
 *
 * @param {string} str - serialized input
 * @returns {Array} tuple `[booleanValue, consumedLength]`
 * @throws {SyntaxError} when `str` does not start with a valid boolean
 */
function expectBool (str) {
  const reBool = /^b:([01]);/
  const [match, boolMatch] = reBool.exec(str) || []
  // boolMatch is '0' or '1' on success (both truthy strings), undefined otherwise
  if (!boolMatch) {
    throw new SyntaxError('Invalid bool value, expected 0 or 1')
  }
  return [boolMatch === '1', match.length]
}
/**
 * Parses a serialized integer (`i:<digits>;`, optionally signed) at the start of `str`.
 *
 * @param {string} str - serialized input
 * @returns {Array} tuple `[integerValue, consumedLength]`
 * @throws {SyntaxError} when `str` does not start with a valid integer
 */
function expectInt (str) {
  const reInt = /^i:([+-]?\d+);/
  const [match, intMatch] = reInt.exec(str) || []
  // intMatch holds at least one digit on success, so it is never an empty string
  if (!intMatch) {
    throw new SyntaxError('Expected an integer value')
  }
  return [parseInt(intMatch, 10), match.length]
}
/**
 * Parses a serialized float (`d:<number>;`) at the start of `str`.
 *
 * Accepts the special tokens `NAN`, `INF` and `-INF`, as well as decimal numbers
 * with an optional leading sign and an optional exponent whose sign is optional
 * too (e.g. `d:-1.5;`, `d:1.0E+25;`, `d:1e5;`).
 *
 * @param {string} str - serialized input
 * @returns {Array} tuple `[numberValue, consumedLength]`
 * @throws {SyntaxError} when `str` does not start with a valid float
 */
function expectFloat (str) {
  // The sign in front of the mantissa is required to read negative floats such as "d:-1.5;"
  const reFloat = /^d:(NAN|-?INF|[+-]?(?:\d+\.\d*|\d*\.\d+|\d+)(?:[eE][+-]?\d+)?);/
  const [match, floatMatch] = reFloat.exec(str) || []
  if (!floatMatch) {
    throw new SyntaxError('Expected a float value')
  }

  let floatValue
  switch (floatMatch) {
    case 'NAN':
      floatValue = Number.NaN
      break
    case '-INF':
      floatValue = Number.NEGATIVE_INFINITY
      break
    case 'INF':
      floatValue = Number.POSITIVE_INFINITY
      break
    default:
      floatValue = parseFloat(floatMatch)
      break
  }
  return [floatValue, match.length]
}
/**
 * Reads characters from the start of `str` until `len` bytes have been counted
 * or `str` is exhausted.
 *
 * Bytes are counted per UTF-16 code unit: 1 for codes up to 0x7f, 2 up to 0x7ff,
 * 3 above that, and 2 + 2 for a high surrogate followed by a low surrogate.
 * When `escapedString` is true, a backslash followed by two hex digits is decoded
 * into a single character and counted as one byte.
 *
 * @param {string} str - input positioned right after the opening quote
 * @param {number} len - number of bytes to read
 * @param {boolean} [escapedString=false] - decode `\xx` escape sequences
 * @returns {Array} tuple `[decodedString, bytesCounted, escapeSequencesDecoded]`
 */
function readBytes (str, len, escapedString = false) {
  const strLen = str.length
  let bytes = 0
  let out = ''
  let c = 0
  let wasHighSurrogate = false
  let escapedChars = 0

  while (bytes < len && c < strLen) {
    let chr = str.charAt(c)
    // `code` always describes the source character; for an escape that is the backslash,
    // which makes the whole sequence count as a single byte below
    const code = chr.charCodeAt(0)
    const isHighSurrogate = code >= 0xd800 && code <= 0xdbff
    const isLowSurrogate = code >= 0xdc00 && code <= 0xdfff

    if (escapedString && chr === '\\') {
      chr = String.fromCharCode(parseInt(str.slice(c + 1, c + 3), 16))
      escapedChars++
      // each escaped sequence is 3 characters. Go 2 chars ahead.
      // third character is jumped over by the increment right below
      c += 2
    }
    c++

    if (isHighSurrogate || (isLowSurrogate && wasHighSurrogate)) {
      // a high surrogate counts 2 bytes, as it is expected to be followed by a low surrogate;
      // a low surrogate preceded by a high surrogate adds the other 2 bytes
      bytes += 2
    } else if (code > 0x7ff) {
      // a low surrogate without a preceding high surrogate also ends up here
      bytes += 3
    } else if (code > 0x7f) {
      bytes += 2
    } else {
      bytes += 1
    }

    // if the previous high surrogate is not followed by a low surrogate, add 1 more byte
    if (wasHighSurrogate && !isLowSurrogate) {
      bytes += 1
    }

    out += chr
    wasHighSurrogate = isHighSurrogate
  }

  return [out, bytes, escapedChars]
}
/**
 * Parses a serialized string (`s:<byte length>:"<content>";`) at the start of `str`.
 *
 * @param {string} str - serialized input
 * @returns {Array} tuple `[stringValue, consumedLength]`, the length measured in JS characters
 * @throws {SyntaxError} when the header is missing, the byte length does not match
 *   or the closing `";` is absent
 */
function expectString (str) {
  // PHP strings consist of one-byte characters.
  // JS uses 2 bytes with possible surrogate pairs.
  // Serialized length of 2 is still 1 JS string character
  const reStrLength = /^s:(\d+):"/ // also match the opening " char
  const [match, byteLenMatch] = reStrLength.exec(str) || []
  if (!match) {
    throw new SyntaxError('Expected a string value')
  }

  const len = parseInt(byteLenMatch, 10)
  const content = str.slice(match.length)
  const [strMatch, bytes] = readBytes(content, len)
  if (bytes !== len) {
    throw new SyntaxError(`Expected string of ${len} bytes, but got ${bytes}`)
  }

  // strict parsing, match closing "; chars
  if (!content.startsWith('";', strMatch.length)) {
    throw new SyntaxError('Expected ";')
  }

  return [strMatch, match.length + strMatch.length + 2] // skip last ";
}
/**
 * Parses a serialized escaped string (`S:<byte length>:"<content>";`) at the start
 * of `str`, where the content may contain `\xx` hex escape sequences.
 *
 * @param {string} str - serialized input
 * @returns {Array} tuple `[decodedString, consumedLength]`; the length counts the
 *   source characters consumed, including the undecoded escape sequences
 * @throws {SyntaxError} when the header is missing, the byte length does not match
 *   or the closing `";` is absent
 */
function expectEscapedString (str) {
  const reStrLength = /^S:(\d+):"/ // also match the opening " char
  const [match, strLenMatch] = reStrLength.exec(str) || []
  if (!match) {
    throw new SyntaxError('Expected an escaped string value')
  }

  const len = parseInt(strLenMatch, 10)
  const content = str.slice(match.length)
  const [strMatch, bytes, escapedChars] = readBytes(content, len, true)
  if (bytes !== len) {
    throw new SyntaxError(`Expected escaped string of ${len} bytes, but got ${bytes}`)
  }

  // Every escape sequence takes 3 source characters but yields 1 decoded character,
  // so the source span is longer than the decoded string by 2 per escape.
  const sourceLength = strMatch.length + escapedChars * 2

  // strict parsing, match closing "; chars
  if (!content.startsWith('";', sourceLength)) {
    throw new SyntaxError('Expected ";')
  }

  // Report the source length (not the decoded length) so that a caller parsing a
  // surrounding array or object continues right after the closing ";
  return [strMatch, match.length + sourceLength + 2] // skip last ";
}
/**
 * Parses an array key or object property name at the start of `str`.
 * A key is a string, an escaped string or an integer; the parsers are tried in that order.
 *
 * @param {string} str - serialized input
 * @returns {Array} tuple `[key, consumedLength]` as returned by the parser that matched
 * @throws {SyntaxError} when none of the three parsers accepts the input
 */
function expectKeyOrIndex (str) {
  const parsers = [expectString, expectEscapedString, expectInt]
  for (const parse of parsers) {
    try {
      return parse(str)
    } catch (err) {
      // deliberately ignored: a failure only means the key is not of this kind,
      // the next parser gets its turn
    }
  }
  throw new SyntaxError('Expected key or index')
}
/**
 * Parses a serialized object at the start of `str`. Only `stdClass` is supported.
 *
 * Format: O:<class name length>:"class name":<prop count>:{<props and values>}
 * Sample: O:8:"stdClass":2:{s:3:"foo";s:3:"bar";s:3:"bar";s:3:"baz";}
 *
 * @param {string} str - serialized input
 * @param {Function} cache - reference cache as created by `initCache`
 * @returns {Array} tuple `[plainObject, consumedLength]`
 * @throws {SyntaxError} when the header is malformed, the class is not `stdClass`,
 *   a property cannot be parsed or the closing `}` is missing
 */
function expectObject (str, cache) {
  const reObjectLiteral = /^O:(\d+):"([^"]+)":(\d+):\{/
  const [header, /* classNameLengthMatch */, className, propCountMatch] = reObjectLiteral.exec(str) || []
  if (!header) {
    throw new SyntaxError('Invalid input')
  }
  if (className !== 'stdClass') {
    throw new SyntaxError(`Unsupported object type: ${className}`)
  }

  const propCount = parseInt(propCountMatch, 10)
  const obj = {}
  // register the object before its properties are parsed, so that the entries
  // recorded for the property values come after the entry of the object itself
  cache([obj])

  let totalOffset = header.length
  let rest = str.slice(totalOffset)
  for (let i = 0; i < propCount; i++) {
    const [name, nameLength] = expectKeyOrIndex(rest)
    rest = rest.slice(nameLength)
    totalOffset += nameLength

    const [value, valueLength] = expectType(rest, cache)
    rest = rest.slice(valueLength)
    totalOffset += valueLength

    obj[name] = value
  }

  // strict parsing, expect } after object literal
  if (rest.charAt(0) !== '}') {
    throw new SyntaxError('Expected }')
  }
  return [obj, totalOffset + 1] // skip final }
}
/**
 * Placeholder for custom-serialized classes (`C:` tag); always throws.
 *
 * @param {string} str - serialized input (unused)
 * @param {Function} cache - reference cache (unused)
 * @throws {Error} always, as the type is not supported
 */
function expectClass (str, cache) {
  // can't be well supported, because requires calling eval (or similar)
  // in order to call serialized constructor name
  // which is unsafe
  // or assume that constructor is defined in global scope
  // but this is too much limiting
  throw new Error('Not yet implemented')
}
/**
 * Parses a reference (`r:<n>;` or `R:<n>;`) at the start of `str` and resolves it
 * through `cache.get`.
 *
 * @param {string} str - serialized input
 * @param {Function} cache - reference cache exposing `get(index)`
 * @returns {Array} tuple `[referencedValue, consumedLength]`
 * @throws {SyntaxError} when `str` does not start with a valid reference
 */
function expectReference (str, cache) {
  // serialized reference numbers start at 1, a leading zero is rejected
  const reRef = /^[rR]:([1-9]\d*);/
  const [match, refIndex] = reRef.exec(str) || []
  if (!match) {
    throw new SyntaxError('Expected reference value')
  }
  // the cache is zero-based, hence the - 1
  const value = cache.get(parseInt(refIndex, 10) - 1)
  return [value, match.length]
}
/**
 * Parses a serialized array (`a:<item count>:{<keys and values>}`) at the start of `str`.
 *
 * @param {string} str - serialized input
 * @param {Function} cache - reference cache as created by `initCache`
 * @returns {Array} tuple `[items, consumedLength]`, with `items` as produced by `expectArrayItems`
 * @throws {SyntaxError} when the header is malformed or the closing `}` is missing
 */
function expectArray (str, cache) {
  const reArrayLength = /^a:(\d+):{/
  const [header, lengthMatch] = reArrayLength.exec(str) || []
  // lengthMatch is a non-empty digit string on success (even "0" is truthy)
  if (!lengthMatch) {
    throw new SyntaxError('Expected array length annotation')
  }

  const body = str.slice(header.length)
  const [items, itemsLength] = expectArrayItems(body, parseInt(lengthMatch, 10), cache)

  // strict parsing, expect closing } brace after array literal
  if (body.charAt(itemsLength) !== '}') {
    throw new SyntaxError('Expected }')
  }
  return [items, header.length + itemsLength + 1] // jump over }
}
/**
 * Parses `expectedItems` key/value pairs from the start of `str`.
 *
 * The pairs are collected in a plain object. When all keys are the integers
 * 0, 1, 2, ... in that order, the result is converted to a real array.
 *
 * @param {string} str - input positioned right after the opening `{`
 * @param {number} [expectedItems=0] - number of key/value pairs to read
 * @param {Function} cache - reference cache as created by `initCache`
 * @returns {Array} tuple `[items, consumedLength]` where `items` is an array or a plain object
 * @throws {SyntaxError} when a key or a value cannot be parsed
 */
function expectArrayItems (str, expectedItems = 0, cache) {
  let totalOffset = 0
  let hasContiguousIndexes = true
  let lastIndex = -1
  let items = {}
  let rest = str
  // register the container before its items are parsed, so that the entries
  // recorded for the items come after the entry of the container itself
  cache([items])

  for (let i = 0; i < expectedItems; i++) {
    const [key, keyLength] = expectKeyOrIndex(rest)
    hasContiguousIndexes = hasContiguousIndexes && typeof key === 'number' && key === lastIndex + 1
    lastIndex = key
    rest = rest.slice(keyLength)
    totalOffset += keyLength

    // references are resolved immediately, so if duplicate key overwrites previous array index
    // the old value is anyway resolved
    // fixme: but next time the same reference should point to the new value
    const [value, valueLength] = expectType(rest, cache)
    rest = rest.slice(valueLength)
    totalOffset += valueLength

    items[key] = value
  }

  if (hasContiguousIndexes) {
    // NOTE: the cache entry registered above still holds the plain object, so a later
    // reference to this container resolves to that object rather than to this new array
    items = Object.values(items)
  }
  return [items, totalOffset]
}
module.exports = functionunserialize (str) {
// discuss at: https://locutus.io/php/unserialize/
// original by: Arpad Ray (mailto:arpad@php.net)
// improved by: Pedro Tainha (https://www.pedrotainha.com)
// improved by: Kevin van Zonneveld (https://kvz.io)
// improved by: Kevin van Zonneveld (https://kvz.io)
// improved by: Chris
// improved by: James
// improved by: Le Torbi
// improved by: Eli Skeggs
// bugfixed by: dptr1988
// bugfixed by: Kevin van Zonneveld (https://kvz.io)
// bugfixed by: Brett Zamir (https://brett-zamir.me)
// bugfixed by: philippsimon (https://github.com/philippsimon/)
// revised by: d3x
// input by: Brett Zamir (https://brett-zamir.me)
// input by: Martin (https://www.erlenwiese.de/)
// input by: kilops
// input by: Jaroslaw Czarniak
// input by: lovasoa (https://github.com/lovasoa/)
// improved by: Rafał Kukawski
// reimplemented by: Rafał Kukawski
// note 1: We feel the main purpose of this function should be
// note 1: to ease the transport of data between php & js
// note 1: Aiming for PHP-compatibility, we have to translate objects to arrays
// example 1: unserialize('a:3:{i:0;s:5:"Kevin";i:1;s:3:"van";i:2;s:9:"Zonneveld";}')
// returns 1: ['Kevin', 'van', 'Zonneveld']
// example 2: unserialize('a:2:{s:9:"firstName";s:5:"Kevin";s:7:"midName";s:3:"van";}')
// returns 2: {firstName: 'Kevin', midName: 'van'}
// example 3: unserialize('a:3:{s:2:"ü";s:2:"ü";s:3:"四";s:3:"四";s:4:"𠜎";s:4:"𠜎";}')
// returns 3: {'ü': 'ü', '四': '四', '𠜎': '𠜎'}
// example 4: unserialize(undefined)
// returns 4: false
// example 5: unserialize('O:8:"stdClass":1:{s:3:"foo";b:1;}')
// returns 5: { foo: true }
// example 6: unserialize('a:2:{i:0;N;i:1;s:0:"";}')
// returns 6: [null, ""]
// example 7: unserialize('S:7:"\\65\\73\\63\\61\\70\\65\\64";')
// returns 7: 'escaped'
try {
if (typeof str !== 'string') {
returnfalse
}
return expectType(str, initCache())[0]
} catch (err) {
console.error(err)
returnfalse
}
}
[ View on GitHub | Edit on GitHub | Source on GitHub ]

How to use

You can install via npm install locutus and require it via require('locutus/php/var/unserialize'). You could also require the var module in full so that you could access var.unserialize instead.

If you intend to target the browser, you can then use a module bundler such as Parcel, webpack, Browserify, or rollup.js. This can be important because Locutus allows modern JavaScript in the source files, meaning it may not work in all browsers without a build/transpile step. Locutus does transpile all functions to ES5 before publishing to npm.

A community effort

Not unlike Wikipedia, Locutus is an ongoing community effort. Our philosophy follows The McDonald’s Theory. This means that we don't consider it to be a bad thing that many of our functions are first iterations, which may still have their fair share of issues. We hope that these flaws will inspire others to come up with better ideas.

This way of working also means that we don't offer any production guarantees, and recommend using Locutus for inspiration and learning purposes only.

Notes

  • We feel the main purpose of this function should be to ease the transport of data between php & js. Aiming for PHP-compatibility, we have to translate objects to arrays.

Examples

Please note that these examples are distilled from test cases that automatically verify our functions still work correctly. This could explain some quirky ones.

| # | code | expected result |
|---|------|-----------------|
| 1 | unserialize('a:3:{i:0;s:5:"Kevin";i:1;s:3:"van";i:2;s:9:"Zonneveld";}') | ['Kevin', 'van', 'Zonneveld'] |
| 2 | unserialize('a:2:{s:9:"firstName";s:5:"Kevin";s:7:"midName";s:3:"van";}') | {firstName: 'Kevin', midName: 'van'} |
| 3 | unserialize('a:3:{s:2:"ü";s:2:"ü";s:3:"四";s:3:"四";s:4:"𠜎";s:4:"𠜎";}') | {'ü': 'ü', '四': '四', '𠜎': '𠜎'} |
| 4 | unserialize(undefined) | false |
| 5 | unserialize('O:8:"stdClass":1:{s:3:"foo";b:1;}') | { foo: true } |
| 6 | unserialize('a:2:{i:0;N;i:1;s:0:"";}') | [null, ""] |
| 7 | unserialize('S:7:"\\65\\73\\63\\61\\70\\65\\64";') | 'escaped' |

« More PHP var functions


Star