这是我一直在开发的一个类。目前还没有单元测试,但它似乎运行得相当不错。它不支持在数组结构内使用函数调用、对象实例化、条件表达式等,因为我的用例不需要这些功能。不过,您可以根据需要自行添加所需的功能。
/**
 * Converts the source text of a PHP array literal into an actual PHP array
 * without evaluating it.
 *
 * The input is tokenized with token_get_all() and only a small whitelist of
 * tokens is interpreted: nested arrays (both `[...]` and `array(...)`),
 * string literals, integer/float literals (optionally negative), bare words
 * (`true`, `false`, `null`, constant names) and `=>`. Function calls, object
 * instantiation, expressions and the like are not supported; tokens outside
 * the whitelist are dropped.
 *
 * Known limitations:
 *  - Quoted string literals (keys and values) are returned verbatim,
 *    INCLUDING their surrounding quotes and escape sequences.
 *  - Bare words other than true/false/null are returned as plain strings
 *    (constants are not resolved).
 */
class ArrayTokenScanner
{
    /**
     * Source text of every token that is directly followed by `=>`,
     * collected over all nesting levels by initialize().
     *
     * @var string[]
     */
    protected $arrayKeys = [];

    /**
     * Parses the textual representation of a PHP array.
     *
     * Surrounding whitespace and trailing semicolons are ignored. The literal
     * may span several lines and may be written as `[...]` or `array(...)`.
     *
     * @param string $string Array literal source, e.g. '["a" => 1, 2]'.
     *
     * @return array The parsed array.
     *
     * @throws InvalidArgumentException When the input does not look like an array literal.
     */
    public function scan($string)
    {
        $sanitized = trim($string, " \t\n\r\0\x0B;");

        // "s" lets "." span newlines so multi-line literals are accepted;
        // "i" and "\s*" accept "Array (" just like the PHP tokenizer does.
        if (preg_match('/^(\[|array\s*\().*(\]|\))$/is', $sanitized)) {
            $tokens = $this->tokenize("<?php {$sanitized}");
            if ($tokens) {
                $this->initialize($tokens);
                return $this->parse($tokens);
            }
        }

        throw new InvalidArgumentException("Invalid array format.");
    }

    /**
     * Records the source text of every explicit key (any token immediately
     * followed by `=>`) in $this->arrayKeys.
     *
     * @param array $tokens Normalized tokens as produced by tokenize().
     *
     * @return void
     */
    protected function initialize(array $tokens)
    {
        $this->arrayKeys = [];

        $tokens = array_values($tokens);
        $last = count($tokens) - 1;

        // Stop one short of the end: the final token has no successor, and
        // indexing into the `false` returned by next() would raise a warning.
        for ($i = 0; $i < $last; $i++) {
            if ($tokens[$i + 1][0] === T_DOUBLE_ARROW) {
                $this->arrayKeys[] = $tokens[$i][1];
            }
        }
    }

    /**
     * Parses one array level, starting at the token the internal pointer of
     * $tokens currently rests on (which must be `[` or `array`). Nested
     * arrays are parsed recursively; on return the pointer rests on the
     * closing token of this level.
     *
     * Automatic integer keys follow PHP's rule: the next key is one greater
     * than the largest integer key used so far (never below 0).
     *
     * @param array $tokens Normalized tokens; passed by reference so that the
     *                      internal pointer is shared across recursion levels.
     *
     * @return array|null The parsed array, or null when the current token
     *                    does not open an array.
     */
    protected function parse(array &$tokens)
    {
        $token = current($tokens);
        if (!is_array($token) || !in_array($token[0], [T_ARRAY, T_BRACKET_OPEN], true)) {
            return null;
        }

        $array = [];
        $assoc = false;     // explicit key of the pending element, false if none
        $index = 0;         // next automatic integer key
        $negative = false;  // true when a "-" precedes the pending number
        $closer = ($token[0] === T_ARRAY) ? T_ARRAY_CLOSE : T_BRACKET_CLOSE;

        while ($token = $this->until($tokens, $closer)) {
            $type = $token[0];

            if ($type === T_DOUBLE_ARROW) {
                continue;
            }
            if ($type === T_COMMA_SEPARATOR) {
                // Element finished: forget its key and any dangling sign.
                $assoc = false;
                $negative = false;
                continue;
            }
            if ($type === T_MINUS) {
                // Only remember the sign. The minus is not an element of its
                // own and must not advance the automatic index.
                $negative = true;
                continue;
            }

            // A token followed by "=>" is a key, not a value.
            $next = $this->peek($tokens);
            if ($next !== false && $next[0] === T_DOUBLE_ARROW) {
                $assoc = $negative ? "-{$token[1]}" : $token[1];
                $negative = false;
                if (preg_match('/^-?(0|[1-9][0-9]*)$/', $assoc)) {
                    $assoc = (int) $assoc;
                }
                continue;
            }

            if (in_array($type, [T_ARRAY, T_BRACKET_OPEN], true)) {
                $value = $this->parse($tokens);
            } elseif (in_array($type, [T_STRING, T_NUM_STRING, T_CONSTANT_ENCAPSED_STRING], true)) {
                $value = $this->parseAtomic($token[1]);
            } elseif (in_array($type, [T_LNUMBER, T_DNUMBER], true)) {
                $value = $this->parseAtomic($negative ? "-{$token[1]}" : $token[1]);
            } else {
                // Unsupported token (e.g. a mismatched closing bracket): skip it.
                continue;
            }
            $negative = false;

            if ($assoc === false) {
                $array[$this->createKey($index)] = $value;
                // Advance for every value kind, nested arrays included, so
                // consecutive list elements never overwrite each other.
                $index++;
            } else {
                $array[$assoc] = $value;
                if (is_int($assoc)) {
                    // An explicit integer key can only push the next
                    // automatic key upwards, never pull it back down.
                    $index = max($index, $assoc + 1);
                }
            }
        }

        return $array;
    }

    /**
     * Returns the token after the current one without consuming it.
     *
     * @param array $tokens Normalized tokens (internal pointer is restored).
     *
     * @return array|false The following token, or false at the end of input.
     */
    private function peek(array &$tokens)
    {
        $next = next($tokens);
        if ($next === false) {
            // The pointer fell off the end and prev() cannot bring it back;
            // park it on the last token instead.
            end($tokens);
            return false;
        }

        prev($tokens);

        return $next;
    }

    /**
     * Advances the internal pointer of $tokens by one and returns that token.
     *
     * @param array      $tokens        Normalized tokens.
     * @param int|string $discriminator Token type that terminates the current level.
     *
     * @return array|false The next token, or false when the input is
     *                     exhausted or the terminating token was reached.
     */
    protected function until(array &$tokens, $discriminator)
    {
        $next = next($tokens);
        if ($next === false || $next[0] === $discriminator) {
            return false;
        }

        return $next;
    }

    /**
     * Returns the first integer >= $index that is not listed in
     * $this->arrayKeys, advancing $index to it.
     *
     * NOTE(review): initialize() stores keys as token text (strings) while
     * $index is an int, so the strict in_array() below never matches and
     * $index is returned unchanged. That matches PHP's own behaviour (a later
     * explicit key overwrites an earlier automatic one), so it is left as is.
     *
     * @param int $index Candidate key; passed by reference.
     *
     * @return int|null The key to use.
     */
    protected function createKey(&$index)
    {
        do {
            if (!in_array($index, $this->arrayKeys, true)) {
                return $index;
            }
        } while (++$index);
    }

    /**
     * Tokenizes PHP source and reduces it to the normalized tokens the
     * parser understands.
     *
     * @param string $string PHP source including the opening tag.
     *
     * @return array|false List of [type, text] pairs, or false when
     *                     token_get_all() did not return an array.
     */
    protected function tokenize($string)
    {
        $tokens = token_get_all($string);
        if (!is_array($tokens)) {
            return false;
        }

        // array_filter() preserves keys; re-index so the result is a list.
        $accepted = array_values(array_filter($tokens, [$this, 'accept']));

        return $this->normalize($accepted);
    }

    /**
     * Filter callback deciding whether a raw token is kept.
     *
     * @param array|string $value Raw token from token_get_all().
     *
     * @return bool True for the supported punctuation and token types.
     */
    protected function accept($value)
    {
        if (is_string($value)) {
            // "(" is deliberately absent: T_ARRAY alone marks the start of
            // an array(...) construct.
            return in_array($value, [',', '[', ']', ')', '-'], true);
        }

        return in_array(
            $value[0],
            [T_ARRAY, T_CONSTANT_ENCAPSED_STRING, T_DOUBLE_ARROW, T_STRING, T_NUM_STRING, T_LNUMBER, T_DNUMBER],
            true
        );
    }

    /**
     * Brings every token into the uniform shape [0 => type, 1 => text] and
     * makes sure the pseudo token constants used by the parser exist.
     *
     * @param array $tokens Filtered raw tokens.
     *
     * @return array List of [type, text] pairs; for punctuation both entries
     *               are the character itself.
     */
    protected function normalize(array $tokens)
    {
        // Single-character tokens have no T_* constant; define stand-ins
        // whose value is the character itself (only if not defined already).
        defined('T_MINUS') ?: define('T_MINUS', '-');
        defined('T_BRACKET_OPEN') ?: define('T_BRACKET_OPEN', '[');
        defined('T_BRACKET_CLOSE') ?: define('T_BRACKET_CLOSE', ']');
        defined('T_COMMA_SEPARATOR') ?: define('T_COMMA_SEPARATOR', ',');
        defined('T_ARRAY_CLOSE') ?: define('T_ARRAY_CLOSE', ')');

        return array_map(function ($token) {
            // For a one-character string token, $token[0] is that character;
            // for an array token, $token[0] is the integer token id.
            return [
                0 => $token[0],
                1 => is_string($token[0]) ? $token[0] : $token[1],
            ];
        }, $tokens);
    }

    /**
     * Converts the source text of a scalar into a PHP value.
     *
     * @param string $value Token text, with a leading "-" for negative numbers.
     *
     * @return mixed The text itself for quoted literals (quotes kept), int or
     *               float for numbers, bool for true/false (any letter case),
     *               null for null (any letter case), otherwise the text
     *               unchanged (e.g. a constant name).
     */
    protected function parseAtomic($value)
    {
        if (preg_match('/^["\'].*["\']$/', $value)) {
            return $value;
        }
        if (preg_match('/^-?(0|[1-9][0-9]*)$/', $value)) {
            return (int) $value;
        }
        if (is_numeric($value)) {
            return (float) $value;
        }

        $lower = strtolower($value);
        if ($lower === 'true' || $lower === 'false') {
            // Compare the lower-cased text so that TRUE / True map to true.
            return $lower === 'true';
        }
        if ($lower === 'null') {
            return null;
        }

        return $value;
    }
}
使用示例:
// Scan an array literal that mixes a nested array(...), a nested [...],
// string and integer keys, a negative number and a constant name.
$tokenScanner = new ArrayTokenScanner();
$array = $tokenScanner->scan('[array("foo" => -123, "foobie" => "5x2", "bar" => \'456\', 111 => 12, "bar", ["null" => null], "bool" => false), 123 => E_ERROR];');

// Collapse var_export()'s multi-line output onto one line for display.
$arrayExport = preg_replace('/\s+/', ' ', var_export($array, true));
// The scanner keeps the quotes of string literals, which var_export()
// escapes with backslashes; strip them for readability.
echo stripslashes($arrayExport) . PHP_EOL;

// Same literal with a positive number for "foo".
$array2 = $tokenScanner->scan('[array("foo" => 123, "foobie" => "5x2", "bar" => \'456\', 111 => 12, "bar", ["null" => null], "bool" => false), 123 => E_ERROR];');

// Export the SECOND result here (the original exported $array again).
$arrayExport = preg_replace('/\s+/', ' ', var_export($array2, true));
echo stripslashes($arrayExport);
['Test','Blah']
。 - vstmarray
的字符串的。依赖于 `var_export` 作为一个天真的序列化器难道不是一个聪明的想法吗?它不能被覆盖,比如使用更合理的选择(`serialize`、`json_encode`)吗? - raina77ow