这里有一个使用位移操作非常快的C语言解决方案,假定您知道解码后字符串的长度。这些字符串是整数向量,范围为0..每个字母表的最大值。用户需要自行将其转换为字符范围受限的字符串。至于问题标题中的“原地”,源向量和目标向量可以重叠,但仅当源字母表不大于目标字母表时才可以。
#include <stddef.h>
#include <limits.h>
#if UINT_MAX == ULLONG_MAX
# error recode() assumes that long long has more bits than int
#endif
int recode(unsigned *dest, size_t *dlen, unsigned dmax,
const unsigned *source, size_t slen, unsigned smax)
{
if (smax < 1)
return 2;
unsigned sbits = 0;
unsigned scut = 1;
while (scut && scut <= smax) {
scut <<= 1;
sbits++;
}
scut -= smax + 1;
if (dmax < 1)
return 2;
unsigned dbits = 0;
unsigned dcut = 1;
while (dcut && dcut <= dmax) {
dcut <<= 1;
dbits++;
}
dcut -= dmax + 1;
unsigned long long buf = 0;
unsigned have = 0;
size_t i = 0, n = 0;
unsigned sym;
for (;;) {
while (have < dbits && i < slen) {
sym = source[i++];
if (sym > smax) {
*dlen = n;
return 3;
}
if (sym < scut) {
buf = (buf << (sbits - 1)) + sym;
have += sbits - 1;
}
else {
buf = (buf << sbits) + sym + scut;
have += sbits;
}
}
if (have < dbits)
break;
if (n == *dlen)
return 1;
sym = buf >> (have - dbits + 1);
if (sym < dcut) {
dest[n++] = sym;
have -= dbits - 1;
}
else {
sym = buf >> (have - dbits);
dest[n++] = sym - dcut;
have -= dbits;
}
buf &= ((unsigned long long)1 << have) - 1;
}
if (have) {
if (n == *dlen)
return 1;
sym = buf;
sym <<= dbits - 1 - have;
if (sym >= dcut)
sym = (sym << 1) - dcut;
dest[n++] = sym;
}
*dlen = n;
return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
static void ranvec(unsigned *vec, size_t len, unsigned max) {
unsigned bits = 0;
unsigned long long mask = 1;
while (mask <= max) {
mask <<= 1;
bits++;
}
mask--;
unsigned long long ran = 0;
unsigned have = 0;
size_t n = 0;
while (n < len) {
while (have < bits) {
ran = (ran << 31) + random();
have += 31;
}
if ((ran & mask) <= max)
vec[n++] = ran & mask;
ran >>= bits;
have -= bits;
}
}
#define NUM(var, str) \
do { \
char *end; \
unsigned long val = strtoul(str, &end, 0); \
var = val; \
if (*end || var != val) { \
fprintf(stderr, \
"invalid or out of range numeric argument: %s\n", str); \
return 1; \
} \
} while (0)
int main(int argc, char **argv)
{
unsigned smax, dmax, runs;
size_t slen, dsize, bsize;
if (argc != 5) { fputs("need four arguments\n", stderr); return 1; }
NUM(smax, argv[1]);
NUM(dmax, argv[2]);
NUM(slen, argv[3]);
NUM(runs, argv[4]);
dsize = ceil(slen * ceil(log2(smax + 1.)) / floor(log2(dmax + 1.)));
bsize = ceil(dsize * ceil(log2(dmax + 1.)) / floor(log2(smax + 1.)));
srandomdev();
unsigned source[slen], dest[dsize], back[bsize];
unsigned mis = 0, i;
unsigned long long dtot = 0;
int ret;
for (i = 0; i < runs; i++) {
ranvec(source, slen, smax);
size_t dlen = dsize;
ret = recode(dest, &dlen, dmax, source, slen, smax);
if (ret) {
fprintf(stderr, "encode error %d\n", ret);
break;
}
dtot += dlen;
size_t blen = bsize;
ret = recode(back, &blen, smax, dest, dlen, dmax);
if (ret) {
fprintf(stderr, "decode error %d\n", ret);
break;
}
if (blen < slen || memcmp(source, back, slen))
mis++;
}
if (mis)
fprintf(stderr, "%u/%u mismatches!\n", mis, i);
if (ret == 0)
printf("mean dest/source symbols = %.4f (optimal = %.4f)\n",
dtot / (i * (double)slen), log(smax + 1.) / log(dmax + 1.));
return 0;
}