有没有标准的C库函数用于转义C字符串?
例如,如果我有一个C字符串:
char example[] = "first line\nsecond line: \"inner quotes\"";
我想要打印
"first line\nsecond line: \"inner quotes\""
有没有一个库函数可以帮我完成这个转换?自己写似乎有点傻。
如果我可以指定一个长度进行转义(这样它就可以在 \0 之前或之后停止),那就更好了。
这方面没有标准的C库函数。
当您使用以下声明:
char example[] = "first line\nsecond line: \"inner quotes\"";
转义序列将被解释并由编译器替换。 您将不得不“取消解释”C转义的字符。 这是一个简单且粗略的示例:
#include <stdio.h>
#include <ctype.h>
/*
 * Print the first `len` bytes starting at `ptr` to stdout, written the way
 * they would appear inside a C string literal.
 *
 * Bytes that have a simple escape sequence (including NUL, quotes, '?' and
 * backslash) are printed as that sequence; other printable bytes are printed
 * unchanged; everything else is printed as a three-digit octal escape.
 *
 * ptr: buffer to print; if NULL nothing is printed.
 * len: number of bytes to process. Embedded NUL bytes do not stop the loop,
 *      so the caller must guarantee that `len` bytes are readable.
 */
void print_unescaped(char* ptr, int len) {
    if (!ptr) return;
    for (int i = 0; i < len; i++, ptr++) {
        /* Work on the unsigned value: passing a negative char to isprint()
         * is undefined, and a negative value given to %o would print as a
         * huge sign-extended number instead of three octal digits. */
        unsigned char c = (unsigned char)*ptr;
        switch (c) {
        case '\0': printf("\\0"); break;
        case '\a': printf("\\a"); break;
        case '\b': printf("\\b"); break;
        case '\f': printf("\\f"); break;
        case '\n': printf("\\n"); break;
        case '\r': printf("\\r"); break;
        case '\t': printf("\\t"); break;
        case '\v': printf("\\v"); break;
        case '\\': printf("\\\\"); break;
        case '\?': printf("\\\?"); break;
        case '\'': printf("\\\'"); break;
        case '\"': printf("\\\""); break;
        default:
            if (isprint(c)) {
                putchar(c);
            } else {
                printf("\\%03o", (unsigned int)c);
            }
            break;
        }
    }
}
puts 会在末尾添加一个换行符。 - dreamlax
else 在 switch 中是干嘛的? - Michael Krelin - hacker
gets,因为它比两个 putchar 函数更易读(考虑到提供的空间),而且我想传达的是概念而不是实现。我将编辑我的答案以澄清这一点。@Michael Krelin:这是拉丁语中的“默认”一词,但我已经将其翻译回来了。谢谢你指出来。 - bta
你刚才提到你想要打印这个字符串。
/* Print `example` to stdout with every character written as it would
 * appear inside a C string literal. */
char example[] = "first line\nsecond line: \"inner quotes\"";
size_t length = strlen(example);
size_t i;
/* Characters that are escaped by simply putting a backslash in front. */
static const char *simple = "\\\'\"";
/* Control characters and, at the same index, the letter used to escape them. */
static const char *complex = "\a\b\f\n\r\t\v";
static const char *complexMap = "abfnrtv";
for (i = 0; i < length; i++)
{
    /* Use the unsigned value: isprint() is undefined for negative
     * arguments, and %o would sign-extend a negative char. */
    unsigned char c = (unsigned char)example[i];
    const char *p;
    if (strchr(simple, c))
    {
        putchar('\\');
        putchar(c);
    }
    else if ((p = strchr(complex, c)) != NULL)
    {
        size_t idx = (size_t)(p - complex);
        putchar('\\');
        putchar(complexMap[idx]);
    }
    else if (isprint(c))
    {
        putchar(c);
    }
    else
    {
        printf("\\%03o", (unsigned int)c);
    }
}
\e 不是 C 语言中的标准转义序列。https://en.cppreference.com/w/c/language/escape - dreamlax
#include <string.h>
/* int c_quote(const char* src, char* dest, int maxlen)
 *
 * Quotes the given string so that it can be pasted between the quotes of a
 * C string literal: backslash, double quote and single quote get a
 * backslash in front of them, and newline, tab and backspace are replaced
 * by \n, \t and \b. Every other byte is copied unchanged.
 *
 * Returns the number of chars written to dest (including any NUL bytes
 * that were copied).
 *
 * If dest is NULL, nothing is written, but the number of chars that would
 * be written is still returned, so the caller can size a buffer.
 *
 * maxlen source chars are processed. If maxlen is negative, strlen is used
 * to find the length of the source string, and the whole string including
 * its NUL terminator is processed (the terminator is copied as-is).
 *
 * If src is NULL, nothing is processed and 0 is returned.
 *
 * Note that this function does not NUL-terminate dest on its own. If src
 * is not NUL-terminated, or maxlen does not cover the terminator, remember
 * to terminate dest afterwards.
 */
int c_quote(const char* src, char* dest, int maxlen) {
    int count = 0;

    /* Must be checked before strlen(), which may not be given NULL. */
    if (!src) {
        return 0;
    }
    if (maxlen < 0) {
        maxlen = (int)strlen(src) + 1; /* add 1 for the NUL terminator */
    }

    for (; maxlen > 0; ++src, --maxlen) {
        /* Character to emit after a backslash, or 0 when *src is copied raw. */
        char esc = 0;

        switch (*src) {
        /* printable chars that only need a backslash in front */
        case '\\':
        case '\"':
        case '\'':
            esc = *src;
            break;
        /* control chars that have a dedicated escape letter */
        case '\n': esc = 'n'; break;
        case '\t': esc = 't'; break;
        case '\b': esc = 'b'; break;
        default:
            break;
        }

        if (esc) {
            if (dest) {
                *dest++ = '\\';
                *dest++ = esc;
            }
            count += 2;
        } else {
            if (dest) {
                *dest++ = *src;
            }
            count++;
        }
    }
    return count;
}
没有标准的C函数,但自己编写也不太难
虽然不太美观,但是可以实现:
/*
 * Copy the NUL-terminated string `src` into `dest`, replacing newline,
 * double quote and backslash with the two-character sequences \n, \" and \\.
 * All other characters are copied unchanged and the result is always
 * NUL-terminated.
 *
 * Backslash has to be escaped as well; otherwise a literal backslash
 * followed by 'n' in the input would look the same as an escaped newline.
 *
 * dest: output buffer. In the worst case every input character doubles,
 *       so it must have room for 2 * strlen(src) + 1 bytes.
 * src:  string to escape; must not be NULL.
 */
void escape_str(char *dest, const char *src)
{
    for (; *src; src++)
    {
        switch (*src)
        {
        case '\n':
            *dest++ = '\\';
            *dest++ = 'n';
            break;
        case '\"':
        case '\\':
            *dest++ = '\\';
            *dest++ = *src;
            break;
        default:
            *dest++ = *src;
            break;
        }
    }
    *dest = '\0';
}
stdout 的函数更有用,所以这里有一个替代方案:如果 dst 是 NULL,它可以计算出需要多少内存,并且根据需要在 dstLen 处停止。if(dst) 检查有些低效。
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/*
 * Escape the NUL-terminated string `src` so that it can be placed inside a
 * C string literal.
 *
 * Quotes and backslash get a backslash in front of them, the control
 * characters \a \b \t \n \v \f \r are written as their two-character
 * escapes, other printable characters are copied unchanged, and everything
 * else becomes a three-digit octal escape.
 *
 * dst:    output buffer, or NULL to only compute the required length.
 * src:    string to escape; must not be NULL.
 * dstLen: maximum number of characters to write to dst, NOT counting the
 *         terminating NUL, so dst must have room for dstLen + 1 bytes.
 *         0 means "no limit". Ignored when dst is NULL.
 *
 * Returns the number of characters produced, excluding the terminating NUL.
 * When dst is non-NULL it is always NUL-terminated. An escape sequence is
 * never split: if the next one does not fit within dstLen, output stops
 * before it.
 */
size_t str_escape(char *dst, const char *src, size_t dstLen)
{
    /* Control characters and, at the same index, their escape letters. */
    static const char complexChars[] = "\a\b\t\n\v\f\r";
    static const char complexCharMap[] = "abtnvfr";
    size_t i;
    size_t srcLen = strlen(src);
    size_t dstIdx = 0;

    // If caller wants to determine required length (supplying NULL for dst)
    // then we set dstLen to SIZE_MAX and pretend the buffer is the largest
    // possible, but we never write to it. Caller can also provide dstLen
    // as 0 if no limit is wanted.
    if (dst == NULL || dstLen == 0) dstLen = SIZE_MAX;

    for (i = 0; i < srcLen && dstIdx < dstLen; i++)
    {
        // Unsigned value: isprint() is undefined for negative arguments.
        unsigned char c = (unsigned char)src[i];
        char piece[4];
        size_t pieceLen;
        const char *hit;

        if (c == '\'' || c == '\"' || c == '\\')
        {
            piece[0] = '\\';
            piece[1] = (char)c;
            pieceLen = 2;
        }
        else if ((hit = memchr(complexChars, c, sizeof complexChars - 1)) != NULL)
        {
            piece[0] = '\\';
            piece[1] = complexCharMap[hit - complexChars];
            pieceLen = 2;
        }
        else if (isprint(c))
        {
            // simply copy the character
            piece[0] = (char)c;
            pieceLen = 1;
        }
        else
        {
            // produce octal escape sequence
            piece[0] = '\\';
            piece[1] = (char)(((c >> 6) & 03) + '0');
            piece[2] = (char)(((c >> 3) & 07) + '0');
            piece[3] = (char)((c & 07) + '0');
            pieceLen = 4;
        }

        // dstIdx < dstLen holds here, so the subtraction cannot wrap around.
        if (pieceLen > dstLen - dstIdx)
            break;

        if (dst)
            memcpy(dst + dstIdx, piece, pieceLen);
        dstIdx += pieceLen;
    }

    if (dst)
        dst[dstIdx] = '\0';

    return dstIdx;
}
/* Copy the string at src to dest, putting a backslash in front of every
 * backslash, double quote and single quote. The terminating NUL is not
 * copied, so dest is not terminated by this loop.
 *
 * src is advanced only once per character, in the copy below; advancing it
 * in the loop condition as well would skip every other character and could
 * read past the terminator. */
while (*src)
{
    if (*src == '\\' || *src == '\"' || *src == '\'')
        *dest++ = '\\';
    *dest++ = *src++;
}
isprint() 并为常见的不可打印字符和空格生成适当的转义符(\n、\t),对于其他字符使用八进制转义符(\0、\377)"。 - rampion
\0 停止似乎很危险,你最好百分之百确定长度是有效的,否则会引起混乱。 - bta