我有一些文本(有意义的文本或算术表达式),我想把它分成单词。
如果我只有一个分隔符,我会使用:
std::stringstream stringStream(inputString);
std::string word;
while(std::getline(stringStream, word, delimiter))
{
wordVector.push_back(word);
}
如何使用多个分隔符将字符串分割成标记?
我有一些文本(有意义的文本或算术表达式),我想把它分成单词。
如果我只有一个分隔符,我会使用:
std::stringstream stringStream(inputString);
std::string word;
while(std::getline(stringStream, word, delimiter))
{
wordVector.push_back(word);
}
如何使用多个分隔符将字符串分割成标记?
std::stringstream stringStream(inputString);
std::string line;
while(std::getline(stringStream, line))
{
std::size_t prev = 0, pos;
while ((pos = line.find_first_of(" ';", prev)) != std::string::npos)
{
if (pos > prev)
wordVector.push_back(line.substr(prev, pos-prev));
prev = pos+1;
}
if (prev < line.length())
wordVector.push_back(line.substr(prev, std::string::npos));
}
如果您有 boost 库,您可以使用以下代码:
#include <boost/algorithm/string.hpp>
// Split inputString at every character that appears in `delimiters`; the
// resulting pieces are written to `parts`.
// Note: '!' is not part of the delimiter set, so "One!Two" stays in one piece.
const std::string inputString{"One!Two,Three:Four"};
const std::string delimiters{"|,:"};
std::vector<std::string> parts;
boost::split(parts, inputString, boost::is_any_of(delimiters));
使用 std::regex:
std::regex 可以用几行代码来实现字符串分割:
// Any single '|', ',' or ':' acts as a separator.
std::regex re("[\\|,:]");
// Submatch index -1 selects the text *between* matches instead of the matches
// themselves; that is what turns the token iterator into a splitter.
std::sregex_token_iterator first(input.begin(), input.end(), re, -1);
// A default-constructed token iterator is the end-of-sequence marker.
std::sregex_token_iterator last;
std::vector<std::string> tokens(first, last);
\\]
转义。 - darune
Windows
,' ', Apple
],其中包含一个空格(' '),而我不想要它。 - cpchung
const std::string delims(";,:. \n\t");
/// Returns true when `c` is one of the characters stored in `delims`,
/// false otherwise.
inline bool isDelim(char c) {
    return delims.find(c) != std::string::npos;
}
以及在函数中:
// Tokenize inputString character by character: runs of non-delimiter
// characters become words in wordVector, runs of delimiters are consumed
// separately (a convenient hook if the delimiters themselves are of interest).
std::stringstream stringStream(inputString);
std::string word;
// get() returns an int so that EOF stays distinguishable from every valid
// character; storing it in a char made the EOF checks depend on the signedness
// of char and let the word loop run forever once the input was exhausted.
int c = EOF;
while (stringStream) {
    word.clear();
    // Read word: stop at the first delimiter or at end of input.
    while ((c = stringStream.get()) != EOF && !isDelim(static_cast<char>(c)))
        word.push_back(static_cast<char>(c));
    if (c != EOF)
        stringStream.unget();  // hand the delimiter back to the delimiter loop
    // A leading delimiter or empty input yields no word; do not store "".
    if (!word.empty())
        wordVector.push_back(word);
    // Read delims: skip the whole run of delimiter characters.
    while ((c = stringStream.get()) != EOF && isDelim(static_cast<char>(c))) {
    }
    if (c != EOF)
        stringStream.unget();  // hand the first word character back
}
这样,如果需要的话,您就可以利用分隔符进行一些有用的操作。
// Number of distinct values an unsigned char can take; size of the
// delimiter lookup table.
const int dictSize = 256;

/// Splits `s` into tokens separated by any of the characters in `del`.
///
/// @param s    Text to tokenize.
/// @param del  Set of single-character delimiters; every character in this
///             string ends the current token.
/// @return     The non-empty tokens in their original order. Consecutive,
///             leading and trailing delimiters never produce empty tokens.
std::vector<std::string> tokenizeMyString(const std::string &s, const std::string &del)
{
    // The table is rebuilt on every call. A function-local static would keep
    // the delimiters of earlier calls and silently apply them to later ones.
    bool dict[dictSize] = {false};
    for (const char d : del) {
        // Index through unsigned char: plain char may be signed, and a
        // negative index would read/write outside the table.
        dict[static_cast<unsigned char>(d)] = true;
    }

    std::vector<std::string> res;
    std::string token;
    for (const char ch : s) {
        if (dict[static_cast<unsigned char>(ch)]) {
            // Delimiter: flush the token collected so far, if any.
            if (!token.empty()) {
                res.push_back(token);
                token.clear();
            }
        }
        else {
            token += ch;
        }
    }
    // Input that does not end with a delimiter leaves one pending token.
    if (!token.empty()) {
        res.push_back(token);
    }
    return res;
}
int main()
{
string delString = "MyDog:Odie, MyCat:Garfield MyNumber:1001001";
//the delimiters are " " (space) and "," (comma)
vector<string> res = tokenizeMyString(delString, " ,");
for (auto &i : res) {
cout << "token: " << i << endl;
}
return 0;
}
多年之后,这里提供一种使用C++20的解决方案:
constexpr std::string_view words{"Hello-_-C++-_-20-_-!"};
// views::split matches this as ONE multi-character pattern, not as a set of
// single-character delimiters.
constexpr std::string_view delimiter{"-_-"};
for (const auto word : std::views::split(words, delimiter)) {
    // Each piece is a subrange into `words`. It does not convert implicitly to
    // std::string_view (no range constructor in C++20, an explicit one in
    // C++23), so build the view from the iterator pair.
    // NOTE(review): requires a standard library implementing the reworked
    // split_view (P2210), whose pieces are contiguous subranges - confirm
    // against the toolchain in use.
    std::cout << std::quoted(std::string_view(word.begin(), word.end())) << ' ';
}
// outputs: "Hello" "C++" "20" "!"
必需的标头:
#include <iomanip>
#include <iostream>
#include <ranges>
#include <string_view>
参考:https://zh.cppreference.com/w/cpp/ranges/split_view
注意:`-_-` 是作为一个完整的分隔序列来匹配的,而不是一组单字符分隔符。如果您有输入 "Hello,C++;20," 并使用 std::views::split(words, ",;"sv),它将不会拆分任何内容,因为序列 `,;` 在输入中并不存在。 - Chris
#include <string>
#include <iostream>
#include "range/v3/all.hpp"
// Demo: split a '|'-separated list of records, then split every record at ':'
// and stream each per-record view to std::cout followed by a newline.
int main()
{
    std::string s = "user1:192.168.0.1|user2:192.168.0.2|user3:192.168.0.3";

    // Second-level split applied to each '|'-delimited record.
    auto splitFields = [](auto record) {
        return record | ranges::view::split(':');
    };
    auto words = s | ranges::view::split('|') | ranges::view::transform(splitFields);

    ranges::for_each(words, [](auto fields) { std::cout << fields << "\n"; });
}