在某种程度上遵循Kieveli的建议,我想出了一个可行的解决方案。虽然之前没有提到,但对我来说也很重要,需要知道潜在结果的数量。我使用了一个名为 "exrex" 的Python脚本,我在github上找到了它。尴尬的是,我没有意识到它也有计数功能。尽管如此,我尽力用我的简化正则表达式语言在C++中实现了它。如果您对我的解决方案感兴趣,请继续阅读。
从面向对象的角度来看,我编写了一个扫描器,将正则表达式(字符串)转换为令牌列表(字符串向量)。然后将令牌列表发送到解析器,生成一个n叉树。所有这些都被封装在一个“表达式生成器”类中,该类可以接受表达式并保存解析树以及生成的计数。
![object overview](https://istack.dev59.com/GxmAe.webp)
扫描器很重要,因为它对空字符串情况进行了标记化处理,您可以在我的问题中看到它出现为“|)”。扫描还创建了一个模式,其中包含[word] [operation] [word] [operation] ... [word]。
例如,扫描:
"(hello|goodbye) (world(s|)|)"
将创建:
[][(][hello][|][goodbye][)][ ][(][world][(][s][|][][)][][|][][)][]
解析树是一个节点向量。节点包含节点向量的向量。
![parse structure](https://istack.dev59.com/Kpb36.webp)
橙色单元格表示“或”,而绘制连接的其他框表示“和”。以下是我的代码。
节点标题
#pragma once
#include <string>
#include <vector>
class Function_Expression_Node{
public:
Function_Expression_Node(std::string const& value_in = "", bool const& more_in = false);
std::string value;
bool more;
std::vector<std::vector<Function_Expression_Node>> children;
};
节点源码
Function_Expression_Node::Function_Expression_Node(std::string const& value_in, bool const& more_in)
: value(value_in)
, more(more_in)
{}
扫描仪标题
#pragma once
#include <vector>
#include <string>
class Function_Expression_Scanner{
public: Function_Expression_Scanner() = delete;
public: static std::vector<std::string> Scan(std::string const& expression);
};
扫描仪源代码
#include "function_expression_scanner.hpp"
std::vector<std::string> Function_Expression_Scanner::Scan(std::string const& expression){
std::vector<std::string> tokens;
std::string temp;
for (auto const& it: expression){
if (it == '('){
tokens.push_back(temp);
tokens.push_back("(");
temp.clear();
}
else if (it == '|'){
tokens.push_back(temp);
tokens.push_back("|");
temp.clear();
}
else if (it == ')'){
tokens.push_back(temp);
tokens.push_back(")");
temp.clear();
}
else if (isalpha(it) || it == ' '){
temp+=it;
}
}
tokens.push_back(temp);
return tokens;
}
解析器标题
#pragma once
#include <string>
#include <vector>
#include "function_expression_node.hpp"
class Function_Expression_Parser{
Function_Expression_Parser() = delete;
public: static std::vector<std::vector<Function_Expression_Node>> Parse(std::vector<std::string> const& tokens, unsigned int & amount);
private: static std::vector<std::vector<Function_Expression_Node>> Build_Parse_Tree(std::vector<std::string>::const_iterator & it, std::vector<std::string>::const_iterator const& end, unsigned int & amount);
private: static Function_Expression_Node Recursive_Build(std::vector<std::string>::const_iterator & it, int & total);
private: static bool Is_Word(std::string const& it);
};
解析器源代码
#include "function_expression_parser.hpp"
bool Function_Expression_Parser::Is_Word(std::string const& it){
return (it != "(" && it != "|" && it != ")");
}
Function_Expression_Node Function_Expression_Parser::Recursive_Build(std::vector<std::string>::const_iterator & it, int & total){
Function_Expression_Node sub_root("",true);
std::vector<Function_Expression_Node> root;
const auto begin = it;
std::vector<std::vector<int>> multiplies;
std::vector<int> adds;
int sub_amount = 1;
while(*it != ")"){
if(Is_Word(*it)){
root.push_back(Function_Expression_Node(*it));
}
else if (*it == "("){
++it;
root.push_back(Recursive_Build(it,sub_amount));
}
else{
sub_root.children.push_back(root);
root.clear();
adds.push_back(sub_amount);
sub_amount = 1;
}
++it;
}
if (!root.empty()){
sub_root.children.push_back(root);
adds.push_back(sub_amount);
}
if (!adds.empty()){
multiplies.push_back(adds);
}
int or_count = 0;
for (auto const& it: multiplies){
for (auto const& it2: it){
or_count+=it2;
}
if (or_count > 0){
total*=or_count;
}
or_count = 0;
}
return sub_root;
}
std::vector<std::vector<Function_Expression_Node>> Function_Expression_Parser::Build_Parse_Tree(std::vector<std::string>::const_iterator & it, std::vector<std::string>::const_iterator const& end, unsigned int & amount){
std::vector<std::vector<Function_Expression_Node>> full_root;
std::vector<Function_Expression_Node> root;
const auto begin = it;
std::vector<int> adds;
int sub_amount = 1;
int total = 0;
while (it != end){
if(Is_Word(*it)){
root.push_back(Function_Expression_Node(*it));
}
else if (*it == "("){
++it;
root.push_back(Recursive_Build(it,sub_amount));
}
else{
full_root.push_back(root);
root.clear();
adds.push_back(sub_amount);
sub_amount = 1;
}
++it;
}
if (!root.empty()){
full_root.push_back(root);
adds.push_back(sub_amount);
sub_amount = 1;
}
for (auto const& it: adds){ total+=it; }
amount = total;
return full_root;
}
std::vector<std::vector<Function_Expression_Node>> Function_Expression_Parser::Parse(std::vector<std::string> const& tokens, unsigned int & amount){
auto it = tokens.cbegin();
auto end = tokens.cend();
auto parse_tree = Build_Parse_Tree(it,end,amount);
return parse_tree;
}
生成器标题
#pragma once
#include "function_expression_node.hpp"
class Function_Expression_Generator{
public: Function_Expression_Generator(std::string const& expression);
public: Function_Expression_Generator();
void Set_New_Expression(std::string const& expression);
public: unsigned int Get_Count();
public: std::vector<std::string> Get_Generations();
private: std::vector<std::string> Generate(std::vector<std::vector<Function_Expression_Node>> const& parse_tree);
private: std::vector<std::string> Sub_Generate(std::vector<Function_Expression_Node> const& nodes);
private:
std::vector<std::vector<Function_Expression_Node>> m_parse_tree;
unsigned int amount;
};
生成器来源
#include "function_expression_generator.hpp"
#include "function_expression_scanner.hpp"
#include "function_expression_parser.hpp"
Function_Expression_Generator::Function_Expression_Generator(std::string const& expression){
auto tokens = Function_Expression_Scanner::Scan(expression);
m_parse_tree = Function_Expression_Parser::Parse(tokens,amount);
}
Function_Expression_Generator::Function_Expression_Generator(){}
void Function_Expression_Generator::Set_New_Expression(std::string const& expression){
auto tokens = Function_Expression_Scanner::Scan(expression);
m_parse_tree = Function_Expression_Parser::Parse(tokens,amount);
}
unsigned int Function_Expression_Generator::Get_Count(){
return amount;
}
std::vector<std::string> Function_Expression_Generator::Get_Generations(){
return Generate(m_parse_tree);
}
std::vector<std::string> Function_Expression_Generator::Generate(std::vector<std::vector<Function_Expression_Node>> const& parse_tree){
std::vector<std::string> results;
std::vector<std::string> more;
for (auto it: parse_tree){
more = Sub_Generate(it);
results.insert(results.end(), more.begin(), more.end());
}
return results;
}
std::vector<std::string> Function_Expression_Generator::Sub_Generate(std::vector<Function_Expression_Node> const& nodes){
std::vector<std::string> results;
std::vector<std::string> more;
std::vector<std::string> new_results;
results.push_back("");
for (auto it: nodes){
if (!it.more){
for (auto & result: results){
result+=it.value;
}
}
else{
more = Generate(it.children);
for (auto m: more){
for (auto r: results){
new_results.push_back(r+m);
}
}
more.clear();
results = new_results;
new_results.clear();
}
}
return results;
}
总之,如果您需要为正则表达式生成匹配项,我建议使用
exrex或本主题中提到的任何其他程序。
foo hello bar
。 - p.s.w.g