基于 Eric 的优秀答案,我提出了一种更通用的解决方案,能够处理捕获组和反向引用:
import re
from itertools import islice
def multiple_replace(s, repl_dict):
    """Apply several regex substitutions to *s* in one left-to-right pass.

    Every key of *repl_dict* is a regular-expression pattern (as a string)
    and every value is its replacement: either a template that may contain
    group references such as ``\\1``, or a callable taking the match object.
    The patterns are OR-ed together in dictionary order, so at any position
    the first pattern that matches wins, and text produced by one
    replacement is never rescanned by another rule.

    Args:
        s: The string to process.
        repl_dict: Mapping of pattern string -> replacement.

    Returns:
        A new string with all replacements applied. When *repl_dict* is
        empty, *s* is returned unchanged.

    Note:
        Numbered backreferences *inside a pattern* (as opposed to inside a
        replacement template) are not renumbered when the patterns are
        combined, so they are not supported.
    """
    rules = list(repl_dict.items())
    if not rules:
        # An empty alternation would match the empty string everywhere and
        # leave no rule to look up, so there is nothing to do.
        return s

    compiled = [re.compile(pattern) for pattern, _ in rules]

    # Each rule is wrapped in one extra outer group inside the combined
    # pattern.  Record the index of that outer group for every rule: it is
    # the only group guaranteed to be non-None when the rule matched.
    outer_groups = []
    next_group = 1
    for regex in compiled:
        outer_groups.append(next_group)
        next_group += regex.groups + 1

    def repl_func(m):
        rule_no = next(
            idx for idx, group_no in enumerate(outer_groups)
            if m.group(group_no) is not None
        )
        regex = compiled[rule_no]
        repl = rules[rule_no][1]

        # Re-match the individual pattern in place on the original string so
        # that its own group numbering applies while look-behind/look-ahead
        # assertions still see the surrounding text.
        sub_match = regex.match(m.string, m.start())
        if sub_match is None or sub_match.end() != m.end():
            # Defensive fallback: substitute within the matched text only.
            return regex.sub(repl, m.group())
        if callable(repl):
            return repl(sub_match)
        return sub_match.expand(repl)

    full_pattern = '|'.join(f'({pattern})' for pattern, _ in rules)
    return re.sub(full_pattern, repl_func, s)
示例。使用以下内容调用:
# Sample input containing text that each rule in REPL_DICT below can match.
s = 'This is a sample string. Which is getting replaced. 1234-5678.'
# Example rule table: each key is a regex pattern, each value the replacement
# template for text matched by that pattern.
REPL_DICT = {
# Three lazy groups separated by the literals "is", "ing" and "ch";
# the template emits the groups in reverse order, joined by "-".
r'(.*?)is(.*?)ing(.*?)ch': r'\3-\2-\1',
# Plain literal pattern with no groups.
r'replaced': 'REPLACED',
# Nested groups: group 1 wraps groups 2-5; the template uses only the
# inner single-digit groups.
r'\d\d((\d)(\d)-(\d)(\d))\d\d': r'__\5\4__\3\2__',
# Alternation without any capture group.
r'get|ing': '!@#'
}
给出:
>>> multiple_replace(s, REPL_DICT)
'. Whi- is a sample str-Th is !@#t!@# REPLACED. __65__43__.'
为了更高效的解决方案,可以创建一个简单的包装器来预计算 `groups_no` 和 `full_pattern`,例如:
import re
from itertools import islice
class ReplWrapper:
    """Reusable multi-pattern replacer with all per-rule work precomputed.

    The constructor takes a mapping of regex pattern string -> replacement
    (a template that may contain group references such as ``\\1``, or a
    callable taking the match object).  The patterns are OR-ed together in
    dictionary order, so at any position the first matching pattern wins.

    Attributes:
        repl_dict: The mapping passed to the constructor.
        groups_no: Number of capture groups in each pattern, in order.
        full_pattern: The combined alternation, one outer group per rule.

    Note:
        The rules are snapshotted at construction time; later changes to
        *repl_dict* are not picked up by ``multiple_replace``.  Numbered
        backreferences *inside a pattern* are not renumbered when the
        patterns are combined, so they are not supported.
    """

    def __init__(self, repl_dict):
        self.repl_dict = repl_dict
        self.groups_no = [re.compile(pattern).groups for pattern in repl_dict]
        self.full_pattern = '|'.join(f'({pattern})' for pattern in repl_dict)

        # (compiled pattern, replacement) pairs, indexed by rule position.
        self._rules = [
            (re.compile(pattern), repl) for pattern, repl in repl_dict.items()
        ]
        # Index of each rule's outer wrapping group in the combined pattern;
        # it is the only group guaranteed non-None when that rule matched.
        self._outer_groups = []
        next_group = 1
        for count in self.groups_no:
            self._outer_groups.append(next_group)
            next_group += count + 1
        self._full_regex = re.compile(self.full_pattern)

    def get_pattern_repl(self, pos):
        """Return the ``(pattern, replacement)`` pair at position *pos*."""
        return next(islice(self.repl_dict.items(), pos, pos + 1))

    def multiple_replace(self, s):
        """Return *s* with every rule applied in one left-to-right pass.

        When the wrapper holds no rules, *s* is returned unchanged.
        """
        if not self._rules:
            # An empty alternation would match the empty string everywhere
            # and leave no rule to look up.
            return s

        def repl_func(m):
            rule_no = next(
                idx for idx, group_no in enumerate(self._outer_groups)
                if m.group(group_no) is not None
            )
            regex, repl = self._rules[rule_no]

            # Re-match the individual pattern in place on the original
            # string so its own group numbering applies while look-behind /
            # look-ahead assertions still see the surrounding text.
            sub_match = regex.match(m.string, m.start())
            if sub_match is None or sub_match.end() != m.end():
                # Defensive fallback: substitute within the matched text only.
                return regex.sub(repl, m.group())
            if callable(repl):
                return repl(sub_match)
            return sub_match.expand(repl)

        return self._full_regex.sub(repl_func, s)
使用方法如下:
>>> ReplWrapper(REPL_DICT).multiple_replace(s)
'. Whi- is a sample str-Th is !@#t!@# REPLACED. __65__43__.'
……在 `re.compile` 中指定 `re.I` 标志。 - nhahtdh
可以将 `mo.string[mo.start():mo.end()]` 替换为 `mo.group()` 来简化此过程,因为它们是等效的,正如文档此处所述。 - Johnny Mayhew