这就像序列比对一样,你可以使用
Needleman-Wunsch算法来解决它。链接中包含了Python代码。只需确保设置得分,使不匹配为负,匹配为正,空白对齐时为0,以最大化得分。该算法的时间和空间复杂度均为O(n * m),但其空间复杂度可以改进。
得分函数
int score(char x, char y){
if ((x == ' ') || (y == ' ')){
return 0;
}
else if (x != y){
return -1;
}
else if (x == y){
return 1;
}
else{
puts("Error!");
exit(2);
}
}
代码
#include <stdio.h>
#include <stdbool.h>
int max(int a, int b, int c){
bool ab, ac, bc;
ab = (a > b);
ac = (a > c);
bc = (b > c);
if (ab && ac){
return a;
}
if (!ab && bc){
return b;
}
if (!ac && !bc){
return c;
}
}
int score(char x, char y){
if ((x == ' ') || (y == ' ')){
return 0;
}
else if (x != y){
return -1;
}
else if (x == y){
return 1;
}
else{
puts("Error!");
exit(2);
}
}
void print_table(int **table, char str1[], char str2[]){
unsigned int i, j, len1, len2;
len1 = strlen(str1) + 1;
len2 = strlen(str2) + 1;
for (j = 0; j < len2; j++){
if (j != 0){
printf("%3c", str2[j - 1]);
}
else{
printf("%3c%3c", ' ', ' ');
}
}
putchar('\n');
for (i = 0; i < len1; i++){
if (i != 0){
printf("%3c", str1[i - 1]);
}
else{
printf("%3c", ' ');
}
for (j = 0; j < len2; j++){
printf("%3d", table[i][j]);
}
putchar('\n');
}
}
int **optimal_global_alignment_table(char str1[], char str2[]){
unsigned int len1, len2, i, j;
int **table;
len1 = strlen(str1) + 1;
len2 = strlen(str2) + 1;
table = malloc(sizeof(int*) * len1);
for (i = 0; i < len1; i++){
table[i] = calloc(len2, sizeof(int));
}
for (i = 0; i < len1; i++){
table[i][0] += i * score(str1[i], ' ');
}
for (j = 0; j < len1; j++){
table[0][j] += j * score(str1[j], ' ');
}
for (i = 1; i < len1; i++){
for (j = 1; j < len2; j++){
table[i][j] = max(
table[i - 1][j - 1] + score(str1[i - 1], str2[j - 1]),
table[i - 1][j] + score(str1[i - 1], ' '),
table[i][j - 1] + score(' ', str2[j - 1])
);
}
}
return table;
}
void prefix_char(char ch, char str[]){
int i;
for (i = strlen(str); i >= 0; i--){
str[i+1] = str[i];
}
str[0] = ch;
}
void optimal_global_alignment(int **table, char str1[], char str2[]){
unsigned int i, j;
char *align1, *align2;
i = strlen(str1);
j = strlen(str2);
align1 = malloc(sizeof(char) * (i * j));
align2 = malloc(sizeof(char) * (i * j));
align1[0] = align2[0] = '\0';
while((i > 0) && (j > 0)){
if (table[i][j] == (table[i - 1][j - 1] + score(str1[i - 1], str2[j - 1]))){
prefix_char(str1[i - 1], align1);
prefix_char(str2[j - 1], align2);
i--;
j--;
}
else if (table[i][j] == (table[i - 1][j] + score(str1[i-1], ' '))){
prefix_char(str1[i - 1], align1);
prefix_char('_', align2);
i--;
}
else if (table[i][j] == (table[i][j - 1] + score(' ', str2[j - 1]))){
prefix_char('_', align1);
prefix_char(str2[j - 1], align2);
j--;
}
}
while (i > 0){
prefix_char(str1[i - 1], align1);
prefix_char('_', align2);
i--;
}
while(j > 0){
prefix_char('_', align1);
prefix_char(str2[j - 1], align2);
j--;
}
puts(align1);
puts(align2);
}
int main(int argc, char * argv[]){
int **table;
if (argc == 3){
table = optimal_global_alignment_table(argv[1], argv[2]);
print_table(table, argv[1], argv[2]);
optimal_global_alignment(table, argv[1], argv[2]);
}
else{
puts("Reqires to string arguments!");
}
return 0;
}
示例输入输出
$ cc dynamic_programming.c && ./a.out aab bba
__aab
bb_a_
$ cc dynamic_programming.c && ./a.out d abc
___d
abc_
$ cc dynamic_programming.c && ./a.out abcde bqcdge
ab_cd_e
_bqcdge