我在这里提供一个在 R 中“可以”工作的解决方案。远非完美。
# Heuristically hyphenate words at vowel groups and count their syllables.
#
# A letter counts as a vowel when its lower-case form is one of
# a, e, i, o, u, y. A word with at most one vowel is reported as a single
# syllable and returned unchanged. Otherwise a hyphen is inserted after the
# first vowel of each vowel group (consecutive vowels are kept together),
# subject to the limits described in the loop below; the syllable count is
# the number of inserted hyphens plus one.
#
# Args:
#   words: character vector of words to analyse.
#
# Returns:
#   For a single word, a list with elements "syllables" (count), "vowels"
#   (number of vowel letters) and "word" (the hyphenated word). For any other
#   number of words, an unnamed list holding one such list per word, in input
#   order; empty input gives an empty list.
countSyllablesInWord <- function(words) {
  vowels <- c("a", "e", "i", "o", "u", "y")

  # Analyse one word and build its result record.
  split_word <- function(word) {
    chars <- strsplit(word, "")[[1]]
    n_char <- length(chars)
    is_vowel <- tolower(chars) %in% vowels
    n_vowels <- sum(is_vowel)

    # Zero or one vowel: nothing to split.
    if (n_vowels <= 1) {
      return(list("syllables" = 1, "vowels" = n_vowels, "word" = word))
    }

    pieces <- chars
    n_hyphen <- 0
    prev_was_vowel <- FALSE
    for (i in seq_len(n_char)) {
      # Only the first vowel of a vowel group may be followed by a hyphen.
      if (is_vowel[i] && !prev_was_vowel) {
        # A leading vowel always gets a hyphen. Later ones get one only when
        # they are not the last character and there are still more vowels
        # than syllables opened so far (keeps e.g. "su-per" at two parts).
        if (i == 1L || (i < n_char && n_vowels > n_hyphen + 1)) {
          pieces[i] <- paste0(chars[i], "-")
          n_hyphen <- n_hyphen + 1
        }
      }
      prev_was_vowel <- is_vowel[i]
    }

    list(
      "syllables" = 1 + n_hyphen,
      "vowels" = n_vowels,
      "word" = paste0(pieces, collapse = "")
    )
  }

  # seq_along() keeps empty input from iterating over c(1, 0).
  result <- lapply(seq_along(words), function(j) split_word(words[j]))

  # A single word is returned unwrapped, as callers expect.
  if (length(words) == 1) {
    result[[1]]
  } else {
    result
  }
}
这是一些结果:
# Demo: hyphenate a handful of words and tabulate the results.
my.count <- countSyllablesInWord(c(
  "America", "beautiful", "spacious", "skies", "amber",
  "waves", "grain", "purple", "mountains", "majesty"
))
# Build one row per word. Going through unlist()/matrix() would coerce the
# numeric counts to character, so each record is converted on its own and the
# rows are then stacked; column names come from the record names.
my.count.df <- do.call(
  rbind,
  lapply(my.count, as.data.frame, stringsAsFactors = FALSE)
)
my.count.df
我没有意识到这是一个多么深奥的“兔子洞”,看起来很简单。
作为补充,这里还有一个简单的 Flesch–Kincaid(弗莱施–金凯德)可读性函数…… 其中 syllables 参数是第一个函数返回的音节计数列表……
由于我的函数倾向于多算音节,得到的可读性结果会偏向“更难读”(FRE 偏低、FKGL 年级水平偏高)…… 目前来说这没问题…… 如果目标是使文本更易读,这不是最糟糕的事情。
# Compute Flesch Reading Ease and Flesch-Kincaid Grade Level scores.
#
# Args:
#   n.sentences: number of sentences in the text.
#   n.words: number of words in the text.
#   syllables: list of per-word records, each with a numeric "syllables"
#     element. A single record (a list that itself has a "syllables"
#     element) is also accepted. NULL or an empty list counts as zero
#     syllables.
#
# Returns:
#   A list with elements "FRE" (Flesch Reading Ease) and "FKGL"
#   (Flesch-Kincaid Grade Level).
computeReadability <- function(n.sentences, n.words, syllables = NULL) {
  # A lone record has a top-level "syllables" element; wrap it so it is
  # handled like a one-element list of records.
  if (is.list(syllables) && "syllables" %in% names(syllables)) {
    syllables <- list(syllables)
  }

  # Sum the per-word counts; an empty or NULL input sums to 0.
  per.word <- vapply(
    syllables,
    function(entry) entry$syllables,
    numeric(1)
  )
  n.syllables <- sum(per.word)

  words.per.sentence <- n.words / n.sentences
  syllables.per.word <- n.syllables / n.words

  FRE <- 206.835 - 1.015 * words.per.sentence - 84.6 * syllables.per.word
  FKGL <- 0.39 * words.per.sentence + 11.8 * syllables.per.word - 15.59
  list("FRE" = FRE, "FKGL" = FKGL)
}