我正在尝试在R
中编写一个不调用任何C例程的md5哈希函数。虽然我的代码执行得很好,但输出永远无法与tools::md5sum
相匹配(而这与各种在线文档提供的示例匹配)。我怀疑有些地方存在字节顺序(或字词顺序)问题;正如下面提供的代码所示,我尝试了一些翻转操作,但没有成功。
我已经检查了输出中的简单不匹配情况,例如正确的字节但是顺序错误,但没有运气。
要运行此函数,您需要使用库Rmpfr
和bigBits
以及此处提供的函数(flip32
、flip16
和bigRotate
)。
# need these to run:
library(Rmpfr)
library(bigBits)
mymd5 <- function(msg){
if(!is(msg,'raw')) msg <- charToRaw(as.character(msg))
# a const to be used when truncating values > ffffffff
two32 <- as.bigz(2^(as.bigz(32)))
sidx <- c( 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22 , 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20 , 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23 , 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21 )
# K-values verified to be correct.
K <- as.bigz(2^32 * abs(sin(1:64)))
# IETF shows these values as-is, i.e. don't change Endian on these calculations
#ietf.org page says A is 01234567, ie lower bytes first.
adef <- '67452301'
bdef <- 'efcdab89'
cdef <- '98badcfe'
ddef <- '10325476'
# then optionally
adef <- flip32(adef)
bdef <- flip32(bdef)
cdef <- flip32(cdef)
ddef <- flip32(ddef)
a0 <- (base2base(adef,16,10))[[1]]
b0 <-(base2base(bdef,16,10))[[1]]
c0 <-(base2base(cdef,16,10))[[1]]
d0 <-(base2base(ddef,16,10))[[1]]
# append 0x80 and pad with 0x00 bytes so that the message length in bytes ≡ 56 (mod 64).
origlenbit <- length(msg) * 8
msg <- c(msg, as.raw('0x80') )
bytemod <- length(msg)%%64
raw00 <- as.raw('0x00')
# append '0x00' 56-bytemod times
if (bytemod < 56 && bytemod > 0 ) {
msg <- c(msg,rep(raw00,times=(56-bytemod)))
} else if (bytemod > 56) {
msg <- c(msg, rep(raw00, times = bytemod-56))
}
# https://datatracker.ietf.org/doc/html/rfc1321#section-3.4 says
# A 64-bit representation of b (the length of the message before the
# padding bits were added) is appended to the result of the previous
# step. In the unlikely event that b is greater than 2^64, then only
# the low-order 64 bits of b are used. (These bits are appended as two
# 32-bit words and appended low-order word first in accordance with the
# previous conventions.)
len16 <- base2base(origlenbit,10,16)[[1]]
lenbit2 <- base2base(origlenbit,10,2)[[1]]
if(nchar(lenbit2) < 64 ){
lenbit2 <- paste0(c(rep('0',times = 64 -nchar(lenbit2)) ,lenbit2), collapse='')
} else if (nchar(lenbit2) > 64) lenbit2 <- rev(rev(lenbit2)[1:64])
newbytes2 <- substring(lenbit2, seq(1,nchar(lenbit2)-7,by=8), last = seq(8, nchar(lenbit2),by = 8) )
newdec <- base2base(newbytes2,2,10)
newdbl <- rep(0,times= 8)
for (jd in 1: 8) newdbl[jd] <- as.numeric(newdec[[jd]])
newraw <- as.raw(newdbl)
# which order is correct?
# msg <- c(msg,newraw[c(7,8,5,6,3,4,1,2)])
msg <- c(msg, newraw[5:8], newraw[1:4])
# msg <- c(msg, newraw[1:4], newraw[5:8])
chunkit <- as.bigz(as.numeric(msg ) ) # all these will be <= 0xff
## turn groups of 512 BITS of msg into 16 words of 32 bits each.
for(jchunk in 1: (length(msg)/64)) {
thischunk <- matrix(chunkit[ ((jchunk-1)*64)+(1:64)],ncol=16)
# should I reverse the bytes of this chunk?
thisM <- as.bigz(rep(0,16) )
for(jm in 1:16) thisM[jm] <- sum(thischunk[,jm] * c(256^3,256^2,256,1) )
# for(jm in 1:16) thisM[jm] <- sum(thischunk[,jm] * c(1,256,256^2,256^3) )
# Initialize hash value for this chunk:
Ah<- a0
Bh<- b0
Ch<- c0
Dh<- d0
#now the chunk loop, which I make into 4 sep'te loops
for (jone in 1:16) {
Fh <- bigOr (bigAnd(Bh,Ch, inTwo=FALSE)[[1]] ,bigAnd(bigNot(Bh, inTwo=FALSE, outTwo=FALSE)[[1]],Dh, inTwo=FALSE)[[1]] )[[1]]
g <- jone
Fh<- (Fh + Ah + K[jone] + thisM[g]) %%two32 # truncate
Ah<- Dh
Dh<- Ch
Ch<- Bh
Bh<- (Bh + bigRotate(Fh, sidx[jone], inTwo=FALSE)[[1]] )%%two32
}
for (j2 in 17:32) {
Fh <- bigOr( bigAnd(Dh,Bh,inTwo=FALSE)[[1]] ,bigAnd(bigNot(Dh[[1]],inTwo=FALSE, outTwo=FALSE), Ch, inTwo=FALSE)[[1]] ,inTwo=FALSE)[[1]]
g <-(1 + 5*(j2-1))%%16 +1 # plus one due to indexing from 1
Fh<- (Fh + Ah + K[j2] + thisM[g])%%two32
Ah<- Dh
Dh<- Ch
Ch<- Bh
Bh<- (Bh + bigRotate(Fh, sidx[j2], inTwo=FALSE)[[1]])%%two32
}
for(j3 in 33:48){
Fh <- bigXor(Bh,bigXor(Ch,Dh, inTwo=FALSE)[[1]], inTwo=FALSE)[[1]]
g <-(5 + 3*(j3-1))%%16 +1
Fh<- (Fh + Ah + K[j3] + thisM[g])%%two32
Ah<- Dh
Dh<- Ch
Ch<- Bh
Bh<- (Bh + bigRotate(Fh, sidx[j3],inTwo=FALSE)[[1]])%%two32
}
for(j4 in 49:64){
Fh <- bigXor(Ch,bigOr(Bh,bigNot(Dh,inTwo=FALSE, outTwo=FALSE)[[1]],inTwo=FALSE)[[1]],inTwo=FALSE)[[1]]
g <- (7 * (j4 -1))%%16 +1
Fh<- (Fh + Ah + K[j4] + thisM[g])%%two32
Ah<- Dh
Dh<- Ch
Ch<- Bh
Bh<- (Bh + bigRotate(Fh, sidx[j4], inTwo=FALSE)[[1]])%%two32
}
# Add this chunk's hash to result so far:
a0<- (a0 + Ah)%%two32
b0<- (b0 + Bh)%%two32
c0<- (c0 + Ch)%%two32
d0<- (d0 + Dh)%%two32
} # end of jchunk
#finally, string the values together.
# watch for those leading zeros
a0x <-(base2base(a0,10,16))
a0x <- paste0(c(rep(0,times=max(8-(nchar(a0x)),0)), a0x), collapse='')
b0x <-(base2base(b0,10,16))
b0x <- paste0(c(rep(0,times=max(8-(nchar(b0x)),0)), b0x), collapse='')
c0x <-(base2base(c0,10,16))
c0x <- paste0(c(rep(0,times=max(8-(nchar(c0x)),0)), c0x), collapse='')
d0x <-(base2base(d0,10,16))
d0x <- paste0(c(rep(0,times=max(8-(nchar(d0x)),0)), d0x), collapse='')
thesum <- paste0(c(a0x,b0x,c0x,d0x) , sep='',collapse='')
return(thesum)
}
# helper func to test reordering bytes. e.g. 01234567 from 67452301
flip32 <- function(x){
xsep <- unlist(strsplit(x,'') )
xout <- paste0(c(xsep[c(7,8,5,6,3,4,1,2)]),collapse='')
return(xout)
}
flip16 <- function(x) {
xsep <- unlist(strsplit(x,''))
xout <- paste0(c(xsep[c(3,4,1,2)]),collapse='')
return(xout)
}
bigRotate <- function(x, shift, inBase = 10,binSize = 32, outBase = 10, inTwosComp = TRUE) {
#default binary size is 32 to match bitwShiftR
shift = floor(shift[1])
out <- rep('0',times = length(x)) # gmp::as.bigz(rep(0,times=length(x)))
for(jj in 1:length(x)) {
bintmp <- base2base(x[[jj]],inBase,2, binSize = binSize, inTwosComp = inTwosComp, outTwosComp = FALSE)[[1]]
#now all inputs will have a neg sign if negative.
isPos = TRUE
if(length(grep('-',bintmp)) ) {
isPos = FALSE
bintmp <- gsub('^-','',bintmp)
}
bintmp <- strsplit(bintmp,'')[[1]]
xlen = length(bintmp)
# now rotate the bits.
otmp <- unlist(paste0(c(bintmp[ ((1:xlen) + shift -1) %%(xlen) + 1]),collapse=''))
out[jj] <- base2base(otmp, 2, outBase, binSize=binSize, outTwosComp = FALSE, classOut = "character")[[1]]
if(!isPos) out[jj] <- paste0('-',out[jj], collapse='')
}
# provide output in source 'inBase'
if(is(x,'mpfr') || is(x,'mpfr1')) {
out <- Rmpfr::.bigz2mpfr(gmp::as.bigz(out))
} else if(is(x,'numeric')) {
out <- as.numeric(out)
} else if(is(x,'bigz')) out <- gmp::as.bigz(out)
return(out)
}
thischunk
中反转字节),但是你对细节的敏锐关注令人印象深刻。我需要仔细研究你简化过的bigRotate
,因为我希望那个函数能够通用,即适用于任意大小的值,而不仅限于32位。 - Carl WitthoftbigRotate
以允许任意大小的二进制时,它的运行速度比我在问题中提供的代码要慢得多。它们都能产生相同且正确的输出。 - Carl Witthoft