我用Haskell编写了以下代码来计算sha256。我觉得这段代码很优雅,但是在GHC下,它在shaStep中花费了大量的时间,并且如果我正确地阅读分析数据,它在进行大量的内存分配。考虑到应该可以无需任何内存分配就能计算sha256,我正在寻找如何找出哪个部分在进行分配并进行优化的方法。
我的代码:
{-# OPTIONS_GHC -funbox-strict-fields #-}
module SHA256 (sha256, sha256Ascii, Hash8) where
import Data.Word
import Data.Bits
import Data.List
import Control.Monad (ap)
ch x y z = (x .&. y) `xor` (complement x .&. z)
maj x y z = (x .&. y) `xor` (x .&. z) `xor` (y .&. z)
bigSigma0 x = rotateR x 2 `xor` rotateR x 13 `xor` rotateR x 22
bigSigma1 x = rotateR x 6 `xor` rotateR x 11 `xor` rotateR x 25
smallSigma0 x = rotateR x 7 `xor` rotateR x 18 `xor` shiftR x 3
smallSigma1 x = rotateR x 17 `xor` rotateR x 19 `xor` shiftR x 10
ks = [0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
,0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
,0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
,0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
,0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
,0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
,0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
,0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2]
blockSize = 16
padding :: Int -> [Word8] -> [[Word32]]
padding blockSize x = unfoldr block $ paddingHelper x 0 (0::Int) (0::Integer)
where
block [] = Nothing
block x = Just $ splitAt blockSize x
paddingHelper x o on n | on == (bitSize o) = o:paddingHelper x 0 0 n
paddingHelper (x:xs) o on n | on < (bitSize o) =
paddingHelper xs ((shiftL o bs) .|. (fromIntegral x)) (on+bs) $! (n+fromIntegral bs)
where
bs = bitSize x
paddingHelper [] o on n = (shiftL (shiftL o 1 .|. 1) (bso-on-1)):
(zeros ((-(fromIntegral n-on+3*bso)) `mod` (blockSize*bso)))
[fromIntegral (shiftR n bso), fromIntegral n]
where
bso = bitSize o
zeros 0 = id
zeros n | 0 < n = let z=0 in (z:) . (zeros (n-bitSize z))
data Hash8 = Hash8 {-# UNPACK #-} !Word32
{-# UNPACK #-} !Word32
{-# UNPACK #-} !Word32
{-# UNPACK #-} !Word32
{-# UNPACK #-} !Word32
{-# UNPACK #-} !Word32
{-# UNPACK #-} !Word32
{-# UNPACK #-} !Word32 deriving (Eq, Ord, Show)
shaStep :: Hash8 -> [Word32] -> Hash8
shaStep h m = foldl' (flip id) h (zipWith mkStep3 ks ws) `plus` h
where
ws = m++zipWith4 smallSigma (drop (blockSize-2) ws) (drop (blockSize-7) ws)
(drop (blockSize-15) ws) (drop (blockSize-16) ws)
where
smallSigma a b c d = smallSigma1 a + b + smallSigma0 c + d
mkStep3 k w (Hash8 a b c d e f g h) = Hash8 (t1+t2) a b c (d+t1) e f g
where
t1 = h + bigSigma1 e + ch e f g + k + w
t2 = bigSigma0 a + maj a b c
(Hash8 x0 x1 x2 x3 x4 x5 x6 x7) `plus` (Hash8 y0 y1 y2 y3 y4 y5 y6 y7) =
Hash8 (x0+y0) (x1+y1) (x2+y2) (x3+y3) (x4+y4) (x5+y5) (x6+y6) (x7+y7)
sha :: Hash8 -> [Word8] -> Hash8
sha h0 x = foldl' shaStep h0 $ padding blockSize x
sha256 :: [Word8] -> Hash8
sha256 = sha $
Hash8 0x6a09e667 0xbb67ae85 0x3c6ef372 0xa54ff53a 0x510e527f 0x9b05688c 0x1f83d9ab 0x5be0cd19
sha256Ascii :: String -> Hash8
sha256Ascii = sha256 . map (fromIntegral . fromEnum)
编辑:我刚刚注意到为ch
、maj
以及大型和小型西格玛添加专门的类型签名对我的分析结果有很大影响(而不影响未经分析的程序)。因此,看来我的程序在shaStep
中花费的时间并没有像我最初认为的那样多。
[Word8]
很难获得良好的性能,你必须转向一些紧凑的数据结构,比如 bytestring 或 unboxed vector 或 array 表示。 - Thomas M. DuBuisson