Tensor conversion requested dtype float64 for Tensor with dtype float32.


I have this error:

ValueError: Tensor conversion requested dtype float64 for Tensor with dtype float32: 'Tensor("pearson_r/covariance/Cast:0", shape=(), dtype=float32)'

My code is as follows:

def correlation_coefficient(y_true, y_pred):
    return tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)[1]


def construct_embedding():
    # load the whole embedding into memory
    embeddings_index = dict()
    f = open('../input/embedding/word_embedding50d.txt')
    for line in f:
        values = line.split()
        word = values[0]
        coefs = asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()
    print('Loaded %s word vectors.' % len(embeddings_index))
    return embeddings_index




# define the model
def define_model(length, vocab_size, embedding_matrix):
    # channel 1
    inputs1 = Input(shape=(length,))
    embedding1 = Embedding(vocab_size, 50, weights=[embedding_matrix], trainable=False)(inputs1)
    conv1 = Conv1D(filters=350, kernel_size=3, activation='relu')(embedding1)
    drop1 = Dropout(0.2)(conv1)
    nor11 = keras.layers.BatchNormalization()(drop1)

    pool1 = MaxPooling1D(pool_size=5)(nor11)
    pdrop1 = Dropout(0.2)(pool1)
    nor12 = keras.layers.BatchNormalization()(pdrop1)

    ls1 = LSTM(200)(nor12)
    ldrop1 = Dropout(0.2)(ls1)
    lnor1 = keras.layers.BatchNormalization()(ldrop1)

    # channel 2
    inputs2 = Input(shape=(length,))
    embedding2 = Embedding(vocab_size, 50, weights=[embedding_matrix], trainable=False)(inputs2)
    conv2 = Conv1D(filters=350, kernel_size=4, activation='relu')(embedding2)
    drop2 = Dropout(0.2)(conv2)
    nor21 = keras.layers.BatchNormalization()(drop2)

    pool2 = MaxPooling1D(pool_size=5)(nor21)
    pdrop2 = Dropout(0.2)(pool2)
    nor22 = keras.layers.BatchNormalization()(pdrop2)

    ls2 = LSTM(200)(nor22)
    ldrop2 = Dropout(0.2)(ls2)
    lnor2 = keras.layers.BatchNormalization()(ldrop2)

    # channel 3
    inputs3 = Input(shape=(length,))
    embedding3 = Embedding(vocab_size, 50, weights=[embedding_matrix], trainable=False)(inputs3)
    conv3 = Conv1D(filters=350, kernel_size=5, activation='relu')(embedding3)
    drop3 = Dropout(0.2)(conv3)
    nor31 = keras.layers.BatchNormalization()(drop3)

    pool3 = MaxPooling1D(pool_size=5)(nor31)
    pdrop3 = Dropout(0.2)(pool3)
    nor32 = keras.layers.BatchNormalization()(pdrop3)

    ls3 = LSTM(250)(nor32)
    ldrop3 = Dropout(0.2)(ls3)
    lnor3 = keras.layers.BatchNormalization()(ldrop3)

    # merge
    merged = concatenate([lnor1, lnor2, lnor3])
    # interpretation
    dense1 = Dense(100, activation='elu')(merged)
    nor4 = keras.layers.BatchNormalization()(dense1)

    outputs = Dense(1, activation='elu')(nor4)
    noroutputs = keras.layers.BatchNormalization()(outputs)
    model = Model(inputs=[inputs1, inputs2, inputs3], outputs=noroutputs)
    model.load_weights("../input/bestweight/bestweights.hdf5")
    # compile
    model.compile(loss='mse', optimizer=optimizers.Adam(lr=0.003), metrics=[correlation_coefficient, 'accuracy'])
    K.get_session().run(tf.local_variables_initializer())

    # summarize 
    print(model.summary())
    plot_model(model, show_shapes=True, to_file='multichannel.png')

    return model

# load a clean dataset
def load_dataset(filename):
    return load(open(filename, 'rb'))

#preprocessing text
def preprocess(lines):
    #print(lines)      
    ps = PorterStemmer() 
    for i in range(len(lines)):
        tokens = lines[i].split() 
        # filter out stop words then stem the remaining words
        stop_words = set(stopwords.words('english'))    
        tokens = [ps.stem(w) for w in tokens if not w in stop_words]    
        lines[i]=' '.join(tokens)  
    #print('lines: ')
    #print(lines)
    return lines



# encode a list of lines
def encode_text(tokenizer, lines, length):  
    # integer encode
    encoded = tokenizer.texts_to_sequences(lines)
    # pad encoded sequences
    padded = pad_sequences(encoded, maxlen=length, padding='post')
    return padded


# fit a tokenizer
def create_tokenizer(lines):
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer

# calculate the maximum document length
def max_length(lines):
    return max([len(s.split()) for s in lines])


def embed(vocab_size, embeddings_index, t):
    embedding_matrix = zeros((vocab_size, 50))
    for word, i in t.word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix


# define training data
f = pd.read_csv('../input/satexasdataset/texasDatasetForHAN.csv', encoding='ISO-8859-1')
ftest = pd.read_csv('../input/satexasdataset/testtexasDatasetForHAN.csv', encoding='ISO-8859-1')

train = []
test = []

data_train = pd.DataFrame(data=f)

for i in range(data_train.shape[0]):
    train.append(data_train.manswer[i] + ' ' + data_train.sanswer[i])

trainLabels = data_train.score
Lines = pd.DataFrame(train, columns=['train'])
trainLines = Lines.train
trainLines = preprocess(trainLines)

data_test = pd.DataFrame(data=ftest)
for i in range(data_test.shape[0]):
    test.append(data_test.manswer[i] + ' ' + data_test.sanswer[i])

testLabels = data_test.score
tLines = pd.DataFrame(test, columns=['test'])
testLines = tLines.test
testLines = preprocess(testLines)

mergedLines = [trainLines, testLines]
allmerged = pd.concat(mergedLines)

# create tokenizer
tokenizer = create_tokenizer(allmerged.str.lower())


# calculate max document length
length = max_length(allmerged)

# calculate vocabulary size
vocab_size = len(tokenizer.word_index) + 1


print('Max answer length: %d' % length)
print('Vocabulary size: %d' % vocab_size)

# encode data
alldataX = encode_text(tokenizer, allmerged, length)


s = trainLines.size
trainX = alldataX[0:s]
testX = alldataX[s:]

print(trainX.shape, testX.shape)

embeddings_index = construct_embedding()
embedding_matrix = embed(vocab_size, embeddings_index, tokenizer)


# define model
model = define_model(length, vocab_size, embedding_matrix)

ynew = model.predict([testX, testX, testX])
corr = correlation_coefficient(ynew, array(testLabels))
print('Test Correlation: %f' % corr)

The full error message is as follows:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-4-3ce58b1f4f26> in <module>
    274 #testLabels = tf.convert_to_tensor(array(testLabels), dtype=tf.float64)
    275 
--> 276 corr=correlation_coefficient(ynew, array(testLabels))
    277 print('Test Correlation: %f' % (cor*100))
    278 

<ipython-input-4-3ce58b1f4f26> in correlation_coefficient(y_true, y_pred)
     45 
     46 def correlation_coefficient(y_true, y_pred):
---> 47     return tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)[1]
     48 
     49 

/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/metrics/python/ops/metric_ops.py in streaming_pearson_correlation(predictions, labels, weights, metrics_collections, updates_collections, name)
   3334       weights = weights_broadcast_ops.broadcast_weights(weights, labels)
   3335     cov, update_cov = streaming_covariance(
-> 3336         predictions, labels, weights=weights, name='covariance')
   3337     var_predictions, update_var_predictions = streaming_covariance(
   3338         predictions, predictions, weights=weights, name='variance_predictions')

/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/metrics/python/ops/metric_ops.py in streaming_covariance(predictions, labels, weights, metrics_collections, updates_collections, name)
   3218     # batch_mean_prediction is E[x_B] in the update equation
   3219     batch_mean_prediction = math_ops.div_no_nan(
-> 3220         math_ops.reduce_sum(weighted_predictions), batch_count)
   3221     delta_mean_prediction = math_ops.div_no_nan(
   3222         (batch_mean_prediction - mean_prediction) * batch_count, update_count)

/opt/conda/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
    178     """Call target, and fall back on dispatchers if there is a TypeError."""
    179     try:
--> 180       return target(*args, **kwargs)
    181     except (TypeError, ValueError):
    182       # Note: convert_to_eager_tensor currently raises a ValueError, not a

/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py in div_no_nan(x, y, name)
   1095   with ops.name_scope(name, "div_no_nan", [x, y]) as name:
   1096     x = ops.convert_to_tensor(x, name="x")
-> 1097     y = ops.convert_to_tensor(y, name="y", dtype=x.dtype.base_dtype)
   1098     x_dtype = x.dtype.base_dtype
   1099     y_dtype = y.dtype.base_dtype

/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, preferred_dtype, dtype_hint)
   1085   preferred_dtype = deprecation.deprecated_argument_lookup(
   1086       "dtype_hint", dtype_hint, "preferred_dtype", preferred_dtype)
-> 1087   return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
   1088 
   1089 

/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor_v2(value, dtype, dtype_hint, name)
   1143       name=name,
   1144       preferred_dtype=dtype_hint,
-> 1145       as_ref=False)
   1146 
   1147 

/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors, accept_composite_tensors)
   1222 
   1223     if ret is None:
-> 1224       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1225 
   1226     if ret is NotImplemented:

/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _TensorTensorConversionFunction(t, dtype, name, as_ref)
   1016     raise ValueError(
   1017         "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
-> 1018         (dtype.name, t.dtype.name, str(t)))
   1019   return t
   1020 

ValueError: Tensor conversion requested dtype float64 for Tensor with dtype float32: 'Tensor("pearson_r/covariance/Cast:0", shape=(), dtype=float32)'

1 Answer

Replace this line:

testLabels = data_test.score

with:

testLabels = data_test.score.astype(np.float32)

That way, both arguments to correlation_coefficient will have dtype float32.
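To put the fix in context, here is a minimal sketch of the relevant part of the script. Variable names are taken from the question; it assumes numpy is imported as np, which the question's bare asarray/array calls do not show.

import numpy as np

# cast the pandas Series to float32 so the labels match the
# float32 predictions that model.predict returns by default
testLabels = data_test.score.astype(np.float32)

ynew = model.predict([testX, testX, testX])

# both arguments now have dtype float32, so div_no_nan inside
# streaming_pearson_correlation no longer tries to cast a
# float32 tensor to float64
corr = correlation_coefficient(ynew, np.array(testLabels))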


When I run the code, the result is: Test Correlation: Tensor("pearson_r_1/update_op:0", shape=(), dtype=float32). Unfortunately, it does not return the correlation value. - zeinab ezz

@zeinabezz Yes, it is a TensorFlow function, so it returns a tensor, which you need to evaluate in a session, just as you do when you call K.get_session().run(...). - jdehesa

@jdehesa When I added the code ynew = model.predict([testX,testX,testX]) K.get_session().run(tf.local_variables_initializer()) corr=correlation_coefficient(ynew, testLabels) K.get_session().run(tf.local_variables_initializer()) print('Test Correlation: ', corr) the result was: Test Correlation: Tensor("pearson_r_7/update_op:0", shape=(), dtype=float32). Unfortunately, the correlation value was not printed. Could you tell me how to print the value? - zeinab ezz

@zeinabezz I mean you should do print('Test Correlation: ', K.get_session().run(corr)) - jdehesa

Thank you very much, it works now. Could you recommend a good tutorial for learning TensorFlow? - zeinab ezz

@zeinabezz I think the official tutorials and guides are the best place to start. - jdehesa
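Drawing the answer and the comment thread above together, here is a minimal sketch of actually printing a number, assuming the TensorFlow 1.x session-based setup from the question (where tf.contrib is still available) and numpy imported as np:

import tensorflow as tf
from keras import backend as K

# the metric returns a Tensor (the update op), not a number
corr = correlation_coefficient(ynew, np.array(testLabels))

# streaming metrics keep their running totals in TF local variables,
# which must be initialized after the metric op has been created
K.get_session().run(tf.local_variables_initializer())

# evaluating the update op in the session yields a Python float
print('Test Correlation: %f' % K.get_session().run(corr))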
