报错代码:
import tensorflow as tf
import numpy as np
import os

# BUG FIX: the CUDA runtime reads CUDA_DEVICE_ORDER (singular "DEVICE");
# the original misspelling "CUDA_DEVICES_ORDER" is silently ignored.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

# Pin the variable to GPU 0. Equivalent to the fully-qualified form below.
with tf.device("/gpu:0"):
    # with tf.device("/job:localhost/replica:0/task:0/device:GPU:0"):
    a = tf.Variable(0, dtype=tf.float32)

# Pin the constant to GPU 1.
with tf.device("/gpu:1"):
    b = tf.constant(0, shape=[1], dtype=tf.float32)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory on demand instead of grabbing it all
config.log_device_placement = True      # log the device each op is actually placed on

# NOTE(review): on a machine where TF only registers XLA_GPU (no plain GPU)
# devices, this session raises InvalidArgumentError — that is the error this
# snippet intentionally reproduces.
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    c = sess.run(a)
    b_ = sess.run(b)
    print(b_, c)
报错:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Cannot assign a device for operation Variable: node Variable (defined at /wzg_code/×××/multi_gpu_test.py:19) was explicitly assigned to /device:GPU:1 but available devices are [ /job:localhost/replica:0/task:0/device:CPU:0, /job:localhost/replica:0/task:0/device:XLA_CPU:0, /job:localhost/replica:0/task:0/device:XLA_GPU:0, /job:localhost/replica:0/task:0/device:XLA_GPU:1 ]. Make sure the device specification refers to a valid device.
[[Variable]]
当时考虑过把 tf.device("/gpu:0") 改成 tf.device("/job:localhost/replica:0/task:0/device:XLA_GPU:0"),仍然报错;最后发现需要修改 Session 的配置,增加 config.allow_soft_placement=True 才解决。原因是:从报错信息可以看到,当前环境里 TensorFlow 只注册了 XLA_GPU 设备,没有普通的 /device:GPU:N 设备(通常意味着 GPU 版 TensorFlow 与 CUDA/cuDNN 版本不匹配或未正确安装),而 allow_soft_placement=True 允许 TensorFlow 在显式指定的设备不存在时自动回退到可用设备上执行,而不是直接抛出 InvalidArgumentError。更正后的代码如下:
import tensorflow as tf
import numpy as np
import os

# BUG FIX: correct name is CUDA_DEVICE_ORDER; the misspelled
# "CUDA_DEVICES_ORDER" has no effect on the CUDA runtime.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

# Request GPU 0 for the variable (short form of the fully-qualified spec below).
with tf.device("/gpu:0"):
    # with tf.device("/job:localhost/replica:0/task:0/device:GPU:0"):
    a = tf.Variable(0, dtype=tf.float32)

# Request GPU 1 for the constant.
with tf.device("/gpu:1"):
    b = tf.constant(0, shape=[1], dtype=tf.float32)

# allow_soft_placement=True is the actual fix: when the explicitly requested
# /device:GPU:N does not exist (here only XLA_GPU devices are registered),
# TensorFlow falls back to an available device instead of raising
# InvalidArgumentError.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True  # allocate GPU memory on demand
config.log_device_placement = True      # log the final placement of every op

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    c = sess.run(a)
    b_ = sess.run(b)
    print(b_, c)
681

被折叠的 条评论
为什么被折叠?



