# Keras references: https://keras.io/zh/getting-started/faq/#sample-batch-epoch
# https://keras-cn.readthedocs.io/en/latest/
# See also: https://www.jianshu.com/p/5d47f152ff62
# https://blog.youkuaiyun.com/A632189007/article/details/77978058
### Approach 1: let TensorFlow grow GPU memory usage on demand.
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

# allow_growth=True makes TF allocate GPU memory incrementally instead of
# grabbing the whole card up front.
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
# Register the configured session as the Keras backend session.
session = tf.Session(config=tf_config)
KTF.set_session(session)
####### Approach 2: cap the per-process GPU memory fraction.
import os
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

env_dist = os.environ
# Platform-injected values (MiB). NOTE(review): the comments in the original
# label CONTAINER as "total card memory" and DEV as "memory granted to this
# notebook" — verify against the Aliyun cGPU docs before relying on them.
card_gpu_mem_total = int(env_dist['ALIYUN_COM_GPU_MEM_CONTAINER'])
request_card_gpu_mem = int(env_dist['ALIYUN_COM_GPU_MEM_DEV'])

# TensorFlow's GPU-memory accounting is imprecise; multiplying by 0.7 keeps
# actual usage under the granted limit.
safety_percent = 0.7
mem_fraction = round(card_gpu_mem_total * safety_percent / request_card_gpu_mem, 1)

cfg = tf.ConfigProto()
cfg.gpu_options.per_process_gpu_memory_fraction = mem_fraction
# Register the capped session as the Keras backend session.
KTF.set_session(tf.Session(config=cfg))
## Using the GPU from plain TensorFlow.
import os
import tensorflow as tf

env_dist = os.environ
# Platform-injected memory figures (MiB); presumably total card memory vs.
# the amount granted to this notebook — confirm against the Aliyun docs.
card_total = int(env_dist['ALIYUN_COM_GPU_MEM_CONTAINER'])
card_granted = int(env_dist['ALIYUN_COM_GPU_MEM_DEV'])

# TF's GPU memory control is not exact; scale by 0.7 as a safety margin so
# the process stays under its granted limit.
safety = 0.7
mem_fraction = round(card_total * safety / card_granted, 1)

cfg = tf.ConfigProto()
cfg.gpu_options.per_process_gpu_memory_fraction = mem_fraction
sess = tf.Session(config=cfg)

# write your code
a = tf.constant(3)
b = tf.constant(4)
print('a + b = {0}'.format(sess.run(a + b)))
## Using the GPU from PyTorch.
## PyTorch can only claim whole cards; it cannot share a card by memory quota.
import os
import torch

# True when CUDA drivers and at least one GPU are visible.
torch.cuda.is_available()

# Pin this process to the card index assigned by the Aliyun platform.
# Bug fix: os.getenv() returns None when the variable is unset, and assigning
# None into os.environ raises TypeError — only set it when present.
_gpu_idx = os.getenv("ALIYUN_COM_GPU_MEM_IDX")
if _gpu_idx is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = _gpu_idx
###################
### Using the GPU from Keras.
import os

# Expose only GPU 0 to this process; must be set before TF initializes CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

config = tf.ConfigProto()
# Allocate GPU memory on demand rather than reserving the whole card.
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
# Bug fix: this previously called KTF.set_session(sess), which pointed at the
# stale session from the earlier TensorFlow snippet instead of the session
# configured just above.
KTF.set_session(session)
####查看GPU使用状态:nvidia-smi实时刷新并高亮显示状态
##其他参考资料:https://blog.youkuaiyun.com/huangfei711/article/details/79230446#
# (shell command, not Python) refresh nvidia-smi once per second, highlighting changes:
#   watch -n 1 -d nvidia-smi
####参考资料:https://blog.youkuaiyun.com/HaixWang/article/details/83349023
##https://blog.youkuaiyun.com/u013066730/article/details/77510015
##https://blog.youkuaiyun.com/silent56_th/article/details/60154637
## Additional notes: pick specific cards and cap each at 60% of its memory.
import os
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

# Make only cards 1 and 2 visible to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"

gpu_config = tf.ConfigProto()
# Each TF process may use at most 60% of every visible card's memory.
gpu_config.gpu_options.per_process_gpu_memory_fraction = 0.6
session = tf.Session(config=gpu_config)

# Register the configured session as the Keras backend session.
KTF.set_session(session)
##########
## Distributed training.
## Keras delegates distribution to TensorFlow: attach Keras to a TF session
## that is connected to a cluster (here, a single-process local server).
from keras import backend as K

local_server = tf.train.Server.create_local_server()
cluster_sess = tf.Session(local_server.target)
K.set_session(cluster_sess)
##########