方案一
获取已用显存,总显存,gpu 名称,使用率等等
命令获取gpu信息
public class ShellService {
@Value("${micronaut.server.port}")
private Integer myServerPort;
@Inject
private TrainModelRepository trainModelRepository;
private static HashMap<String, Process> processHashMap = new HashMap<>();
public String getGPU() throws IOException {
Process process = null;
try {
if (Platform.isWindows()) {
process = Runtime.getRuntime().exec("nvidia-smi.exe");
} else if (Platform.isLinux()) {
String[] shell = {"/bin/bash", "-c", "nvidia-smi"};
process = Runtime.getRuntime().exec(shell);
}
process.getOutputStream().close();
} catch (IOException e) {
e.printStackTrace();
throw new IndaiException("显卡不存在或获取显卡信息失败");
}
BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
StringBuffer stringBuffer = new StringBuffer();
String line = "";
while (null != (line = reader.readLine())) {
stringBuffer.append(line + "\n");
}
return stringBuffer.toString();
}
}
封装获取的信息
/**
* @author fuchanghai
*/
@Controller("/api/GPU")
public class GPUController {
@Inject
ShellService shellService;
@Get(value = "/get/gpu")
public ApiResponse getGPU() throws IOException {
List<GPUInfo> gpuInfoList = getGpuInfos();
return ApiResponse.responseSuccess(gpuInfoList);
}
public List<GPUInfo> getGpuInfos() throws IOException {
String gpus = null;
gpus = shellService.getGPU();
//命令行调用后获取的信息
/*String gpus = "Mon Jun 1 10:47:16 2020 \n" +
"+-----------------------------------------------------------------------------+\n" +
"| NVIDIA-SMI 418.87.01 Driver Version: 418.87.01 CUDA Version: 10.1 |\n" +
"|-------------------------------+----------------------+----------------------+\n" +
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n" +
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n" +
"|===============================+======================+======================|\n" +
"| 0 TITAN V Off | 00000000:2D:00.0 On | N/A |\n" +
"| 29% 43C P8 27W / 250W | 1123MiB / 12035MiB | 0% Default |\n" +
"+-------------------------------+----------------------+----------------------+\n" +
"| 1 GeForce RTX 208... Off | 00000000:99:00.0 Off | N/A |\n" +
"| 0% 29C P8 20W / 260W | 11MiB / 10989MiB | 0% Default |\n" +
"+-------------------------------+----------------------+----------------------+\n" +
" \n" +
"+-----------------------------------------------------------------------------+\n" +
"| Processes: GPU Memory |\n" +
"| GPU PID Type Process name Usage |\n" +
"|=============================================================================|\n" +
"| 0 16841 C inference_worker 1077MiB |\n" +
"| 0 19996 G /usr/lib/xorg/Xorg 33MiB |\n" +
"+-----------------------------------------------------------------------------+\n";*/
//System.out.println("命令行获取的结果: " + gpus);
//分割废物信息
String[] split = gpus.split("\\|===============================\\+======================\\+======================\\|");
String[] gpusInfo = split[1].split(" ");
// 分割多个gpu
String[] gpuInfo = gpusInfo[0].split("\\+-------------------------------\\+----------------------\\+----------------------\\+");
//System.out.println("000000000000000000000000000000000");
List<GPUInfo> gpuInfoList = new ArrayList<>();
for (int i = 0; i < gpuInfo.length - 1; i++) {
GPUInfo gpuInfo1 = new GPUInfo();
String[] nameAndInfo = gpuInfo[i].split("\n");
//只要第二块的数据
/*0
*TITAN
*V
*Off
* */
String[] split1 = nameAndInfo[1].split("\\|")[1] // 0 TITAN V Off
.split("\\s+");//去空格
gpuInfo1.setNumber(Integer.parseInt(split1[1]));
StringBuffer name = new StringBuffer();
for (int j = 0; j < split1.length - 1; j++) {
if (j > 1 && j != split1.length) {
name.append(split1[j] + " ");
}
}
gpuInfo1.setName(name.toString());
String[] info = nameAndInfo[2].split("\\|")[2].split("\\s+");
/* System.out.println("biubiu~~~biubiu~~~biubiu~~~biubiu~~~biubiu~~~biubiu~~~biubiu~~~biubiu~~~biubiu~~~biubiu~~~");*/
gpuInfo1.setUsedMemory(info[1]);
gpuInfo1.setTotalMemory(info[3]);
int useable = Integer.parseInt(gpuInfo1.getTotalMemory().split("MiB")[0]) - Integer.parseInt(gpuInfo1.getUsedMemory().split("MiB")[0]);
gpuInfo1.setUseableMemory(useable + "MiB");
Double usageRate = Integer.parseInt(gpuInfo1.getUsedMemory().split("MiB")[0]) * 100.00 / Integer.parseInt(gpuInfo1.getTotalMemory().split("MiB")[0]);
gpuInfo1.setUsageRate(usageRate);
gpuInfoList.add(gpuInfo1);
}
return gpuInfoList;
}
}
gpu 实体类
@Data
public class GPUInfo {
private Integer number;
private String name;
private String totalMemory;
private String usedMemory;
private String useableMemory;
private Double usageRate;
}
方案二
英伟达开源工具
https://github.com/NVIDIA/dcgm-exporter