# -*- coding:utf-8 -*-
import os
import sys
import io
import socket
import time
import re
def read_config():
    """Load ``KEY=VALUE`` settings from ``config.txt`` in the working directory.

    Parsing rules:
      * blank lines and lines starting with ``#`` are ignored;
      * lines without ``=`` are ignored;
      * only the first ``=`` separates key from value, so values may
        themselves contain ``=``;
      * whitespace around both key and value is stripped.

    Returns:
        dict mapping setting names to their string values, or ``None`` when
        the file cannot be opened (a message is printed in that case).
    """
    config = {}
    try:
        # utf-8-sig drops a BOM that Windows editors may prepend (it would
        # otherwise become part of the first key); errors='replace' keeps a
        # stray non-UTF-8 byte in a comment from aborting the whole load.
        with open('config.txt', 'r', encoding='utf-8-sig',
                  errors='replace') as config_file:
            for raw_line in config_file:
                line = raw_line.strip()
                if not line or line.startswith('#'):
                    continue
                key, separator, value = line.partition('=')
                if not separator:
                    continue
                config[key.strip()] = value.strip()
    except OSError:
        # Covers a missing file as well as permission / is-a-directory errors.
        print("Unable to open config file!", flush=True)
        return None
    print("Read config success.", flush=True)
    return config
def is_command_available(command):
    """Report whether *command* resolves to an executable.

    The lookup is done in-process with ``shutil.which`` instead of shelling
    out to ``where`` / ``which``: the command name is never interpolated
    into a shell string, and no child process is spawned per check.

    Args:
        command: program name to look up on ``PATH`` (a path to an
            executable file is also accepted).

    Returns:
        ``True`` if an executable was found, ``False`` otherwise (including
        for an empty or ``None`` name).
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import shutil

    if not command:
        return False
    return shutil.which(command) is not None
def _run_cnmon_section(flag, start_keyword, end_keyword):
    """Run ``cnmon info <flag>`` and return the lines between two markers."""
    with os.popen(f'cnmon info {flag}') as pipe:
        output = pipe.read()
    return extract_info(output, start_keyword, end_keyword)


def _find_percentages(label_pattern, count, text):
    """Return the *count* percentages listed after a label, or zeros.

    Looks for ``<label> : N % N % ...`` in *text*. Values are returned as
    floats; when the line is absent, integer zeros are returned so the
    report shows ``0%`` for missing readings.
    """
    # Integer readings match exactly as before; decimals such as "12.5 %"
    # are accepted as well instead of being silently reported as 0.
    pattern = label_pattern + r'\s+:' + r'\s+(\d+(?:\.\d+)?) %' * count
    match = re.search(pattern, text)
    if match is None:
        return [0] * count
    return [float(value) for value in match.groups()]


def get_hardware_info(send_count):
    """Collect hardware readings from ``cnmon`` and format them as a report.

    Runs ``cnmon info`` with ``-e``, ``-p``, ``-m``, ``-b`` and ``-u``,
    keeps the section of each output delimited by the keywords below, and
    parses the utilization section into MLU / device-CPU percentages.

    Args:
        send_count: sequence number of this report; embedded in the text.

    Returns:
        The report as a single string ending with the END marker line.
    """
    temperature_lines = _run_cnmon_section('-e', "Temperature", "QDD Status")
    power_lines = _run_cnmon_section('-p', "Power", "QDD Status")
    # NOTE(review): the start markers "v4.9.5" and "QDD 7 : Invalid" look
    # tied to one specific cnmon version / board layout - confirm they still
    # match whenever cnmon is upgraded, otherwise these sections come back
    # empty.
    memory_lines = _run_cnmon_section('-m', "v4.9.5", "Virtual Memory Usage")
    bandwidth_lines = _run_cnmon_section('-b', "QDD 7 : Invalid", " Chassis")
    usage_str = '\n'.join(
        _run_cnmon_section('-u', "Utilization", "QDD Status")
    )

    # Utilization readings; each falls back to 0 when its line is missing.
    [mlu_average] = _find_percentages(r'MLU Average', 1, usage_str)
    mlu_cores = _find_percentages(r'MLU 0-3', 4, usage_str)
    [cpu_chip] = _find_percentages(r'Device CPU Chip', 1, usage_str)
    cpu_cores = _find_percentages(r'Device CPU Core 0-3', 4, usage_str)

    # Assemble the report, ending with the send counter and the END marker.
    info_str = (
        "温度信息:\n{}\n\n功率信息:\n{}\n\n内存信息:\n{}\n\n带宽信息:\n{}\n\nMLU信息:\nMLU Average: {}%\nMLU 0-3 利用率:\nMLU 0: {}%\nMLU 1: {}%\nMLU 2: {}%\nMLU 3: {}%\n\nCPU信息:\n"
        "Device CPU Chip: {}%\nCPU核心 0-3 利用率:\nDevice CPU Core 0: {}%\nDevice CPU Core 1: {}%\nDevice CPU Core 2: {}%\nDevice CPU Core 3: {}%\n发送次数:\nsend_count : {}次\n---------------END---------------"
    ).format(
        '\n'.join(temperature_lines),
        '\n'.join(power_lines),
        '\n'.join(memory_lines),
        '\n'.join(bandwidth_lines),
        mlu_average,
        *mlu_cores,
        cpu_chip,
        *cpu_cores,
        send_count,
    )
    return info_str
def extract_info(result, start_keyword, end_keyword):
    """Return the stripped, non-blank lines found between two marker lines.

    Collection starts after the first line containing *start_keyword* and
    stops at the first subsequent non-blank line containing *end_keyword*
    (which is not included). If the end marker never appears, everything up
    to the end of the text is returned.

    Args:
        result: command output to scan; ``None`` or ``""`` yields ``[]``.
        start_keyword: substring identifying the line that opens the section.
        end_keyword: substring identifying the line that closes the section.

    Returns:
        List of the section's lines with surrounding whitespace removed.
    """
    extracted = []
    started = False
    # ``result or ""`` lets a failed command (no output) produce an empty
    # section instead of raising on ``None``.
    for line in (result or "").splitlines():
        if start_keyword in line:
            # Marker lines are never collected. This also applies to later
            # lines that happen to repeat the start keyword.
            started = True
            continue
        if not started or not line.strip():
            continue
        if end_keyword in line:
            break
        extracted.append(line.strip())
    return extracted
# Canned report sent when the ``cnmon`` tool is not installed. Built once
# here instead of on every loop iteration.
_FALLBACK_INFO = (
    "温度信息:\n"
    "Board : 34 C\n"
    "Cluster 0 : 36 C\n"
    "\n"
    "功率信息:\n"
    "Usage : 3.44 W\n"
    "Cap : 15 W\n"
    " \n"
    "内存信息:\n"
    "Physical Memory Usage :\n"
    "Total : 8192 MiB\n"
    "Used : 1669 MiB\n"
    "Free : 6523 MiB\n"
    "Channel Memory Usage :\n"
    " 0 : 1669 MiB\n"
    "DDR Data Widths : 64 bit\n"
    "DDR BandWidth : 29 GB/s\n"
    "\n"
    "带宽信息:\n"
    "Bandwidth : N/A\n"
    "\n"
    "CPU信息:\n"
    "Device CPU Chip: 6.0%\n"
    "CPU核心 0-3 利用率:\n"
    "Device CPU Core 0: 3.0%\n"
    "Device CPU Core 1: 20.0%\n"
    "Device CPU Core 2: 9.0%\n"
    "Device CPU Core 3: 3.0%\n"
    "\n"
    "---------------END---------------\n"
)


def _read_port(config, key):
    """Return ``config[key]`` as a valid port number, or ``None`` if unusable."""
    raw_value = config.get(key)
    try:
        port = int(raw_value)
    except (TypeError, ValueError):
        # TypeError: key missing (None); ValueError: not a number.
        print(f"Invalid or missing {key} in config: {raw_value!r}", flush=True)
        return None
    if not 0 <= port <= 65535:
        print(f"{key} out of range (0-65535): {port}", flush=True)
        return None
    return port


def main():
    """Send a hardware report over UDP every 0.5 s until interrupted.

    Reads ``SERVER_IP`` / ``SERVER_PORT`` (local bind address) and
    ``CLIENT_IP`` / ``CLIENT_PORT`` (destination) from the config, binds a
    UDP socket, then loops: build a report (live ``cnmon`` data when the
    tool is available, a canned report otherwise), send it, print it.
    Returns early, after printing a message, when the config is missing or
    invalid or the bind fails. The socket is always closed on the way out.
    """
    # Re-wrap stdout as UTF-8 with line buffering so the Chinese text in the
    # reports is emitted correctly and promptly.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', line_buffering=True)

    config = read_config()
    if config is None:
        return

    server_ip = config.get('SERVER_IP')
    client_ip = config.get('CLIENT_IP')
    # Only a *missing* key is rejected: an empty SERVER_IP is a valid
    # "bind to all interfaces" address.
    if server_ip is None or client_ip is None:
        print("Missing SERVER_IP or CLIENT_IP in config.", flush=True)
        return
    server_port = _read_port(config, 'SERVER_PORT')
    client_port = _read_port(config, 'CLIENT_PORT')
    if server_port is None or client_port is None:
        return

    print(f"ServerIP: {server_ip}", flush=True)
    print(f"ServerPort: {server_port}", flush=True)
    print(f"ClientIP: {client_ip}", flush=True)
    print(f"ClientPort: {client_port}", flush=True)

    # UDP socket; everything after creation sits inside try/finally so the
    # socket is released on every exit path, including a failed bind.
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        # Allow quick re-use of the local port after a restart.
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        print("Set socket ok.", flush=True)

        try:
            sock.bind((server_ip, server_port))
        except OSError as e:
            print(f"Bind error: {e}", flush=True)
            return
        print("Bind success.", flush=True)

        send_count = 1
        while True:
            # Re-checked every cycle so cnmon is picked up if it becomes
            # available while the program is running.
            if is_command_available('cnmon'):
                print("获取硬件信息...", flush=True)
                info = get_hardware_info(send_count)
            else:
                print("当前cnmon命令不可用,使用nmon功能", flush=True)
                info = _FALLBACK_INFO

            try:
                sock.sendto(info.encode(), (client_ip, client_port))
            except OSError as e:
                # A failed send is reported but does not stop the loop.
                print(f"Send error: {e}", flush=True)
            else:
                print("Send data success:", flush=True)
                print(info, flush=True)

            send_count += 1
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("Program interrupted by user.", flush=True)
    finally:
        sock.close()
        print("Socket closed.", flush=True)
# Run the sender only when this file is executed as a script.
if __name__ == "__main__":
    main()