"""
mlperf inference benchmarking tool
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
# from memory_profiler import profile
import argparse
import array
import collections
import json
import logging
import os
import sys
import threading
import time
from multiprocessing import JoinableQueue
import sklearn
import star_loadgen as lg
import numpy as np
from mindspore import Model, context
from mindspore.train.serialization import (load_checkpoint, load_param_into_net,
                                            build_searched_strategy, merge_sliced_parameter)
from src.model_utils.device_adapter import get_device_num, get_rank_id, get_device_id
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.config import config
from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel
from src.callbacks import LossCallBack, EvalCallBack
from src.datasets import create_dataset, DataType
from src.metrics import AUCMetric
import src.wide_and_deep as wide_deep
import src.datasets as datasets
# from pygcbs_client.task import Task
# task = Task()
# config = task.config
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target,
                    device_id=config.device_id)
print(config.device_id)
batch_size = config.batch_size
def add_write(file_path, print_str):
with open(file_path, 'a+', encoding='utf-8') as file_out:
file_out.write(print_str + '\n')
def get_WideDeep_net(config):
"""
Get network of wide&deep model.
"""
WideDeep_net = WideDeepModel(config)
loss_net = NetWithLossClass(WideDeep_net, config)
train_net = TrainStepWrap(loss_net)
eval_net = PredictWithSigmoid(WideDeep_net)
return train_net, eval_net
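# Note on get_WideDeep_net: judging by the class names, train_net wraps the network,
# loss and optimizer update step, while eval_net applies a sigmoid to the wide&deep
# logits so the benchmark consumes click probabilities.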
class ModelBuilder():
"""
Wide and deep model builder
"""
def __init__(self):
pass
def get_hook(self):
pass
def get_train_hook(self):
hooks = []
callback = LossCallBack()
hooks.append(callback)
        if int(os.getenv('DEVICE_ID', '0')) == 0:
pass
return hooks
def get_net(self, config):
return get_WideDeep_net(config)
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("main")
NANO_SEC = 1e9
MILLI_SEC = 1000
# pylint: disable=missing-docstring
# the datasets we support
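# Each entry maps a dataset name to a tuple of
# (dataset/model class, pre-processing function, post-processing object, extra kwargs).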
SUPPORTED_DATASETS = {
"debug":
(datasets.Dataset, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(),
{"randomize": 'total', "memory_map": True}),
"multihot-criteo-sample":
(wide_deep.WideDeepModel, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(),
{"randomize": 'total', "memory_map": True}),
"kaggle-criteo":
(wide_deep.WideDeepModel, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(),
{"randomize": 'total', "memory_map": True}),
}
# Pre-defined command line options to simplify things. They are used as defaults and can be
# overwritten from the command line.
SUPPORTED_PROFILES = {
"defaults": {
"dataset": "multihot-criteo",
"inputs": "continuous and categorical features",
"outputs": "probability",
"backend": "mindspore-native",
"model": "wide_deep",
"max-batchsize": 2048,
},
"dlrm-debug-mindspore": {
"dataset": "debug",
"inputs": "continuous and categorical features",
"outputs": "probability",
"backend": "pytorch-native",
"model": "dlrm",
"max-batchsize": 128,
},
"dlrm-multihot-sample-mindspore": {
"dataset": "multihot-criteo-sample",
"inputs": "continuous and categorical features",
"outputs": "probability",
"backend": "pytorch-native",
"model": "dlrm",
"max-batchsize": 2048,
},
"dlrm-multihot-mindspore": {
"dataset": "multihot-criteo",
"inputs": "continuous and categorical features",
"outputs": "probability",
"backend": "pytorch-native",
"model": "dlrm",
"max-batchsize": 2048,
}
}
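# NOTE: the dlrm-* profiles appear to carry over the backend/model names of the
# upstream MLPerf DLRM reference script; only "defaults" reflects this
# mindspore-native wide_deep port.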
SCENARIO_MAP = {
"SingleStream": lg.TestScenario.SingleStream,
"MultiStream": lg.TestScenario.MultiStream,
"Server": lg.TestScenario.Server,
"Offline": lg.TestScenario.Offline,
}
last_timeing = []
class Item:
"""An item that we queue for processing by the thread pool."""
def __init__(self, query_id, content_id, features, batch_T=None, idx_offsets=None):
self.query_id = query_id
self.content_id = content_id
self.data = features
self.batch_T = batch_T
self.idx_offsets = idx_offsets
self.start = time.time()
lock = threading.Lock()
class RunnerBase:
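    """Synchronous runner: each loadgen query is executed inline on the calling thread."""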
def __init__(self, model, ds, threads, post_proc=None):
self.take_accuracy = False
self.ds = ds
self.model = model
self.post_process = post_proc
self.threads = threads
self.result_timing = []
def handle_tasks(self, tasks_queue):
pass
def start_run(self, result_dict, take_accuracy):
self.result_dict = result_dict
self.result_timing = []
self.take_accuracy = take_accuracy
self.post_process.start()
    def run_one_item(self, qitem):
        # run the prediction
        processed_results = []
        try:
            # hold the lock while MindSpore evaluates so concurrent workers do not
            # interleave graph execution; `with` guarantees release on exceptions
            with lock:
                data = datasets._get_mindrecord_dataset(*qitem.data)
                t1 = time.time()
                results = self.model.eval(data)
                self.result_timing.append(time.time() - t1)
                print('##################', time.time())
            processed_results = self.post_process(results, qitem.batch_T, self.result_dict)
            # accumulate labels/predictions for the final AUC computation
            g_lables.extend(self.model.auc_metric.true_labels)
            g_predicts.extend(self.model.auc_metric.pred_probs)
except Exception as ex: # pylint: disable=broad-except
log.error("thread: failed, %s", ex)
# since post_process will not run, fake empty responses
processed_results = [[]] * len(qitem.query_id)
finally:
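            # Encode each result as raw bytes; buffer_info() gives the (address, length)
            # pair that loadgen's QuerySampleResponse expects, and QuerySamplesComplete
            # hands the batch of responses back to loadgen.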
response_array_refs = []
response = []
for idx, query_id in enumerate(qitem.query_id):
# NOTE: processed_results returned by DlrmPostProcess store both
# result = processed_results[idx][0] and target = processed_results[idx][1]
# also each idx might be a query of samples, rather than a single sample
# depending on the --samples-to-aggregate* arguments.
# debug prints
# print("s,e:",s_idx,e_idx, len(processed_results))
response_array = array.array("B", np.array(processed_results[0:1], np.float32).tobytes())
response_array_refs.append(response_array)
bi = response_array.buffer_info()
response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
lg.QuerySamplesComplete(response)
def enqueue(self, query_samples):
idx = [q.index for q in query_samples]
query_id = [q.id for q in query_samples]
# print(idx)
query_len = len(query_samples)
# if query_len < self.max_batchsize:
# samples = self.ds.get_samples(idx)
# # batch_T = [self.ds.get_labels(sample) for sample in samples]
# self.run_one_item(Item(query_id, idx, samples))
# else:
bs = 1
for i in range(0, query_len, bs):
ie = min(i + bs, query_len)
samples = self.ds.get_samples(idx[i:ie])
# batch_T = [self.ds.get_labels(sample) for sample in samples]
self.run_one_item(Item(query_id[i:ie], idx[i:ie], samples))
def finish(self):
pass
class MyQueue:
def __init__(self, *args, **kwargs):
self.lock = threading.Lock()
self._data = []
self.status = True
def put(self, value):
# self.lock.acquire()
self._data.append(value)
# self.lock.release()
    def get(self):
        if self.status and self._data:
            return self._data.pop(0)
        if self.status:
            # block until an item arrives or the queue is shut down
            while not self._data and self.status:
                time.sleep(0.1)
            return self._data.pop(0) if self._data else None
        return None
# self.lock.acquire()
# return self._data.pop(0)
# self.lock.release()
def task_done(self, *args, **kwargs):
self.status = False
class QueueRunner(RunnerBase):
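    """Threaded runner: queries are queued and consumed by a pool of worker threads."""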
def __init__(self, model, ds, threads, post_proc=None):
super().__init__(model, ds, threads, post_proc)
queue_size_multiplier = 4 # (args.samples_per_query_offline + max_batchsize - 1) // max_batchsize)
self.tasks = JoinableQueue(maxsize=threads * queue_size_multiplier)
self.workers = []
self.result_dict = {}
for _ in range(self.threads):
worker = threading.Thread(target=self.handle_tasks, args=(self.tasks,))
worker.daemon = True
self.workers.append(worker)
worker.start()
def handle_tasks(self, tasks_queue):
"""Worker thread."""
while True:
qitem = tasks_queue.get()
if qitem is None:
# None in the queue indicates the parent want us to exit
tasks_queue.task_done()
break
self.run_one_item(qitem)
tasks_queue.task_done()
def enqueue(self, query_samples):
idx = [q.index for q in query_samples]
query_id = [q.id for q in query_samples]
query_len = len(query_samples)
# print(idx)
# if query_len < self.max_batchsize:
# samples = self.ds.get_samples(idx)
# # batch_T = [self.ds.get_labels(sample) for sample in samples]
# data = Item(query_id, idx, samples)
# self.tasks.put(data)
# else:
bs = 1
for i in range(0, query_len, bs):
ie = min(i + bs, query_len)
            samples = self.ds.get_samples(idx[i:ie])
# batch_T = [self.ds.get_labels(sample) for sample in samples]
self.tasks.put(Item(query_id[i:ie], idx[i:ie], samples))
def finish(self):
# exit all threads
for _ in self.workers:
self.tasks.put(None)
for worker in self.workers:
worker.join()
def add_results(final_results, name, result_dict, result_list, took, show_accuracy=False):
percentiles = [50., 80., 90., 95., 99., 99.9]
buckets = np.percentile(result_list, percentiles).tolist()
buckets_str = ",".join(["{}:{:.4f}".format(p, b) for p, b in zip(percentiles, buckets)])
if result_dict["total"] == 0:
result_dict["total"] = len(result_list)
# this is what we record for each run
result = {
"took": took,
"mean": np.mean(result_list),
"percentiles": {str(k): v for k, v in zip(percentiles, buckets)},
"qps": len(result_list) / took,
"count": len(result_list),
"good_items": result_dict["good"],
"total_items": result_dict["total"],
}
acc_str = ""
if show_accuracy:
result["accuracy"] = 100. * result_dict["good"] / result_dict["total"]
acc_str = ", acc={:.3f}%".format(result["accuracy"])
if "roc_auc" in result_dict:
result["roc_auc"] = 100. * result_dict["roc_auc"]
acc_str += ", auc={:.3f}%".format(result["roc_auc"])
# add the result to the result dict
final_results[name] = result
# to stdout
print("{} qps={:.2f}, mean={:.4f}, time={:.3f}{}, queries={}, tiles={}".format(
name, result["qps"], result["mean"], took, acc_str,
len(result_list), buckets_str))
lock = threading.Lock()
def append_file(file_path, data):
with lock:
my_string = ','.join(str(f) for f in data)
with open(file_path, 'a+') as file:
file.write(my_string + '\n')
def read_file(file_path):
lines_as_lists = []
with open(file_path, 'r') as file:
for line in file:
            # strip the trailing newline and split the line into floats
            lines_as_lists.extend([float(num) for num in line.strip().split(',')])
return lines_as_lists
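# append_file/read_file use a simple line-oriented CSV format: each append_file call
# writes one comma-separated line (e.g. "0.12,0.34") and read_file flattens every line
# back into a single list of floats. They appear unused in this script; labels and
# predictions are accumulated in memory via g_lables/g_predicts instead.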
def get_score(model, quality: float, performance: float):
    print(model, quality, performance)
    score = 0
    try:
        if model["scenario"] == 'SingleStream' or model["scenario"] == 'MultiStream':
            # latency scenarios: beating the baseline latency raises the score
            if quality >= model["accuracy"]:
                score = model["baseline_performance"] / (performance + 1e-9) * model["base_score"]
            else:
                score = 0
        elif model["scenario"] == 'Server' or model["scenario"] == 'Offline':
            # throughput scenarios: beating the baseline QPS raises the score
            if quality >= model["accuracy"]:
                score = performance * model["base_score"] / model["baseline_performance"]
                print(model["baseline_performance"])
            else:
                score = 0
    except Exception as e:  # pylint: disable=broad-except
        log.error("get_score failed, %s", e)
        score = 0
    return score
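# Worked example with hypothetical numbers (not from any real run): for an Offline
# scenario with accuracy target 0.80, baseline_performance 1000 QPS and base_score 100,
# a run reaching AUC 0.803 at 1200 QPS scores 1200 * 100 / 1000 = 120; a SingleStream
# run with a 5 ms baseline and a 4 ms measured latency scores 5 / 4 * 100 = 125.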
def main():
global last_timeing
    # reset the global label/prediction accumulators at initialization
    global g_lables
    global g_predicts
    # args = get_args()
    g_lables = []
    g_predicts = []
    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS["debug"]
    # --count-samples can be used to limit the number of samples used for testing
    ds = wanted_dataset(directory=config.dataset_path,
                        train_mode=False,
                        epochs=15,
                        line_per_sample=1000,
                        batch_size=config.test_batch_size,
                        data_type=DataType.MINDRECORD,
                        total_size=config.total_size)
# # load model to backend
# model = backend.load(args.model_path, inputs=args.inputs, outputs=args.outputs)
net_builder = ModelBuilder()
train_net, eval_net = net_builder.get_net(config)
# ckpt_path = config.ckpt_path
param_dict = load_checkpoint(config.ckpt_path)
load_param_into_net(eval_net, param_dict)
train_net.set_train()
eval_net.set_train(False)
# acc_metric = AccMetric()
# model = Model(train_net, eval_network=eval_net, metrics={"acc": acc_metric})
auc_metric1 = AUCMetric()
model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric1})
model.auc_metric = auc_metric1
# res = model.eval(ds_eval)
final_results = {
"runtime": "wide_deep_mindspore",
"version": "v2",
"time": int(time.time()),
"cmdline": str(config),
}
mlperf_conf = os.path.abspath(config.mlperf_conf)
if not os.path.exists(mlperf_conf):
log.error("{} not found".format(mlperf_conf))
sys.exit(1)
user_conf = os.path.abspath(config.user_conf)
if not os.path.exists(user_conf):
log.error("{} not found".format(user_conf))
sys.exit(1)
if config.output:
output_dir = os.path.abspath(config.output)
os.makedirs(output_dir, exist_ok=True)
os.chdir(output_dir)
#
# make one pass over the dataset to validate accuracy
#
count = ds.get_item_count()
    count = 5  # NOTE: hard-coded override; only 5 query samples are registered with the QSL
    base_score = config.base_score
    accuracy = config.accuracy
    baseline_performance = config.baseline_performance
    scenario_str = config.scenario
scenario = SCENARIO_MAP[scenario_str]
runner_map = {
lg.TestScenario.SingleStream: RunnerBase,
lg.TestScenario.MultiStream: QueueRunner,
lg.TestScenario.Server: QueueRunner,
lg.TestScenario.Offline: QueueRunner
}
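    # SingleStream issues queries one at a time, so the synchronous RunnerBase is enough;
    # the other scenarios use the threaded QueueRunner.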
runner = runner_map[scenario](model, ds, config.threads_count, post_proc=post_proc)
def issue_queries(query_samples):
runner.enqueue(query_samples)
def flush_queries():
pass
settings = lg.TestSettings()
settings.FromConfig(mlperf_conf, config.model_path, config.scenario)
settings.FromConfig(user_conf, config.model_path, config.scenario)
settings.scenario = scenario
settings.mode = lg.TestMode.AccuracyOnly
sut = lg.ConstructSUT(issue_queries, flush_queries)
qsl = lg.ConstructQSL(count, config.performance_count, ds.load_query_samples,
ds.unload_query_samples)
log.info("starting {}".format(scenario))
result_dict = {"good": 0, "total": 0, "roc_auc": 0, "scenario": str(scenario)}
runner.start_run(result_dict, config.accuracy)
lg.StartTest(sut, qsl, settings)
result_dict["good"] = runner.post_process.good
result_dict["total"] = runner.post_process.total
last_timeing = runner.result_timing
post_proc.finalize(result_dict)
add_results(final_results, "{}".format(scenario),
result_dict, last_timeing, time.time() - ds.last_loaded, config.accuracy)
runner.finish()
lg.DestroyQSL(qsl)
lg.DestroySUT(sut)
# If multiple subprocesses are running the model send a signal to stop them
if (int(os.environ.get("WORLD_SIZE", 1)) > 1):
model.eval(None)
from sklearn.metrics import roc_auc_score
# labels = read_file(labels_path)
# predicts = read_file(predicts_path)
    final_results['auc'] = roc_auc_score(g_lables, g_predicts)
print("auc+++++", final_results['auc'])
NormMetric= {
'scenario': scenario_str,
'accuracy': accuracy,
'baseline_performance': baseline_performance,
'performance_unit': 's',
'base_score': base_score
}
    # assemble the report structures
reprot_array=[]
test_suit_array=[]
test_suit_obj={}
test_cases_array=[]
test_cases_obj={}
test_cases_obj["Name"] = config["task_name"]
test_cases_obj["Performance Unit"] = config["performance_unit"]
test_cases_obj["Total Duration"] = time.time() - ds.last_loaded
test_cases_obj["Train Duration"] = None
test_cases_obj["Training Info"] = {
"Real Quality" :None,
"Learning Rate" :None,
"Base Quality" :None,
"Epochs" :None,
"Optimizer" :None
}
test_cases_obj["Software Versions"] = {
"Python" :3.8,
"Framework" :"Mindspore 2.2.14"
}
percentiles = [50., 80., 90., 95., 99., 99.9]
buckets = np.percentile(last_timeing, percentiles).tolist()
took = time.time() - ds.last_loaded
qps = len(last_timeing) / took
print(buckets)
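    # percentiles = [50, 80, 90, 95, 99, 99.9], so buckets[2] is the p90 latency
    # (the SingleStream metric) and buckets[4] the p99 latency (the MultiStream metric);
    # Server and Offline report throughput (QPS) instead.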
if scenario_str=="SingleStream":
test_cases_obj["Performance Metric"] = buckets[2]
if scenario_str=="MultiStream":
test_cases_obj["Performance Metric"] = buckets[4]
if scenario_str=="Server":
test_cases_obj["Performance Metric"] = qps
if scenario_str=="Offline":
test_cases_obj["Performance Metric"] = qps
score = get_score(NormMetric, final_results["auc"], test_cases_obj["Performance Metric"])
test_cases_obj["Score"] = score
test_cases_array.append(test_cases_obj)
test_suit_obj["Test Cases"] = test_cases_array
test_suit_obj["Name"] = "Inference Suite"
test_suit_array.append(test_suit_obj)
test_obj = {"Test Suites": test_suit_array}
reprot_array.append(test_obj)
test_suit_result_obj = {"Name":"Inference Suite","Description":"inference model","Score":score }
test_suit_result_array = []
test_suit_result_array.append(test_suit_result_obj)
test_suit_result_obj1 = {"Test Suites Results":test_suit_result_array}
reprot_array.append(test_suit_result_obj1)
epoch_obj={
"epoch":None,
"epoch_time":None,
"train_loss":None,
"metric":None,
"metric_name":None
}
reprot_array.append(epoch_obj)
result_final = {"Report Info": reprot_array }
print("result_final", result_final)
# task.save(result_final)
if __name__ == "__main__":
# try:
# if task.config["is_run_infer"] == True:
# main()
# task.close()
# else:
# task.close()
# raise ValueError
# except Exception as e:
# task.logger.error(e)
# task.close(e)
# task.logger.info("Finish ")
# if task.config["is_run_infer"] == True:
# print(config)
main()
    # task.close()