Performance & OS Test Note

This note argues that, measured by business value, all technical departments stand on equal footing, and that user experience is what matters. It points out that testing is more than debugging and also covers requirements analysis and other key activities, that product quality should be judged by data, and it highlights test automation techniques and the importance of watching for memory leaks.


In the face of business value, every department that creates business value is equal; the department that does the coding is not a class above the rest.

What users buy is a service, the user experience, not a pile of impressive technology.

Testing should not be reduced to debugging during coding; it also has to catch design defects, requirements-analysis defects, and the like.

Quality that is fit for purpose is the only real quality.

Speaking with data is the only solid basis for claims about product quality.

 

Operating system: API + shell. These are the two surfaces through which an OS is exercised and tested.
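Both surfaces can be driven programmatically. A minimal sketch, assuming a POSIX-like system (the mount point "/" and the df invocation are illustrative choices, not anything from this note), querying free disk space once through the OS API and once through the shell:

import os
import subprocess

# OS API surface: ask the kernel for filesystem statistics directly.
stats = os.statvfs("/")
free_bytes = stats.f_bavail * stats.f_frsize
print("free bytes via API:", free_bytes)

# Shell surface: obtain the same information by driving a command-line tool.
result = subprocess.run(["df", "-k", "/"], capture_output=True, text=True, check=True)
print("df output via shell:")
print(result.stdout)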

 

Test automation techniques: testing is a technology with real potential, backed by software-engineering knowledge.
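A minimal sketch of what automated, software-engineering-backed testing looks like, using only Python's built-in unittest; the checkout_total function is a made-up example, not anything referenced in this note:

import unittest

def checkout_total(prices, discount=0.0):
    """Toy function under test: sum prices and apply a fractional discount."""
    if not 0.0 <= discount < 1.0:
        raise ValueError("discount must be in [0, 1)")
    return round(sum(prices) * (1.0 - discount), 2)

class CheckoutTotalTest(unittest.TestCase):
    def test_plain_sum(self):
        self.assertEqual(checkout_total([10.0, 5.5]), 15.5)

    def test_discount_applied(self):
        self.assertEqual(checkout_total([100.0], discount=0.2), 80.0)

    def test_invalid_discount_rejected(self):
        with self.assertRaises(ValueError):
            checkout_total([1.0], discount=1.5)

if __name__ == "__main__":
    unittest.main()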

Use tools to simulate test results.
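One common way tools simulate results is by replacing a slow or unavailable dependency with a mock, so the surrounding logic can still be exercised. A sketch using the standard-library unittest.mock; the monitoring-client interface here is hypothetical:

from unittest import mock

def fetch_latency_ms(client):
    """Ask a monitoring client (hypothetical interface) for the last request latency."""
    sample = client.last_sample()
    return sample["latency_ms"]

# Simulate the monitoring backend instead of calling a real one.
fake_client = mock.Mock()
fake_client.last_sample.return_value = {"latency_ms": 42}

assert fetch_latency_ms(fake_client) == 42
fake_client.last_sample.assert_called_once()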

Pay attention to memory leaks.
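For spotting leaks in pure Python, one low-friction option is the standard-library tracemalloc module; the leaky handle_request function below is a deliberately contrived example. The long script after this sketch is an MLPerf-loadgen-style inference benchmark harness for a MindSpore Wide&Deep model; its commented-out memory_profiler import points at another common leak-hunting tool.

import tracemalloc

_retained = []  # module-level list that keeps growing: a deliberate leak

def handle_request(payload):
    # Bug on purpose: every request's payload is retained forever.
    _retained.append(payload * 1000)

tracemalloc.start()
before = tracemalloc.take_snapshot()

for _ in range(10_000):
    handle_request("x")

after = tracemalloc.take_snapshot()
# Show the source lines responsible for the largest memory growth.
for stat in after.compare_to(before, "lineno")[:3]:
    print(stat)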

""" mlperf inference benchmarking tool """ from __future__ import division from __future__ import print_function from __future__ import unicode_literals # from memory_profiler import profile import argparse import array import collections import json import logging import os import sys import threading import time from multiprocessing import JoinableQueue import sklearn import star_loadgen as lg import numpy as np from mindspore import context from mindspore.train.model import Model from mindspore.train.serialization import load_checkpoint, load_param_into_net from src.model_utils.device_adapter import get_device_num, get_rank_id,get_device_id import os from mindspore import Model, context from mindspore.train.serialization import load_checkpoint, load_param_into_net,\ build_searched_strategy, merge_sliced_parameter from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel from src.callbacks import LossCallBack from src.datasets import create_dataset, DataType from src.metrics import AUCMetric from src.model_utils.moxing_adapter import moxing_wrapper from src.metrics import AUCMetric from src.callbacks import EvalCallBack import src.wide_and_deep as wide_deep import src.datasets as datasets from src.model_utils.config import config # from pygcbs_client.task import Task # task = Task() # config = task.config context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target , device_id = 1) print(config.device_id) batch_size = config.batch_size def add_write(file_path, print_str): with open(file_path, 'a+', encoding='utf-8') as file_out: file_out.write(print_str + '\n') def get_WideDeep_net(config): """ Get network of wide&deep model. """ WideDeep_net = WideDeepModel(config) loss_net = NetWithLossClass(WideDeep_net, config) train_net = TrainStepWrap(loss_net) eval_net = PredictWithSigmoid(WideDeep_net) return train_net, eval_net class ModelBuilder(): """ Wide and deep model builder """ def __init__(self): pass def get_hook(self): pass def get_train_hook(self): hooks = [] callback = LossCallBack() hooks.append(callback) if int(os.getenv('DEVICE_ID')) == 0: pass return hooks def get_net(self, config): return get_WideDeep_net(config) logging.basicConfig(level=logging.INFO) log = logging.getLogger("main") NANO_SEC = 1e9 MILLI_SEC = 1000 # pylint: disable=missing-docstring # the datasets we support SUPPORTED_DATASETS = { "debug": (datasets.Dataset, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(), {"randomize": 'total', "memory_map": True}), "multihot-criteo-sample": (wide_deep.WideDeepModel, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(), {"randomize": 'total', "memory_map": True}), "kaggle-criteo": (wide_deep.WideDeepModel, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(), {"randomize": 'total', "memory_map": True}), } # pre-defined command line options so simplify things. 
They are used as defaults and can be # overwritten from command line SUPPORTED_PROFILES = { "defaults": { "dataset": "multihot-criteo", "inputs": "continuous and categorical features", "outputs": "probability", "backend": "mindspore-native", "model": "wide_deep", "max-batchsize": 2048, }, "dlrm-debug-mindspore": { "dataset": "debug", "inputs": "continuous and categorical features", "outputs": "probability", "backend": "pytorch-native", "model": "dlrm", "max-batchsize": 128, }, "dlrm-multihot-sample-mindspore": { "dataset": "multihot-criteo-sample", "inputs": "continuous and categorical features", "outputs": "probability", "backend": "pytorch-native", "model": "dlrm", "max-batchsize": 2048, }, "dlrm-multihot-mindspore": { "dataset": "multihot-criteo", "inputs": "continuous and categorical features", "outputs": "probability", "backend": "pytorch-native", "model": "dlrm", "max-batchsize": 2048, } } SCENARIO_MAP = { "SingleStream": lg.TestScenario.SingleStream, "MultiStream": lg.TestScenario.MultiStream, "Server": lg.TestScenario.Server, "Offline": lg.TestScenario.Offline, } last_timeing = [] import copy class Item: """An item that we queue for processing by the thread pool.""" def __init__(self, query_id, content_id, features, batch_T=None, idx_offsets=None): self.query_id = query_id self.content_id = content_id self.data = features self.batch_T = batch_T self.idx_offsets = idx_offsets self.start = time.time() import mindspore.dataset as mds lock = threading.Lock() class RunnerBase: def __init__(self, model, ds, threads, post_proc=None): self.take_accuracy = False self.ds = ds self.model = model self.post_process = post_proc self.threads = threads self.result_timing = [] def handle_tasks(self, tasks_queue): pass def start_run(self, result_dict, take_accuracy): self.result_dict = result_dict self.result_timing = [] self.take_accuracy = take_accuracy self.post_process.start() def run_one_item(self, qitem): # run the prediction processed_results = [] try: lock.acquire() data = datasets._get_mindrecord_dataset(*qitem.data) t1 = time.time() results = self.model.eval(data) self.result_timing.append(time.time() - t1) print('##################',time.time()) lock.release() processed_results = self.post_process(results, qitem.batch_T, self.result_dict) # self.post_process.add_results(, ) # g_lables.extend(self.model.auc_metric.true_labels) # g_predicts.extend(self.model.auc_metric.pred_probs) g_lables.extend(self.model.auc_metric.true_labels) g_predicts.extend(self.model.auc_metric.pred_probs) except Exception as ex: # pylint: disable=broad-except log.error("thread: failed, %s", ex) # since post_process will not run, fake empty responses processed_results = [[]] * len(qitem.query_id) finally: response_array_refs = [] response = [] for idx, query_id in enumerate(qitem.query_id): # NOTE: processed_results returned by DlrmPostProcess store both # result = processed_results[idx][0] and target = processed_results[idx][1] # also each idx might be a query of samples, rather than a single sample # depending on the --samples-to-aggregate* arguments. 
# debug prints # print("s,e:",s_idx,e_idx, len(processed_results)) response_array = array.array("B", np.array(processed_results[0:1], np.float32).tobytes()) response_array_refs.append(response_array) bi = response_array.buffer_info() response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1])) lg.QuerySamplesComplete(response) def enqueue(self, query_samples): idx = [q.index for q in query_samples] query_id = [q.id for q in query_samples] # print(idx) query_len = len(query_samples) # if query_len < self.max_batchsize: # samples = self.ds.get_samples(idx) # # batch_T = [self.ds.get_labels(sample) for sample in samples] # self.run_one_item(Item(query_id, idx, samples)) # else: bs = 1 for i in range(0, query_len, bs): ie = min(i + bs, query_len) samples = self.ds.get_samples(idx[i:ie]) # batch_T = [self.ds.get_labels(sample) for sample in samples] self.run_one_item(Item(query_id[i:ie], idx[i:ie], samples)) def finish(self): pass import threading class MyQueue: def __init__(self, *args, **kwargs): self.lock = threading.Lock() self._data = [] self.status = True def put(self, value): # self.lock.acquire() self._data.append(value) # self.lock.release() def get(self): if self.status and self._data: return self._data.pop(0) if self.status: while self._data: time.sleep(0.1) return self._data.pop(0) return None # self.lock.acquire() # return self._data.pop(0) # self.lock.release() def task_done(self, *args, **kwargs): self.status = False class QueueRunner(RunnerBase): def __init__(self, model, ds, threads, post_proc=None): super().__init__(model, ds, threads, post_proc) queue_size_multiplier = 4 # (args.samples_per_query_offline + max_batchsize - 1) // max_batchsize) self.tasks = JoinableQueue(maxsize=threads * queue_size_multiplier) self.workers = [] self.result_dict = {} for _ in range(self.threads): worker = threading.Thread(target=self.handle_tasks, args=(self.tasks,)) worker.daemon = True self.workers.append(worker) worker.start() def handle_tasks(self, tasks_queue): """Worker thread.""" while True: qitem = tasks_queue.get() if qitem is None: # None in the queue indicates the parent want us to exit tasks_queue.task_done() break self.run_one_item(qitem) tasks_queue.task_done() def enqueue(self, query_samples): idx = [q.index for q in query_samples] query_id = [q.id for q in query_samples] query_len = len(query_samples) # print(idx) # if query_len < self.max_batchsize: # samples = self.ds.get_samples(idx) # # batch_T = [self.ds.get_labels(sample) for sample in samples] # data = Item(query_id, idx, samples) # self.tasks.put(data) # else: bs = 1 for i in range(0, query_len, bs): ie = min(i + bs, query_len) samples = self.ds.get_samples(idx) # batch_T = [self.ds.get_labels(sample) for sample in samples] self.tasks.put(Item(query_id[i:ie], idx[i:ie], samples)) def finish(self): # exit all threads for _ in self.workers: self.tasks.put(None) for worker in self.workers: worker.join() def add_results(final_results, name, result_dict, result_list, took, show_accuracy=False): percentiles = [50., 80., 90., 95., 99., 99.9] buckets = np.percentile(result_list, percentiles).tolist() buckets_str = ",".join(["{}:{:.4f}".format(p, b) for p, b in zip(percentiles, buckets)]) if result_dict["total"] == 0: result_dict["total"] = len(result_list) # this is what we record for each run result = { "took": took, "mean": np.mean(result_list), "percentiles": {str(k): v for k, v in zip(percentiles, buckets)}, "qps": len(result_list) / took, "count": len(result_list), "good_items": result_dict["good"], "total_items": 
result_dict["total"], } acc_str = "" if show_accuracy: result["accuracy"] = 100. * result_dict["good"] / result_dict["total"] acc_str = ", acc={:.3f}%".format(result["accuracy"]) if "roc_auc" in result_dict: result["roc_auc"] = 100. * result_dict["roc_auc"] acc_str += ", auc={:.3f}%".format(result["roc_auc"]) # add the result to the result dict final_results[name] = result # to stdout print("{} qps={:.2f}, mean={:.4f}, time={:.3f}{}, queries={}, tiles={}".format( name, result["qps"], result["mean"], took, acc_str, len(result_list), buckets_str)) lock = threading.Lock() def append_file(file_path, data): with lock: my_string = ','.join(str(f) for f in data) with open(file_path, 'a+') as file: file.write(my_string + '\n') def read_file(file_path): lines_as_lists = [] with open(file_path, 'r') as file: for line in file: # 去除行尾的换行符,并将行分割成列表 lines_as_lists.extend([float(num) for num in line.strip().split(',')]) return lines_as_lists def get_score(model, quality: float, performance: float): print(model, quality,performance) try: score =0 if model["scenario"] == 'SingleStream'or model["scenario"]== 'MultiStream': if quality >= model["accuracy"]: score = model["baseline_performance"] / (performance + 1e-9) * model["base_score"] else: score ==0 elif model["scenario"]== 'Server' or model["scenario"] == 'Offline': if quality >= model["accuracy"]: score = performance * model["base_score"] / model["baseline_performance"] print(model["baseline_performance"]) else: score ==0 except Exception as e: score ==0 finally: return score def main(): global last_timeing # 初始化时清空文件 global g_lables global g_predicts # args = get_args() g_lables=[] g_predicts=[] # # dataset to use wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS["debug"] # # --count-samples can be used to limit the number of samples used for testing ds = wanted_dataset(directory=config.dataset_path, train_mode=False, epochs=15, line_per_sample=1000, batch_size=config.test_batch_size, data_type=DataType.MINDRECORD,total_size = config.total_size) # # load model to backend # model = backend.load(args.model_path, inputs=args.inputs, outputs=args.outputs) net_builder = ModelBuilder() train_net, eval_net = net_builder.get_net(config) # ckpt_path = config.ckpt_path param_dict = load_checkpoint(config.ckpt_path) load_param_into_net(eval_net, param_dict) train_net.set_train() eval_net.set_train(False) # acc_metric = AccMetric() # model = Model(train_net, eval_network=eval_net, metrics={"acc": acc_metric}) auc_metric1 = AUCMetric() model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric1}) model.auc_metric = auc_metric1 # res = model.eval(ds_eval) final_results = { "runtime": "wide_deep_mindspore", "version": "v2", "time": int(time.time()), "cmdline": str(config), } mlperf_conf = os.path.abspath(config.mlperf_conf) if not os.path.exists(mlperf_conf): log.error("{} not found".format(mlperf_conf)) sys.exit(1) user_conf = os.path.abspath(config.user_conf) if not os.path.exists(user_conf): log.error("{} not found".format(user_conf)) sys.exit(1) if config.output: output_dir = os.path.abspath(config.output) os.makedirs(output_dir, exist_ok=True) os.chdir(output_dir) # # make one pass over the dataset to validate accuracy # count = ds.get_item_count() count = 5 base_score = config.config.base_score #task. accuracy = config.config.accuracy #task. baseline_performance = config.baseline_performance scenario_str = config["scenario"] #task. 
scenario = SCENARIO_MAP[scenario_str] runner_map = { lg.TestScenario.SingleStream: RunnerBase, lg.TestScenario.MultiStream: QueueRunner, lg.TestScenario.Server: QueueRunner, lg.TestScenario.Offline: QueueRunner } runner = runner_map[scenario](model, ds, config.threads_count, post_proc=post_proc) def issue_queries(query_samples): runner.enqueue(query_samples) def flush_queries(): pass settings = lg.TestSettings() settings.FromConfig(mlperf_conf, config.model_path, config.scenario) settings.FromConfig(user_conf, config.model_path, config.scenario) settings.scenario = scenario settings.mode = lg.TestMode.AccuracyOnly sut = lg.ConstructSUT(issue_queries, flush_queries) qsl = lg.ConstructQSL(count, config.performance_count, ds.load_query_samples, ds.unload_query_samples) log.info("starting {}".format(scenario)) result_dict = {"good": 0, "total": 0, "roc_auc": 0, "scenario": str(scenario)} runner.start_run(result_dict, config.accuracy) lg.StartTest(sut, qsl, settings) result_dict["good"] = runner.post_process.good result_dict["total"] = runner.post_process.total last_timeing = runner.result_timing post_proc.finalize(result_dict) add_results(final_results, "{}".format(scenario), result_dict, last_timeing, time.time() - ds.last_loaded, config.accuracy) runner.finish() lg.DestroyQSL(qsl) lg.DestroySUT(sut) # If multiple subprocesses are running the model send a signal to stop them if (int(os.environ.get("WORLD_SIZE", 1)) > 1): model.eval(None) from sklearn.metrics import roc_auc_score # labels = read_file(labels_path) # predicts = read_file(predicts_path) final_results['auc'] = sklearn.metrics.roc_auc_score(g_lables, g_predicts) print("auc+++++", final_results['auc']) NormMetric= { 'scenario': scenario_str, 'accuracy': accuracy, 'baseline_performance': baseline_performance, 'performance_unit': 's', 'base_score': base_score } # 打开文件 reprot_array=[] test_suit_array=[] test_suit_obj={} test_cases_array=[] test_cases_obj={} test_cases_obj["Name"] = config["task_name"] test_cases_obj["Performance Unit"] = config["performance_unit"] test_cases_obj["Total Duration"] = time.time() - ds.last_loaded test_cases_obj["Train Duration"] = None test_cases_obj["Training Info"] = { "Real Quality" :None, "Learning Rate" :None, "Base Quality" :None, "Epochs" :None, "Optimizer" :None } test_cases_obj["Software Versions"] = { "Python" :3.8, "Framework" :"Mindspore 2.2.14" } percentiles = [50., 80., 90., 95., 99., 99.9] buckets = np.percentile(last_timeing, percentiles).tolist() took = time.time() - ds.last_loaded qps = len(last_timeing) / took print(buckets) if scenario_str=="SingleStream": test_cases_obj["Performance Metric"] = buckets[2] if scenario_str=="MultiStream": test_cases_obj["Performance Metric"] = buckets[4] if scenario_str=="Server": test_cases_obj["Performance Metric"] = qps if scenario_str=="Offline": test_cases_obj["Performance Metric"] = qps score = get_score(NormMetric, final_results["auc"], test_cases_obj["Performance Metric"]) test_cases_obj["Score"] = score test_cases_array.append(test_cases_obj) test_suit_obj["Test Cases"] = test_cases_array test_suit_obj["Name"] = "Inference Suite" test_suit_array.append(test_suit_obj) test_obj = {"Test Suites": test_suit_array} reprot_array.append(test_obj) test_suit_result_obj = {"Name":"Inference Suite","Description":"inference model","Score":score } test_suit_result_array = [] test_suit_result_array.append(test_suit_result_obj) test_suit_result_obj1 = {"Test Suites Results":test_suit_result_array} reprot_array.append(test_suit_result_obj1) epoch_obj={ 
"epoch":None, "epoch_time":None, "train_loss":None, "metric":None, "metric_name":None } reprot_array.append(epoch_obj) result_final = {"Report Info": reprot_array } print("result_final", result_final) # task.save(result_final) if __name__ == "__main__": # try: # if task.config["is_run_infer"] == True: # main() # task.close() # else: # task.close() # raise ValueError # except Exception as e: # task.logger.error(e) # task.close(e) # task.logger.info("Finish ") # if task.config["is_run_infer"] == True: # print(config) main() # task.close()介绍一下这段代码