Post-Query Samples

This article describes the Post-Query trigger in Form Builder and how to use it: looking up values in other tables based on the current record, and avoiding unnecessary execution of the When-Validate-Item trigger.

Perform an action after fetching a record, such as looking up values in other tables based on a value in the current record. Fires once for each record fetched into the block.

Description
When a query is open in the block, the Post-Query trigger fires each time Form Builder fetches a record into a block. The trigger fires once for each record placed on the block's list of records.
Definition Level: form or block

Legal Commands: SELECT statements, unrestricted built-ins
Enter Query Mode: no

Usage Notes
Use a Post-Query trigger to perform the following tasks:
*populate control items or items in other blocks
*calculate statistics about the records retrieved by a query
*calculate a running total

Note: When you use a Post-Query trigger to SELECT non-base table values into control items, Form Builder marks each record as CHANGED and therefore fires the When-Validate-Item trigger by default. You can avoid executing the When-Validate-Item trigger by explicitly setting the Status property of each record to QUERY in the Post-Query trigger. To set record status programmatically, use SET_RECORD_PROPERTY, as in the sketch below.
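
The following is a minimal sketch of that technique, assuming a block named EMPLOYEE and the non-database item Payplan_Desc_Nondb used in Sample 1 below. SET_RECORD_PROPERTY resets the status of the record identified by SYSTEM.TRIGGER_RECORD so that When-Validate-Item does not fire for it:

DECLARE
  CURSOR lookup_payplan IS SELECT Payplan_Desc
                             FROM Payplan
                            WHERE Payplan_Id = :Employee.Payplan_Id;
BEGIN
  /*
  ** Populate the non-database item; at this point Form Builder
  ** would normally mark the record as CHANGED.
  */
  OPEN lookup_payplan;
  FETCH lookup_payplan INTO :Employee.Payplan_Desc_Nondb;
  CLOSE lookup_payplan;

  /*
  ** Reset the record status to QUERY so When-Validate-Item
  ** does not fire for this fetched record.
  */
  SET_RECORD_PROPERTY(TO_NUMBER(:SYSTEM.TRIGGER_RECORD),
                      'EMPLOYEE', STATUS, QUERY_STATUS);
END;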

On Failure
Form Builder flushes the record from the block and attempts to fetch the next record from the database. If there are no other records in the database, Form Builder closes the query and waits for the next operator action.

Sample 1

This example retrieves descriptions for code fields, for display in non-database items in the current block.

DECLARE 
 
  CURSOR lookup_payplan IS SELECT Payplan_Desc 
                             FROM Payplan 
                            WHERE Payplan_Id = 
                                  :Employee.Payplan_Id; 
 
  CURSOR lookup_area    IS SELECT Area_Name 
                             FROM Zip_Code 
                             WHERE Zip = :Employee.Zip; 

BEGIN 
  /* 
  ** Lookup the Payment Plan Description given the 
  ** Payplan_Id in the Employee Record just fetched. 
  ** Use Explicit Cursor for highest efficiency. 
  */ 
  OPEN lookup_payplan; 
  FETCH lookup_payplan INTO :Employee.Payplan_Desc_Nondb; 
  CLOSE lookup_payplan; 
 
  /* 
  ** Lookup the Area Description given the Zip code in 
  ** the Employee Record just fetched. Use Explicit 
  ** Cursor for highest efficiency. 
  */ 
  OPEN lookup_area; 
  FETCH lookup_area INTO :Employee.Area_Desc_Nondb; 
  CLOSE lookup_area; 
END; 
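
An alternative sketch (not part of the original sample, but assuming the same table and item names as Sample 1) performs the two lookups with SELECT ... INTO. Unlike the explicit cursors above, SELECT ... INTO raises NO_DATA_FOUND when no row matches, so each lookup is wrapped in its own sub-block:

BEGIN
  BEGIN
    SELECT Payplan_Desc
      INTO :Employee.Payplan_Desc_Nondb
      FROM Payplan
     WHERE Payplan_Id = :Employee.Payplan_Id;
  EXCEPTION
    WHEN NO_DATA_FOUND THEN
      :Employee.Payplan_Desc_Nondb := NULL;  -- no matching payment plan
  END;

  BEGIN
    SELECT Area_Name
      INTO :Employee.Area_Desc_Nondb
      FROM Zip_Code
     WHERE Zip = :Employee.Zip;
  EXCEPTION
    WHEN NO_DATA_FOUND THEN
      :Employee.Area_Desc_Nondb := NULL;     -- no matching Zip code
  END;
END;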



""" mlperf inference benchmarking tool """ from __future__ import division from __future__ import print_function from __future__ import unicode_literals # from memory_profiler import profile import argparse import array import collections import json import logging import os import sys import threading import time from multiprocessing import JoinableQueue import sklearn import star_loadgen as lg import numpy as np from mindspore import context from mindspore.train.model import Model from mindspore.train.serialization import load_checkpoint, load_param_into_net from src.model_utils.device_adapter import get_device_num, get_rank_id,get_device_id import os from mindspore import Model, context from mindspore.train.serialization import load_checkpoint, load_param_into_net,\ build_searched_strategy, merge_sliced_parameter from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel from src.callbacks import LossCallBack from src.datasets import create_dataset, DataType from src.metrics import AUCMetric from src.model_utils.moxing_adapter import moxing_wrapper from src.metrics import AUCMetric from src.callbacks import EvalCallBack import src.wide_and_deep as wide_deep import src.datasets as datasets from src.model_utils.config import config # from pygcbs_client.task import Task # task = Task() # config = task.config context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target , device_id = 1) print(config.device_id) batch_size = config.batch_size def add_write(file_path, print_str): with open(file_path, 'a+', encoding='utf-8') as file_out: file_out.write(print_str + '\n') def get_WideDeep_net(config): """ Get network of wide&deep model. """ WideDeep_net = WideDeepModel(config) loss_net = NetWithLossClass(WideDeep_net, config) train_net = TrainStepWrap(loss_net) eval_net = PredictWithSigmoid(WideDeep_net) return train_net, eval_net class ModelBuilder(): """ Wide and deep model builder """ def __init__(self): pass def get_hook(self): pass def get_train_hook(self): hooks = [] callback = LossCallBack() hooks.append(callback) if int(os.getenv('DEVICE_ID')) == 0: pass return hooks def get_net(self, config): return get_WideDeep_net(config) logging.basicConfig(level=logging.INFO) log = logging.getLogger("main") NANO_SEC = 1e9 MILLI_SEC = 1000 # pylint: disable=missing-docstring # the datasets we support SUPPORTED_DATASETS = { "debug": (datasets.Dataset, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(), {"randomize": 'total', "memory_map": True}), "multihot-criteo-sample": (wide_deep.WideDeepModel, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(), {"randomize": 'total', "memory_map": True}), "kaggle-criteo": (wide_deep.WideDeepModel, wide_deep.pre_process_criteo_wide_deep, wide_deep.WideDeepPostProcess(), {"randomize": 'total', "memory_map": True}), } # pre-defined command line options so simplify things. 
They are used as defaults and can be # overwritten from command line SUPPORTED_PROFILES = { "defaults": { "dataset": "multihot-criteo", "inputs": "continuous and categorical features", "outputs": "probability", "backend": "mindspore-native", "model": "wide_deep", "max-batchsize": 2048, }, "dlrm-debug-mindspore": { "dataset": "debug", "inputs": "continuous and categorical features", "outputs": "probability", "backend": "pytorch-native", "model": "dlrm", "max-batchsize": 128, }, "dlrm-multihot-sample-mindspore": { "dataset": "multihot-criteo-sample", "inputs": "continuous and categorical features", "outputs": "probability", "backend": "pytorch-native", "model": "dlrm", "max-batchsize": 2048, }, "dlrm-multihot-mindspore": { "dataset": "multihot-criteo", "inputs": "continuous and categorical features", "outputs": "probability", "backend": "pytorch-native", "model": "dlrm", "max-batchsize": 2048, } } SCENARIO_MAP = { "SingleStream": lg.TestScenario.SingleStream, "MultiStream": lg.TestScenario.MultiStream, "Server": lg.TestScenario.Server, "Offline": lg.TestScenario.Offline, } last_timeing = [] import copy class Item: """An item that we queue for processing by the thread pool.""" def __init__(self, query_id, content_id, features, batch_T=None, idx_offsets=None): self.query_id = query_id self.content_id = content_id self.data = features self.batch_T = batch_T self.idx_offsets = idx_offsets self.start = time.time() import mindspore.dataset as mds lock = threading.Lock() class RunnerBase: def __init__(self, model, ds, threads, post_proc=None): self.take_accuracy = False self.ds = ds self.model = model self.post_process = post_proc self.threads = threads self.result_timing = [] def handle_tasks(self, tasks_queue): pass def start_run(self, result_dict, take_accuracy): self.result_dict = result_dict self.result_timing = [] self.take_accuracy = take_accuracy self.post_process.start() def run_one_item(self, qitem): # run the prediction processed_results = [] try: lock.acquire() data = datasets._get_mindrecord_dataset(*qitem.data) t1 = time.time() results = self.model.eval(data) self.result_timing.append(time.time() - t1) print('##################',time.time()) lock.release() processed_results = self.post_process(results, qitem.batch_T, self.result_dict) # self.post_process.add_results(, ) # g_lables.extend(self.model.auc_metric.true_labels) # g_predicts.extend(self.model.auc_metric.pred_probs) g_lables.extend(self.model.auc_metric.true_labels) g_predicts.extend(self.model.auc_metric.pred_probs) except Exception as ex: # pylint: disable=broad-except log.error("thread: failed, %s", ex) # since post_process will not run, fake empty responses processed_results = [[]] * len(qitem.query_id) finally: response_array_refs = [] response = [] for idx, query_id in enumerate(qitem.query_id): # NOTE: processed_results returned by DlrmPostProcess store both # result = processed_results[idx][0] and target = processed_results[idx][1] # also each idx might be a query of samples, rather than a single sample # depending on the --samples-to-aggregate* arguments. 
# debug prints # print("s,e:",s_idx,e_idx, len(processed_results)) response_array = array.array("B", np.array(processed_results[0:1], np.float32).tobytes()) response_array_refs.append(response_array) bi = response_array.buffer_info() response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1])) lg.QuerySamplesComplete(response) def enqueue(self, query_samples): idx = [q.index for q in query_samples] query_id = [q.id for q in query_samples] # print(idx) query_len = len(query_samples) # if query_len < self.max_batchsize: # samples = self.ds.get_samples(idx) # # batch_T = [self.ds.get_labels(sample) for sample in samples] # self.run_one_item(Item(query_id, idx, samples)) # else: bs = 1 for i in range(0, query_len, bs): ie = min(i + bs, query_len) samples = self.ds.get_samples(idx[i:ie]) # batch_T = [self.ds.get_labels(sample) for sample in samples] self.run_one_item(Item(query_id[i:ie], idx[i:ie], samples)) def finish(self): pass import threading class MyQueue: def __init__(self, *args, **kwargs): self.lock = threading.Lock() self._data = [] self.status = True def put(self, value): # self.lock.acquire() self._data.append(value) # self.lock.release() def get(self): if self.status and self._data: return self._data.pop(0) if self.status: while self._data: time.sleep(0.1) return self._data.pop(0) return None # self.lock.acquire() # return self._data.pop(0) # self.lock.release() def task_done(self, *args, **kwargs): self.status = False class QueueRunner(RunnerBase): def __init__(self, model, ds, threads, post_proc=None): super().__init__(model, ds, threads, post_proc) queue_size_multiplier = 4 # (args.samples_per_query_offline + max_batchsize - 1) // max_batchsize) self.tasks = JoinableQueue(maxsize=threads * queue_size_multiplier) self.workers = [] self.result_dict = {} for _ in range(self.threads): worker = threading.Thread(target=self.handle_tasks, args=(self.tasks,)) worker.daemon = True self.workers.append(worker) worker.start() def handle_tasks(self, tasks_queue): """Worker thread.""" while True: qitem = tasks_queue.get() if qitem is None: # None in the queue indicates the parent want us to exit tasks_queue.task_done() break self.run_one_item(qitem) tasks_queue.task_done() def enqueue(self, query_samples): idx = [q.index for q in query_samples] query_id = [q.id for q in query_samples] query_len = len(query_samples) # print(idx) # if query_len < self.max_batchsize: # samples = self.ds.get_samples(idx) # # batch_T = [self.ds.get_labels(sample) for sample in samples] # data = Item(query_id, idx, samples) # self.tasks.put(data) # else: bs = 1 for i in range(0, query_len, bs): ie = min(i + bs, query_len) samples = self.ds.get_samples(idx) # batch_T = [self.ds.get_labels(sample) for sample in samples] self.tasks.put(Item(query_id[i:ie], idx[i:ie], samples)) def finish(self): # exit all threads for _ in self.workers: self.tasks.put(None) for worker in self.workers: worker.join() def add_results(final_results, name, result_dict, result_list, took, show_accuracy=False): percentiles = [50., 80., 90., 95., 99., 99.9] buckets = np.percentile(result_list, percentiles).tolist() buckets_str = ",".join(["{}:{:.4f}".format(p, b) for p, b in zip(percentiles, buckets)]) if result_dict["total"] == 0: result_dict["total"] = len(result_list) # this is what we record for each run result = { "took": took, "mean": np.mean(result_list), "percentiles": {str(k): v for k, v in zip(percentiles, buckets)}, "qps": len(result_list) / took, "count": len(result_list), "good_items": result_dict["good"], "total_items": 
result_dict["total"], } acc_str = "" if show_accuracy: result["accuracy"] = 100. * result_dict["good"] / result_dict["total"] acc_str = ", acc={:.3f}%".format(result["accuracy"]) if "roc_auc" in result_dict: result["roc_auc"] = 100. * result_dict["roc_auc"] acc_str += ", auc={:.3f}%".format(result["roc_auc"]) # add the result to the result dict final_results[name] = result # to stdout print("{} qps={:.2f}, mean={:.4f}, time={:.3f}{}, queries={}, tiles={}".format( name, result["qps"], result["mean"], took, acc_str, len(result_list), buckets_str)) lock = threading.Lock() def append_file(file_path, data): with lock: my_string = ','.join(str(f) for f in data) with open(file_path, 'a+') as file: file.write(my_string + '\n') def read_file(file_path): lines_as_lists = [] with open(file_path, 'r') as file: for line in file: # 去除行尾的换行符,并将行分割成列表 lines_as_lists.extend([float(num) for num in line.strip().split(',')]) return lines_as_lists def get_score(model, quality: float, performance: float): print(model, quality,performance) try: score =0 if model["scenario"] == 'SingleStream'or model["scenario"]== 'MultiStream': if quality >= model["accuracy"]: score = model["baseline_performance"] / (performance + 1e-9) * model["base_score"] else: score ==0 elif model["scenario"]== 'Server' or model["scenario"] == 'Offline': if quality >= model["accuracy"]: score = performance * model["base_score"] / model["baseline_performance"] print(model["baseline_performance"]) else: score ==0 except Exception as e: score ==0 finally: return score def main(): global last_timeing # 初始化时清空文件 global g_lables global g_predicts # args = get_args() g_lables=[] g_predicts=[] # # dataset to use wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS["debug"] # # --count-samples can be used to limit the number of samples used for testing ds = wanted_dataset(directory=config.dataset_path, train_mode=False, epochs=15, line_per_sample=1000, batch_size=config.test_batch_size, data_type=DataType.MINDRECORD,total_size = config.total_size) # # load model to backend # model = backend.load(args.model_path, inputs=args.inputs, outputs=args.outputs) net_builder = ModelBuilder() train_net, eval_net = net_builder.get_net(config) # ckpt_path = config.ckpt_path param_dict = load_checkpoint(config.ckpt_path) load_param_into_net(eval_net, param_dict) train_net.set_train() eval_net.set_train(False) # acc_metric = AccMetric() # model = Model(train_net, eval_network=eval_net, metrics={"acc": acc_metric}) auc_metric1 = AUCMetric() model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric1}) model.auc_metric = auc_metric1 # res = model.eval(ds_eval) final_results = { "runtime": "wide_deep_mindspore", "version": "v2", "time": int(time.time()), "cmdline": str(config), } mlperf_conf = os.path.abspath(config.mlperf_conf) if not os.path.exists(mlperf_conf): log.error("{} not found".format(mlperf_conf)) sys.exit(1) user_conf = os.path.abspath(config.user_conf) if not os.path.exists(user_conf): log.error("{} not found".format(user_conf)) sys.exit(1) if config.output: output_dir = os.path.abspath(config.output) os.makedirs(output_dir, exist_ok=True) os.chdir(output_dir) # # make one pass over the dataset to validate accuracy # count = ds.get_item_count() count = 5 base_score = config.config.base_score #task. accuracy = config.config.accuracy #task. baseline_performance = config.baseline_performance scenario_str = config["scenario"] #task. 
scenario = SCENARIO_MAP[scenario_str] runner_map = { lg.TestScenario.SingleStream: RunnerBase, lg.TestScenario.MultiStream: QueueRunner, lg.TestScenario.Server: QueueRunner, lg.TestScenario.Offline: QueueRunner } runner = runner_map[scenario](model, ds, config.threads_count, post_proc=post_proc) def issue_queries(query_samples): runner.enqueue(query_samples) def flush_queries(): pass settings = lg.TestSettings() settings.FromConfig(mlperf_conf, config.model_path, config.scenario) settings.FromConfig(user_conf, config.model_path, config.scenario) settings.scenario = scenario settings.mode = lg.TestMode.AccuracyOnly sut = lg.ConstructSUT(issue_queries, flush_queries) qsl = lg.ConstructQSL(count, config.performance_count, ds.load_query_samples, ds.unload_query_samples) log.info("starting {}".format(scenario)) result_dict = {"good": 0, "total": 0, "roc_auc": 0, "scenario": str(scenario)} runner.start_run(result_dict, config.accuracy) lg.StartTest(sut, qsl, settings) result_dict["good"] = runner.post_process.good result_dict["total"] = runner.post_process.total last_timeing = runner.result_timing post_proc.finalize(result_dict) add_results(final_results, "{}".format(scenario), result_dict, last_timeing, time.time() - ds.last_loaded, config.accuracy) runner.finish() lg.DestroyQSL(qsl) lg.DestroySUT(sut) # If multiple subprocesses are running the model send a signal to stop them if (int(os.environ.get("WORLD_SIZE", 1)) > 1): model.eval(None) from sklearn.metrics import roc_auc_score # labels = read_file(labels_path) # predicts = read_file(predicts_path) final_results['auc'] = sklearn.metrics.roc_auc_score(g_lables, g_predicts) print("auc+++++", final_results['auc']) NormMetric= { 'scenario': scenario_str, 'accuracy': accuracy, 'baseline_performance': baseline_performance, 'performance_unit': 's', 'base_score': base_score } # 打开文件 reprot_array=[] test_suit_array=[] test_suit_obj={} test_cases_array=[] test_cases_obj={} test_cases_obj["Name"] = config["task_name"] test_cases_obj["Performance Unit"] = config["performance_unit"] test_cases_obj["Total Duration"] = time.time() - ds.last_loaded test_cases_obj["Train Duration"] = None test_cases_obj["Training Info"] = { "Real Quality" :None, "Learning Rate" :None, "Base Quality" :None, "Epochs" :None, "Optimizer" :None } test_cases_obj["Software Versions"] = { "Python" :3.8, "Framework" :"Mindspore 2.2.14" } percentiles = [50., 80., 90., 95., 99., 99.9] buckets = np.percentile(last_timeing, percentiles).tolist() took = time.time() - ds.last_loaded qps = len(last_timeing) / took print(buckets) if scenario_str=="SingleStream": test_cases_obj["Performance Metric"] = buckets[2] if scenario_str=="MultiStream": test_cases_obj["Performance Metric"] = buckets[4] if scenario_str=="Server": test_cases_obj["Performance Metric"] = qps if scenario_str=="Offline": test_cases_obj["Performance Metric"] = qps score = get_score(NormMetric, final_results["auc"], test_cases_obj["Performance Metric"]) test_cases_obj["Score"] = score test_cases_array.append(test_cases_obj) test_suit_obj["Test Cases"] = test_cases_array test_suit_obj["Name"] = "Inference Suite" test_suit_array.append(test_suit_obj) test_obj = {"Test Suites": test_suit_array} reprot_array.append(test_obj) test_suit_result_obj = {"Name":"Inference Suite","Description":"inference model","Score":score } test_suit_result_array = [] test_suit_result_array.append(test_suit_result_obj) test_suit_result_obj1 = {"Test Suites Results":test_suit_result_array} reprot_array.append(test_suit_result_obj1) epoch_obj={ 
"epoch":None, "epoch_time":None, "train_loss":None, "metric":None, "metric_name":None } reprot_array.append(epoch_obj) result_final = {"Report Info": reprot_array } print("result_final", result_final) # task.save(result_final) if __name__ == "__main__": # try: # if task.config["is_run_infer"] == True: # main() # task.close() # else: # task.close() # raise ValueError # except Exception as e: # task.logger.error(e) # task.close(e) # task.logger.info("Finish ") # if task.config["is_run_infer"] == True: # print(config) main() # task.close()介绍一下这段代码
最新发布
06-17