首先下载diffusion_policy代码:https://github.com/real-stanford/diffusion_policy/tree/main
修改diffusion_policy/policy/diffusion_unet_lowdim_policy.py:
from typing import Dict
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, reduce
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from diffusion_policy.model.common.normalizer import LinearNormalizer
from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy
from diffusion_policy.model.diffusion.conditional_unet1d import ConditionalUnet1D
from diffusion_policy.model.diffusion.mask_generator import LowdimMaskGenerator
class DiffusionUnetLowdimPolicy(BaseLowdimPolicy):
    """Diffusion policy over low-dimensional observations.

    Wraps a ``ConditionalUnet1D`` denoiser and a DDPM noise scheduler.
    Observations condition the sampler in one of three mutually exclusive
    ways, selected in ``__init__``:

    * ``obs_as_local_cond``: observations are passed as ``local_cond``;
    * ``obs_as_global_cond``: the first ``n_obs_steps`` observations are
      flattened and passed as ``global_cond``;
    * neither (inpainting): observations are concatenated to the actions
      along the last axis and clamped to their known values via a mask.

    This variant adds a tensor-in / tuple-out ``forward`` so the policy can be
    handed to ``torch.onnx.export``.
    """

    def __init__(self,
            model: ConditionalUnet1D,
            noise_scheduler: DDPMScheduler,
            horizon,
            obs_dim,
            action_dim,
            n_action_steps,
            n_obs_steps,
            num_inference_steps=None,
            obs_as_local_cond=False,
            obs_as_global_cond=False,
            pred_action_steps_only=False,
            oa_step_convention=False,
            # parameters passed to step
            **kwargs):
        """Store the model, scheduler and trajectory layout.

        Args:
            model: Denoising network called as
                ``model(trajectory, t, local_cond=..., global_cond=...)``.
            noise_scheduler: Scheduler used for ``add_noise`` during training
                and ``set_timesteps`` / ``step`` during sampling.
            horizon: Number of time steps in a sampled trajectory.
            obs_dim: Size of the last axis of an observation.
            action_dim: Size of the last axis of an action.
            n_action_steps: Number of action steps returned to the caller.
            n_obs_steps: Number of leading observation steps used as
                conditioning.
            num_inference_steps: Denoising steps at inference; defaults to the
                scheduler's ``num_train_timesteps``.
            obs_as_local_cond: Condition through ``local_cond``.
            obs_as_global_cond: Condition through ``global_cond``.
            pred_action_steps_only: Sample only ``n_action_steps`` steps;
                requires ``obs_as_global_cond``.
            oa_step_convention: When true the returned action window starts at
                ``n_obs_steps - 1`` instead of ``n_obs_steps``.
            **kwargs: Extra keyword arguments forwarded to ``scheduler.step``.
        """
        super().__init__()
        assert not (obs_as_local_cond and obs_as_global_cond)
        if pred_action_steps_only:
            assert obs_as_global_cond
        self.model = model
        self.noise_scheduler = noise_scheduler
        # When observations are fed as local/global conditioning they are not
        # part of the trajectory, so the mask generator sees obs_dim == 0.
        self.mask_generator = LowdimMaskGenerator(
            action_dim=action_dim,
            obs_dim=0 if (obs_as_local_cond or obs_as_global_cond) else obs_dim,
            max_n_obs_steps=n_obs_steps,
            fix_obs_steps=True,
            action_visible=False
        )
        self.normalizer = LinearNormalizer()
        self.horizon = horizon
        self.obs_dim = obs_dim
        self.action_dim = action_dim
        self.n_action_steps = n_action_steps
        self.n_obs_steps = n_obs_steps
        self.obs_as_local_cond = obs_as_local_cond
        self.obs_as_global_cond = obs_as_global_cond
        self.pred_action_steps_only = pred_action_steps_only
        self.oa_step_convention = oa_step_convention
        self.kwargs = kwargs

        if num_inference_steps is None:
            num_inference_steps = noise_scheduler.config.num_train_timesteps
        self.num_inference_steps = num_inference_steps

    # ========= inference ============
    def conditional_sample(self,
            condition_data, condition_mask,
            local_cond=None, global_cond=None,
            generator=None,
            # keyword arguments to scheduler.step
            **kwargs
            ):
        """Run the reverse diffusion loop and return the sampled trajectory.

        Args:
            condition_data: Tensor holding the known values; its shape, dtype
                and device define the sampled trajectory.
            condition_mask: Tensor of the same shape; entries equal to 1/True
                are clamped to ``condition_data`` at every step.
            local_cond: Optional local conditioning passed to the model.
            global_cond: Optional global conditioning passed to the model.
            generator: Optional ``torch.Generator`` for the initial noise and
                the scheduler steps.
            **kwargs: Forwarded to ``scheduler.step``.

        Returns:
            The denoised trajectory with the conditioned entries enforced.
        """
        model = self.model
        scheduler = self.noise_scheduler

        trajectory = torch.randn(
            size=condition_data.shape,
            dtype=condition_data.dtype,
            device=condition_data.device,
            generator=generator)

        # set step values
        scheduler.set_timesteps(self.num_inference_steps)

        for t in scheduler.timesteps:
            # 1. apply conditioning
            # torch.where is used instead of boolean-mask assignment
            # (trajectory[mask] = data[mask]); this file is the variant
            # prepared for ONNX export.
            trajectory = torch.where(condition_mask == 1, condition_data, trajectory)

            # 2. predict model output
            model_output = model(trajectory, t,
                local_cond=local_cond, global_cond=global_cond)

            # 3. compute previous image: x_t -> x_t-1
            trajectory = scheduler.step(
                model_output, t, trajectory,
                generator=generator,
                **kwargs
                ).prev_sample

        # finally make sure conditioning is enforced
        trajectory = torch.where(condition_mask == 1, condition_data, trajectory)

        return trajectory

    def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Sample actions for a batch of observations.

        Args:
            obs_dict: Must include the ``"obs"`` key holding a rank-3 tensor
                whose last axis has size ``obs_dim``. ``"past_action"`` is not
                supported.

        Returns:
            A dict with ``"action"`` (the executed window of the prediction)
            and ``"action_pred"`` (the full unnormalized prediction). In
            inpainting mode it additionally contains ``"action_obs_pred"`` and
            ``"obs_pred"``.
        """
        assert 'obs' in obs_dict
        assert 'past_action' not in obs_dict # not implemented yet
        nobs = self.normalizer['obs'].normalize(obs_dict['obs'])
        B, _, Do = nobs.shape
        To = self.n_obs_steps
        assert Do == self.obs_dim
        T = self.horizon
        Da = self.action_dim

        # build input
        # NOTE(review): `device` and `dtype` are not defined in this class;
        # presumably they are provided by BaseLowdimPolicy - confirm.
        device = self.device
        dtype = self.dtype

        # handle different ways of passing observation
        local_cond = None
        global_cond = None
        if self.obs_as_local_cond:
            # condition through local feature
            # all zero except first To timesteps
            local_cond = torch.zeros(size=(B,T,Do), device=device, dtype=dtype)
            local_cond[:,:To] = nobs[:,:To]
            shape = (B, T, Da)
            cond_data = torch.zeros(size=shape, device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
        elif self.obs_as_global_cond:
            # condition through global feature
            global_cond = nobs[:,:To].reshape(nobs.shape[0], -1)
            shape = (B, T, Da)
            if self.pred_action_steps_only:
                shape = (B, self.n_action_steps, Da)
            cond_data = torch.zeros(size=shape, device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
        else:
            # condition through inpainting: the trajectory carries both the
            # action and the observation channels, and the first To
            # observation steps are marked as known.
            shape = (B, T, Da+Do)
            cond_data = torch.zeros(size=shape, device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
            cond_data[:,:To,Da:] = nobs[:,:To]
            cond_mask[:,:To,Da:] = True

        # run sampling
        nsample = self.conditional_sample(
            cond_data,
            cond_mask,
            local_cond=local_cond,
            global_cond=global_cond,
            **self.kwargs)

        # unnormalize prediction
        naction_pred = nsample[...,:Da]
        action_pred = self.normalizer['action'].unnormalize(naction_pred)

        # get action
        if self.pred_action_steps_only:
            action = action_pred
        else:
            start = To
            if self.oa_step_convention:
                start = To - 1
            end = start + self.n_action_steps
            action = action_pred[:,start:end]

        result = {
            'action': action,
            'action_pred': action_pred
        }
        # Inpainting mode only. pred_action_steps_only requires global
        # conditioning (asserted in __init__), so `start`/`end` are always
        # bound when this branch runs.
        if not (self.obs_as_local_cond or self.obs_as_global_cond):
            nobs_pred = nsample[...,Da:]
            obs_pred = self.normalizer['obs'].unnormalize(nobs_pred)
            action_obs_pred = obs_pred[:,start:end]
            result['action_obs_pred'] = action_obs_pred
            result['obs_pred'] = obs_pred
        return result

    def forward(self, obs, obs_mask):
        """Tensor-in / tuple-out wrapper around ``predict_action``.

        Args:
            obs: Observation tensor, used as ``obs_dict["obs"]``.
            obs_mask: Accepted for interface compatibility; it does not
                influence the result.

        Returns:
            ``(action, action_pred, action_obs_pred, obs_pred)`` in inpainting
            mode. With local or global observation conditioning no observation
            prediction exists, so ``(action, action_pred)`` is returned; the
            previous implementation raised ``UnboundLocalError`` there.
        """
        result = self.predict_action({'obs': obs})
        outputs = (result['action'], result['action_pred'])
        if 'obs_pred' in result:
            outputs = outputs + (result['action_obs_pred'], result['obs_pred'])
        return outputs

    # ========= training ============
    def set_normalizer(self, normalizer: LinearNormalizer):
        """Copy the state of ``normalizer`` into this policy's normalizer."""
        self.normalizer.load_state_dict(normalizer.state_dict())

    def compute_loss(self, batch):
        """Compute the masked MSE diffusion loss for one training batch.

        Args:
            batch: Mapping with ``"obs"`` and ``"action"`` entries;
                ``"valid_mask"`` is not supported.

        Returns:
            Scalar loss tensor.

        Raises:
            ValueError: If the scheduler's ``prediction_type`` is neither
                ``"epsilon"`` nor ``"sample"``.
        """
        # normalize input
        assert 'valid_mask' not in batch
        nbatch = self.normalizer.normalize(batch)
        obs = nbatch['obs']
        action = nbatch['action']

        # handle different ways of passing observation
        local_cond = None
        global_cond = None
        trajectory = action
        if self.obs_as_local_cond:
            # zero out observations after n_obs_steps
            # (in place: `local_cond` aliases the normalized `obs` tensor)
            local_cond = obs
            local_cond[:,self.n_obs_steps:,:] = 0
        elif self.obs_as_global_cond:
            global_cond = obs[:,:self.n_obs_steps,:].reshape(
                obs.shape[0], -1)
            if self.pred_action_steps_only:
                To = self.n_obs_steps
                start = To
                if self.oa_step_convention:
                    start = To - 1
                end = start + self.n_action_steps
                trajectory = action[:,start:end]
        else:
            trajectory = torch.cat([action, obs], dim=-1)

        # generate inpainting mask
        if self.pred_action_steps_only:
            condition_mask = torch.zeros_like(trajectory, dtype=torch.bool)
        else:
            condition_mask = self.mask_generator(trajectory.shape)

        # Sample the noise that will be added to the trajectory
        noise = torch.randn(trajectory.shape, device=trajectory.device)
        bsz = trajectory.shape[0]
        # Sample a random timestep for each batch element
        timesteps = torch.randint(
            0, self.noise_scheduler.config.num_train_timesteps,
            (bsz,), device=trajectory.device
        ).long()
        # Add noise to the clean trajectory according to the noise magnitude
        # at each timestep (this is the forward diffusion process)
        noisy_trajectory = self.noise_scheduler.add_noise(
            trajectory, noise, timesteps)

        # compute loss mask: only unconditioned entries contribute
        loss_mask = ~condition_mask

        # apply conditioning
        noisy_trajectory[condition_mask] = trajectory[condition_mask]

        # Predict the noise residual
        pred = self.model(noisy_trajectory, timesteps,
            local_cond=local_cond, global_cond=global_cond)

        pred_type = self.noise_scheduler.config.prediction_type
        if pred_type == 'epsilon':
            target = noise
        elif pred_type == 'sample':
            target = trajectory
        else:
            raise ValueError(f"Unsupported prediction type {pred_type}")

        loss = F.mse_loss(pred, target, reduction='none')
        loss = loss * loss_mask.type(loss.dtype)
        loss = reduce(loss, 'b ... -> b (...)', 'mean')
        loss = loss.mean()
        return loss
编写脚本导出onnx模型:
import torch
import hydra
import dill
from diffusion_policy.workspace.base_workspace import BaseWorkspace
checkpoint = "data/0550-test_mean_score=0.969.ckpt"
output_dir = "data/pusht_eval_output"

# NOTE(security): torch.load with pickle_module=dill unpickles arbitrary
# objects; only load checkpoints from a trusted source.
# The context manager closes the checkpoint file handle once it has been read.
with open(checkpoint, 'rb') as checkpoint_file:
    payload = torch.load(checkpoint_file, pickle_module=dill)

# Rebuild the training workspace from the config stored in the checkpoint and
# restore its weights.
cfg = payload['cfg']
cls = hydra.utils.get_class(cfg._target_)
workspace = cls(cfg, output_dir=output_dir)
workspace: BaseWorkspace
workspace.load_payload(payload, exclude_keys=None, include_keys=None)

policy = workspace.model
policy = policy.to("cuda")
# Put the policy in inference mode explicitly before it is traced.
policy.eval()

# Dummy inputs used only for tracing; they fix the exported input shape.
obs = torch.randn(56, 2, 20).to("cuda")
obs_mask = torch.randn(56, 2, 20).to("cuda")

# Export aten::lift_fresh as an identity so the opset-13 exporter accepts it.
torch.onnx.register_custom_op_symbolic("aten::lift_fresh", lambda g, x: x, 13)
torch.onnx.export(policy, (obs, obs_mask), "model.onnx", opset_version=13)
onnxruntime推理脚本:
import numpy as np
import onnxruntime
onnx_session = onnxruntime.InferenceSession(
    "model.onnx",
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)

# Query the tensor names from the model instead of hard-coding them: the
# exporter-generated input name (e.g. 'x.1') is not stable across exports.
input_name = [node.name for node in onnx_session.get_inputs()]
output_name = [node.name for node in onnx_session.get_outputs()]

# Feed a random observation batch to the first graph input.
inputs = {input_name[0]: np.random.randn(56, 2, 20).astype(np.float32)}
outputs = onnx_session.run(output_name, inputs)
print(outputs)
tensorrt推理脚本(tensorrt版本>10;需先将model.onnx转换为model.engine,例如:trtexec --onnx=model.onnx --saveEngine=model.engine):
import numpy as np
import tensorrt as trt
import common
logger = trt.Logger(trt.Logger.WARNING)

# Deserialize the prebuilt engine from disk.
with open("model.engine", "rb") as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
if engine is None:
    raise RuntimeError("Failed to deserialize model.engine")

context = engine.create_execution_context()
inputs, outputs, bindings, stream = common.allocate_buffers(engine)
try:
    # Random observation batch copied into the first input's host buffer.
    input_data = np.random.randn(56, 2, 20).astype(np.float32)
    np.copyto(inputs[0].host, input_data.ravel())
    output = common.do_inference(
        context,
        engine=engine,
        bindings=bindings,
        inputs=inputs,
        outputs=outputs,
        stream=stream,
    )
    # Print before freeing: the returned arrays are the output host buffers,
    # which free_buffers releases.
    print(output)
finally:
    # Release the host/device buffers and the CUDA stream even on failure.
    common.free_buffers(inputs, outputs, stream)
其中common.py:
#
# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import os
import ctypes
from typing import Optional, List
import numpy as np
import tensorrt as trt
from cuda import cuda, cudart
# Compatibility shim: fall back to IOError on interpreters that do not define
# the FileNotFoundError builtin.
try:
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError

# Bit flag derived from TensorRT's EXPLICIT_BATCH network-creation enum value.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
def check_cuda_err(err):
    """Raise if ``err`` is a failing CUDA status or an unrecognized object.

    Args:
        err: A ``cuda.CUresult`` (driver API) or ``cudart.cudaError_t``
            (runtime API) status value.

    Raises:
        RuntimeError: If the status is not the success value of its enum, or
            if ``err`` is neither of the two status types.
    """
    if isinstance(err, cuda.CUresult):
        if err != cuda.CUresult.CUDA_SUCCESS:
            raise RuntimeError("Cuda Error: {}".format(err))
    # `elif`, not `if`: otherwise a successful driver-API result falls through
    # to the `else` below and is reported as an unknown error type.
    elif isinstance(err, cudart.cudaError_t):
        if err != cudart.cudaError_t.cudaSuccess:
            raise RuntimeError("Cuda Runtime Error: {}".format(err))
    else:
        raise RuntimeError("Unknown error type: {}".format(err))
def cuda_call(call):
    """Validate the status of a CUDA call result and return its payload.

    Args:
        call: Sequence whose first element is the status code and whose
            remaining elements are the call's return values.

    Returns:
        The single return value when there is exactly one, otherwise the
        remaining elements as sliced from ``call``.
    """
    status = call[0]
    payload = call[1:]
    check_cuda_err(status)
    return payload[0] if len(payload) == 1 else payload
def GiB(val):
    """Convert a size in GiB to bytes.

    Args:
        val: Number of GiB; integers and fractional values are accepted.

    Returns:
        ``val`` multiplied by 2**30.
    """
    # The shift is parenthesized: `val * 1 << 30` parses as `(val * 1) << 30`,
    # which gives the same result for ints but raises TypeError for floats.
    return val * (1 << 30)
def add_help(description):
    """Parse the command line with a parser that only carries ``description``.

    The parsed result is discarded and unknown arguments are ignored, so the
    only visible effect is argparse's built-in ``-h``/``--help`` handling.

    Args:
        description (str): Text shown in the help output.
    """
    argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    ).parse_known_args()
def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[], err_msg=""):
    """
    Parses the sample's ``-d/--datadir`` arguments and locates data files.

    Args:
        description (str): Description of the sample, shown in the help text.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (List[str]): Filenames to find. Each one is resolved to an
            absolute path.
        err_msg (str): Extra text appended to the error raised when a file
            cannot be found.

    Returns:
        Tuple[List[str], List[str]]: The resolved data directories and the
        absolute paths of the requested files.
    """
    # Default data root; directories given with -d are appended after it.
    default_root = os.path.join(os.sep, "usr", "src", "tensorrt", "data")

    cli = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "-d",
        "--datadir",
        help="Location of the TensorRT sample data directory, and any additional data directories.",
        action="append",
        default=[default_root],
    )
    parsed, _unknown = cli.parse_known_args()

    def resolve(data_dir):
        # Prefer <data_dir>/<subfolder>; fall back to data_dir itself.
        # Warnings are printed only for user-supplied directories.
        candidate = os.path.join(data_dir, subfolder)
        user_supplied = data_dir != default_root
        if not os.path.exists(candidate):
            if user_supplied:
                print("WARNING: " + candidate + " does not exist. Trying " + data_dir + " instead.")
            candidate = data_dir
        # Make sure the chosen directory exists.
        if user_supplied and not os.path.exists(candidate):
            print(
                "WARNING: {:} does not exist. Please provide the correct data path with the -d option.".format(
                    candidate
                )
            )
        return candidate

    data_paths = [resolve(entry) for entry in parsed.datadir]
    return data_paths, locate_files(data_paths, find_files, err_msg)
def locate_files(data_paths, filenames, err_msg=""):
    """
    Resolves each requested file against the given data directories.

    Directories are searched in order, so when a file exists in several of
    them the first directory wins.

    Args:
        data_paths (List[str]): The data directories.
        filenames (List[str]): The names of the files to find.
        err_msg (str): Extra text appended to the error message.

    Returns:
        List[str]: The absolute paths of the files, in the order of
        ``filenames``.

    Raises:
        FileNotFoundError if a file could not be located.
    """
    resolved = [None] * len(filenames)
    for directory in data_paths:
        for slot, name in enumerate(filenames):
            # Skip files already found in an earlier directory.
            if resolved[slot]:
                continue
            candidate = os.path.abspath(os.path.join(directory, name))
            if os.path.exists(candidate):
                resolved[slot] = candidate

    # Check that all files were found
    for located, name in zip(resolved, filenames):
        if not located or not os.path.exists(located):
            raise FileNotFoundError(
                "Could not find {:}. Searched in data paths: {:}\n{:}".format(name, data_paths, err_msg)
            )
    return resolved
class HostDeviceMem:
    """A host buffer (exposed as a numpy array) paired with a device buffer."""

    def __init__(self, size: int, dtype: np.dtype):
        """Allocate ``size`` elements of ``dtype`` on both host and device.

        Args:
            size: Number of elements.
            dtype: Numpy dtype of one element.
        """
        byte_count = size * dtype.itemsize
        # Host side: allocated with cudaMallocHost and viewed as a flat array.
        host_ptr = cuda_call(cudart.cudaMallocHost(byte_count))
        element_ptr_type = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))
        typed_ptr = ctypes.cast(host_ptr, element_ptr_type)
        self._host = np.ctypeslib.as_array(typed_ptr, (size,))
        # Device side: allocated with cudaMalloc.
        self._device = cuda_call(cudart.cudaMalloc(byte_count))
        self._nbytes = byte_count

    @property
    def host(self) -> np.ndarray:
        """The host buffer as a 1-D numpy array."""
        return self._host

    @host.setter
    def host(self, arr: np.ndarray):
        """Copy ``arr`` into the start of the host buffer.

        Raises:
            ValueError: If ``arr`` has more elements than the host buffer.
        """
        if arr.size > self.host.size:
            raise ValueError(
                f"Tried to fit an array of size {arr.size} into host memory of size {self.host.size}"
            )
        destination = self.host[:arr.size]
        np.copyto(destination, arr.flat, casting='safe')

    @property
    def device(self) -> int:
        """The device allocation returned by cudaMalloc."""
        return self._device

    @property
    def nbytes(self) -> int:
        """Size in bytes of each of the two buffers."""
        return self._nbytes

    def __str__(self):
        return f"Host:\n{self.host}\nDevice:\n{self.device}\nSize:\n{self.nbytes}\n"

    def __repr__(self):
        return str(self)

    def free(self):
        """Release the device buffer, then the host buffer."""
        cuda_call(cudart.cudaFree(self.device))
        cuda_call(cudart.cudaFreeHost(self.host.ctypes.data))
def allocate_buffers(engine: trt.ICudaEngine, profile_idx: Optional[int] = None):
    """Allocate host/device buffers for every I/O tensor of ``engine``.

    Args:
        engine: The deserialized TensorRT engine.
        profile_idx: Optimization profile to size the buffers from. Required
            when the engine has dynamic shapes; the profile's maximum shape is
            then used.

    Returns:
        Tuple ``(inputs, outputs, bindings, stream)``: lists of
        ``HostDeviceMem`` for inputs and outputs, the device addresses of all
        I/O tensors in tensor-index order, and a newly created CUDA stream.

    Raises:
        ValueError: If a tensor has a dynamic shape and no profile was given.
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda_call(cudart.cudaStreamCreate())
    tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
    for binding in tensor_names:
        # get_tensor_profile_shape returns (min_shape, optimal_shape, max_shape)
        # Pick out the max shape to allocate enough memory for the binding.
        if profile_idx is None:
            shape = engine.get_tensor_shape(binding)
        else:
            shape = engine.get_tensor_profile_shape(binding, profile_idx)[-1]
        shape_valid = np.all([s >= 0 for s in shape])
        if not shape_valid and profile_idx is None:
            raise ValueError(f"Binding {binding} has dynamic shape, " +\
                "but no profile was specified.")
        size = trt.volume(shape)
        # Implicit-batch engines only exist in older TensorRT releases; the
        # attribute is looked up defensively so newer releases that dropped it
        # do not raise AttributeError.
        if getattr(engine, "has_implicit_batch_dimension", False):
            size *= engine.max_batch_size
        dtype = np.dtype(trt.nptype(engine.get_tensor_dtype(binding)))

        # Allocate host and device buffers
        binding_memory = HostDeviceMem(size, dtype)

        # Append the device buffer to device bindings.
        bindings.append(int(binding_memory.device))

        # Append to the appropriate list.
        if engine.get_tensor_mode(binding) == trt.TensorIOMode.INPUT:
            inputs.append(binding_memory)
        else:
            outputs.append(binding_memory)
    return inputs, outputs, bindings, stream
def free_buffers(inputs: List[HostDeviceMem], outputs: List[HostDeviceMem], stream: cudart.cudaStream_t):
    """Release everything created by ``allocate_buffers``.

    Args:
        inputs: Input buffers to free.
        outputs: Output buffers to free.
        stream: CUDA stream to destroy after the buffers are freed.
    """
    for buffer in inputs + outputs:
        buffer.free()
    cuda_call(cudart.cudaStreamDestroy(stream))
def memcpy_host_to_device(device_ptr: int, host_arr: np.ndarray):
    """Copy ``host_arr`` to ``device_ptr`` with cudaMemcpy, checking for errors.

    The number of bytes copied is inferred from the array's element count and
    item size.
    """
    byte_count = host_arr.size * host_arr.itemsize
    direction = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
    cuda_call(cudart.cudaMemcpy(device_ptr, host_arr, byte_count, direction))
def memcpy_device_to_host(host_arr: np.ndarray, device_ptr: int):
    """Copy from ``device_ptr`` into ``host_arr`` with cudaMemcpy, checking for errors.

    The number of bytes copied is inferred from the array's element count and
    item size.
    """
    byte_count = host_arr.size * host_arr.itemsize
    direction = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
    cuda_call(cudart.cudaMemcpy(host_arr, device_ptr, byte_count, direction))
def _do_inference_base(inputs, outputs, stream, execute_async):
    """Upload inputs, launch inference, download outputs, then synchronize.

    Args:
        inputs: Buffers whose host data is copied to their device side.
        outputs: Buffers whose device data is copied back to their host side.
        stream: CUDA stream on which the copies are enqueued.
        execute_async: Zero-argument callable that launches the inference.

    Returns:
        The host arrays of ``outputs``.
    """
    # Transfer input data to the GPU.
    to_device = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
    for inp in inputs:
        cuda_call(cudart.cudaMemcpyAsync(inp.device, inp.host, inp.nbytes, to_device, stream))

    # Run inference.
    execute_async()

    # Transfer predictions back from the GPU.
    to_host = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
    for out in outputs:
        cuda_call(cudart.cudaMemcpyAsync(out.host, out.device, out.nbytes, to_host, stream))

    # Wait for the enqueued work to finish before the host arrays are read.
    cuda_call(cudart.cudaStreamSynchronize(stream))

    # Return only the host outputs.
    return [out.host for out in outputs]
def do_inference(context, engine, bindings, inputs, outputs, stream):
    """Bind the I/O tensor addresses and run one inference on ``stream``.

    Args:
        context: Execution context used to set addresses and launch.
        engine: Engine queried for the number and names of I/O tensors.
        bindings: Device addresses, indexed like the engine's I/O tensors.
        inputs: Input buffers passed on to the transfer/launch helper.
        outputs: Output buffers passed on to the transfer/launch helper.
        stream: CUDA stream used for the copies and the launch.

    Returns:
        The host arrays of ``outputs``.
    """
    # Setup context tensor address.
    for index in range(engine.num_io_tensors):
        context.set_tensor_address(engine.get_tensor_name(index), bindings[index])

    def launch():
        context.execute_async_v3(stream_handle=stream)

    return _do_inference_base(inputs, outputs, stream, launch)

552

被折叠的 条评论
为什么被折叠?



