Mac's gray box

Mac's gray box style implementation

Mac's gray box


code:

<style type="text/css">
div.quote{
    margin:5px 20px;
    border:1px solid #999999;
    padding:5px;
    background:#dddddd;
    line-height:normal;
}
</style>

<br><div class="quote"> Mac's gray box</div>

I need colorful output with a well-reconstructed point cloud. Code:

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import argparse
import trimesh
from tqdm import tqdm
from typing import Tuple
import time


def normalize_disparity_map(disparity_map):
    """Normalize disparity map for visualization, clip negative values."""
    disparity_valid = disparity_map[disparity_map > 0]
    if len(disparity_valid) == 0:
        return np.zeros_like(disparity_map)
    d_min, d_max = disparity_valid.min(), disparity_valid.max()
    normalized = np.zeros_like(disparity_map, dtype=np.float32)
    if d_max > d_min:
        normalized = (disparity_map - d_min) / (d_max - d_min)
    normalized[disparity_map <= 0] = 0
    return normalized


def visualize_disparity_map(disparity_map, gt_map, save_path=None):
    """
    Visualize or save the estimated and ground truth disparity maps side-by-side.
    Use JET colormap for better visual interpretation (colorful).
    """
    # Normalize both maps
    disp_norm = normalize_disparity_map(disparity_map)
    gt_norm = normalize_disparity_map(gt_map)
    # Apply jet colormap -> (H, W, 3)
    disp_color = plt.cm.jet(disp_norm)[:, :, :3]  # Remove alpha
    gt_color = plt.cm.jet(gt_norm)[:, :, :3]
    # Concatenate horizontally: [result | GT]
    concat_color = np.hstack([disp_color, gt_color])
    if save_path is None:
        plt.figure(figsize=(12, 5))
        plt.imshow(concat_color)
        plt.axis('off')
        plt.title("Estimated Disparity (left) vs Ground Truth (right)")
        plt.show()
    else:
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        plt.imsave(save_path, concat_color, cmap='jet')  # Save as colorful PNG
        print(f"Disparity map saved to {save_path}")


def task1_compute_disparity_map_simple(
    ref_img: np.ndarray,
    sec_img: np.ndarray,
    window_size: int,
    disparity_range: Tuple[int, int],
    matching_function: str
):
    """
    Compute disparity map using simple window-based matching.
    Uses OpenCV's boxFilter for fast cost aggregation.
    Returns a single-channel float32 disparity map.
    """
    ref = ref_img.astype(np.float32)
    sec = sec_img.astype(np.float32)
    H, W = ref.shape
    dmin, dmax = disparity_range
    if dmin < 0:
        raise ValueError("min_disparity should be >= 0.")
    disparities = list(range(dmin, dmax + 1))
    ksize = (window_size, window_size)
    N = float(window_size * window_size)
    eps = 1e-6
    cost_volume = np.empty((len(disparities), H, W), dtype=np.float32)
    for idx, d in enumerate(disparities):
        shifted_sec = np.zeros_like(sec)
        if d == 0:
            shifted_sec = sec.copy()
        else:
            shifted_sec[:, d:] = sec[:, :-d]
        if matching_function == "SSD":
            diff = ref - shifted_sec
            cost = cv2.boxFilter(diff * diff, -1, ksize, normalize=False,
                                 borderType=cv2.BORDER_REFLECT101)
        elif matching_function == "SAD":
            diff = np.abs(ref - shifted_sec)
            cost = cv2.boxFilter(diff, -1, ksize, normalize=False,
                                 borderType=cv2.BORDER_REFLECT101)
        elif matching_function == "normalized_correlation":
            sumI = cv2.boxFilter(ref, -1, ksize, normalize=False,
                                 borderType=cv2.BORDER_REFLECT101)
            sumJ = cv2.boxFilter(shifted_sec, -1, ksize, normalize=False,
                                 borderType=cv2.BORDER_REFLECT101)
            sumI2 = cv2.boxFilter(ref * ref, -1, ksize, normalize=False,
                                  borderType=cv2.BORDER_REFLECT101)
            sumJ2 = cv2.boxFilter(shifted_sec * shifted_sec, -1, ksize, normalize=False,
                                  borderType=cv2.BORDER_REFLECT101)
            sumIJ = cv2.boxFilter(ref * shifted_sec, -1, ksize, normalize=False,
                                  borderType=cv2.BORDER_REFLECT101)
            num = sumIJ - (sumI * sumJ) / N
            den = (np.sqrt(np.maximum(sumI2 - sumI * sumI / N, 0)) *
                   np.sqrt(np.maximum(sumJ2 - sumJ * sumJ / N, 0)) + eps)
            ncc = num / den
            cost = -ncc  # Maximize NCC => Minimize (-NCC)
        else:
            raise ValueError(f"Unknown matching function: {matching_function}")
        if d > 0:
            cost[:, :d] = np.inf
        cost_volume[idx] = cost
    best_idx = np.argmin(cost_volume, axis=0)
    disparity_map = np.take(np.array(disparities, dtype=np.float32), best_idx)
    best_cost = np.min(cost_volume, axis=0)
    disparity_map[~np.isfinite(best_cost)] = 0.0
    return disparity_map


def task1_simple_disparity(ref_img, sec_img, gt_map, img_name='tsukuba'):
    """
    Run Task 1: test different configurations of window size, disparity range,
    and matching function. Save all results as colorful disparity images and log runtime.
    """
    window_sizes = [5, 9, 15]
    disparity_range = (0, 64)
    matching_functions = ['SSD', 'SAD', 'normalized_correlation']
    disparity_maps = []
    print(f"🚀 Starting Task 1: Testing multiple configurations on '{img_name}'...")
    for window_size in window_sizes:
        for matching_function in matching_functions:
            start_time = time.time()
            print(f"⚙️ Computing: ws={window_size}, func={matching_function}")
            disparity_map = task1_compute_disparity_map_simple(
                ref_img, sec_img, window_size, disparity_range, matching_function)
            runtime = time.time() - start_time
            print(f"⏱️ Done in {runtime:.2f}s")
            disparity_maps.append((disparity_map.copy(), window_size,
                                   matching_function, disparity_range))
            # Save path with full info
            dmin, dmax = disparity_range
            save_path = f"output/task1_{img_name}_{window_size}_{dmin}_{dmax}_{matching_function}.png"
            visualize_disparity_map(disparity_map, gt_map, save_path=save_path)
            # Log runtime for report
            with open("output/runtime_log.txt", "a") as f:
                f.write(f"Task1,{img_name},{window_size},{dmin},{dmax},{matching_function},{runtime:.4f}\n")
    return disparity_maps


def task2_compute_depth_map(disparity_map, baseline=0.2, focal_length=615):
    """
    Convert disparity to depth using z = fB / d.
    Ignore zero/negative disparities.
    """
    depth_map = np.zeros_like(disparity_map, dtype=np.float32)
    valid = disparity_map > 0
    depth_map[valid] = (focal_length * baseline) / (disparity_map[valid] + 1e-8)
    return depth_map


def task2_visualize_pointcloud(
    ref_img_bgr: np.ndarray,
    disparity_map: np.ndarray,
    save_path: str = 'output/task2_tsukuba.ply'
):
    """
    Generate a FULLY COLORFUL 3D point cloud from disparity.
    - X,Y: pixel coordinates
    - Z: depth derived from disparity
    - Color: true RGB from reference image (convert BGR -> RGB)
    Filters out invalid depths and extreme outliers.
    """
    # Calibration parameters (Tsukuba dataset typical values)
    baseline = 0.2       # meters
    focal_length = 615   # pixels
    depth_map = task2_compute_depth_map(disparity_map, baseline, focal_length)
    # Remove infinite/nan and clip top 1%
    valid = np.isfinite(depth_map) & (depth_map > 0)
    if np.sum(valid) == 0:
        print("⚠️ No valid depth points found!")
        return
    # Clip far outliers
    max_depth = np.percentile(depth_map[valid], 99)
    valid &= (depth_map <= max_depth)
    H, W = depth_map.shape
    xs, ys = np.meshgrid(np.arange(W), np.arange(H), indexing='xy')
    points = np.stack([xs[valid], ys[valid], depth_map[valid]], axis=1)
    # Convert BGR to RGB and extract colors
    ref_rgb = cv2.cvtColor(ref_img_bgr, cv2.COLOR_BGR2RGB)
    colors = ref_rgb[valid].reshape(-1, 3).astype(np.uint8)
    # Create and save PLY
    pc = trimesh.PointCloud(vertices=points, colors=colors)
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    pc.export(save_path, file_type='ply')
    print(f"🎨 Colored point cloud saved to {save_path}")


def task3_compute_disparity_map_dp(ref_img, sec_img):
    """
    Dynamic Programming based stereo matching along horizontal scanlines.
    Handles occlusions via penalty terms.
    """
    ref = ref_img.astype(np.float32)
    sec = sec_img.astype(np.float32)
    H, W = ref.shape
    occlusion_penalty = 20.0
    max_disparity = 64
    disparity_map = np.zeros((H, W), dtype=np.float32)
    t0 = time.perf_counter()
    for r in tqdm(range(H), desc="DP Scanline Processing"):
        L = ref[r]
        R = sec[r]
        dp = np.full((W + 1, W + 1), np.inf, dtype=np.float32)
        move = np.zeros((W + 1, W + 1), dtype=np.uint8)  # 0=match, 1=occL, 2=occR
        dp[0, 0] = 0.0
        for i in range(1, W + 1):
            dp[i, 0] = dp[i - 1, 0] + occlusion_penalty
            move[i, 0] = 1
        for i in range(1, W + 1):
            li = L[i - 1]
            for j in range(1, i + 1):
                # Match only if within max_disparity
                disp = i - j
                if disp <= max_disparity:
                    c_match = dp[i - 1, j - 1] + abs(li - R[j - 1])
                else:
                    c_match = np.inf
                c_occL = dp[i - 1, j] + occlusion_penalty
                c_occR = dp[i, j - 1] + occlusion_penalty
                if c_match <= c_occL and c_match <= c_occR:
                    dp[i, j] = c_match
                    move[i, j] = 0
                elif c_occL <= c_occR:
                    dp[i, j] = c_occL
                    move[i, j] = 1
                else:
                    dp[i, j] = c_occR
                    move[i, j] = 2
        # Backtrack
        i, j = W, W
        while i > 0 or j > 0:
            m = move[i, j]
            if i > 0 and j > 0 and m == 0:
                disparity_map[r, i - 1] = float(i - j)
                i -= 1
                j -= 1
            elif i > 0 and (j == 0 or m == 1):
                disparity_map[r, i - 1] = 0.0
                i -= 1
            else:
                j -= 1
    t1 = time.perf_counter()
    print(f"[Task3] DP runtime: {t1 - t0:.3f}s")
    return disparity_map


def main(tasks):
    # Load images
    try:
        moebius_img1 = cv2.imread("data/moebius1.png")
        moebius_img1_gray = cv2.cvtColor(moebius_img1, cv2.COLOR_BGR2GRAY).astype(np.float32)
        moebius_img2 = cv2.imread("data/moebius2.png")
        moebius_img2_gray = cv2.cvtColor(moebius_img2, cv2.COLOR_BGR2GRAY).astype(np.float32)
        moebius_gt = cv2.imread("data/moebius_gt.png", cv2.IMREAD_GRAYSCALE).astype(np.float32)
    except Exception as e:
        print("Moebius data not available:", e)
    tsukuba_img1 = cv2.imread("data/tsukuba1.jpg")
    tsukuba_img1_gray = cv2.cvtColor(tsukuba_img1, cv2.COLOR_BGR2GRAY).astype(np.float32)
    tsukuba_img2 = cv2.imread("data/tsukuba2.jpg")
    tsukuba_img2_gray = cv2.cvtColor(tsukuba_img2, cv2.COLOR_BGR2GRAY).astype(np.float32)
    tsukuba_gt = cv2.imread("data/tsukuba_gt.jpg", cv2.IMREAD_GRAYSCALE).astype(np.float32)
    # Ensure output directory exists
    os.makedirs("output", exist_ok=True)
    # Clear or init runtime log
    with open("output/runtime_log.txt", "a") as f:
        f.write("Method,Image,WindowSize,dMin,dMax,MatchingFunction,Runtime(s)\n")
    # Task 0: OpenCV Baseline
    if '0' in tasks:
        print('🔧 Running Task 0: OpenCV StereoBM baseline...')
        stereo = cv2.StereoBM.create(numDisparities=64, blockSize=15)
        tsukuba_disparity_cv2 = stereo.compute(tsukuba_img1_gray.astype(np.uint8),
                                               tsukuba_img2_gray.astype(np.uint8)).astype(np.float32)
        tsukuba_disparity_cv2[tsukuba_disparity_cv2 < 0] = 0
        visualize_disparity_map(tsukuba_disparity_cv2, tsukuba_gt,
                                save_path="output/task0_tsukuba_colormap.png")
        if '2' in tasks:
            task2_visualize_pointcloud(tsukuba_img1, tsukuba_disparity_cv2,
                                       save_path='output/task2_tsukuba_cv2.ply')
    # Task 1: Simple Matching
    if '1' in tasks:
        print('🔍 Running Task 1: Window-based Matching with Multiple Settings...')
        start_time = time.time()
        disparity_maps = task1_simple_disparity(tsukuba_img1_gray, tsukuba_img2_gray,
                                                tsukuba_gt, img_name='tsukuba')
        total_time = time.time() - start_time
        print(f"🏁 Task 1 completed in {total_time:.2f}s")
        if '2' in tasks:
            print('🎨 Generating colored point clouds for each Task 1 result...')
            for dm, ws, mf, dr in disparity_maps:
                dmin, dmax = dr
                path = f'output/task2_tsukuba_{ws}_{dmin}_{dmax}_{mf}.ply'
                task2_visualize_pointcloud(tsukuba_img1, dm, save_path=path)
    # Task 3: DP Matching
    if '3' in tasks:
        print('⚡ Running Task 3: Dynamic Programming Matching...')
        start_time = time.time()
        tsukuba_disparity_dp = task3_compute_disparity_map_dp(tsukuba_img1_gray, tsukuba_img2_gray)
        runtime = time.time() - start_time
        print(f"⏱️ Task 3 runtime: {runtime:.2f}s")
        with open("output/runtime_log.txt", "a") as f:
            f.write(f"Task3,tsukuba,DP,0,64,DP,{runtime:.4f}\n")
        visualize_disparity_map(tsukuba_disparity_dp, tsukuba_gt,
                                save_path='output/task3_tsukuba_colormap.png')
        if '2' in tasks:
            task2_visualize_pointcloud(tsukuba_img1, tsukuba_disparity_dp,
                                       save_path='output/task2_tsukuba_dp.ply')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Homework 4: Binocular Stereo (FULL COLOR VERSION)')
    parser.add_argument('--tasks', type=str, default='0123',
                        help='Tasks to run: e.g., 0, 12, 023')
    args = parser.parse_args()
    main(args.tasks)

Follow the assignment requirements strictly and do not modify the original code.
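A quick way to sanity-check the window-based matcher above is to feed it a synthetic stereo pair with a known shift. This is a minimal sketch, not part of the assignment code; it assumes the functions above are in scope (e.g., paste it at the bottom of the same script), and all values here are illustrative:

import numpy as np

# Build a synthetic pair: sec(x) = ref(x + d), so the matcher should recover d.
rng = np.random.default_rng(0)
ref = (rng.random((64, 128)) * 255).astype(np.float32)
true_d = 7
sec = np.zeros_like(ref)
sec[:, :-true_d] = ref[:, true_d:]

disp = task1_compute_disparity_map_simple(ref, sec, window_size=9,
                                          disparity_range=(0, 16),
                                          matching_function="SSD")
# Median over the region where the shift is defined should be ~7.
print("median disparity:", np.median(disp[:, true_d:]))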
11-28
My code looks like this:

import numpy as np
from scipy import ndimage, spatial
import cv2
import os
import matplotlib.pyplot as plt

ROOT_DIR = '/Users/mac/Desktop/Problem_Set_2-3'
IMGDIR = os.path.join(ROOT_DIR, 'Problem2Images')
OUTPUT_DIR = os.path.join(ROOT_DIR, 'outputs')
os.makedirs(OUTPUT_DIR, exist_ok=True)


def gradient_x(img):
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    gray = ndimage.gaussian_filter(gray.astype(np.float32), sigma=1)
    grad_x = ndimage.sobel(gray, axis=1, mode='reflect')
    return grad_x


def gradient_y(img):
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    gray = ndimage.gaussian_filter(gray.astype(np.float32), sigma=1)
    grad_y = ndimage.sobel(gray, axis=0, mode='reflect')
    return grad_y


def harris_response(img, alpha, win_size):
    if img is None:
        raise ValueError("输入图像为空")
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    grad_x = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
    Ixx = grad_x * grad_x
    Ixy = grad_x * grad_y
    Iyy = grad_y * grad_y
    sigma = win_size / 4
    A = cv2.GaussianBlur(Ixx, (win_size, win_size), sigma)
    B = cv2.GaussianBlur(Ixy, (win_size, win_size), sigma)
    C = cv2.GaussianBlur(Iyy, (win_size, win_size), sigma)
    det_M = A * C - B * B
    trace_M = A + C
    R = det_M - alpha * (trace_M ** 2)
    return R


def corner_selection(R, thresh, min_dist):
    size = min_dist * 2 + 1
    R_max = ndimage.maximum_filter(R, size=size)
    mask = (R == R_max) & (R > thresh * 0.08)
    corners_y, corners_x = np.where(mask)
    corner_responses = R[corners_y, corners_x]
    sorted_indices = np.argsort(corner_responses)[::-1]
    max_corners = min(2000, len(sorted_indices))
    selected_indices = sorted_indices[:max_corners]
    corners_x = corners_x[selected_indices]
    corners_y = corners_y[selected_indices]
    pix = list(zip(corners_x, corners_y))
    return pix


def histogram_of_gradients(img, pix):
    if img is None:
        raise ValueError("输入图像为空")
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img
    grad_x = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
    grad_mag = np.sqrt(grad_x ** 2 + grad_y ** 2)
    grad_ori = np.arctan2(grad_y, grad_x) * 180 / np.pi
    grad_ori = np.mod(grad_ori, 360)
    cell_size = 8
    block_size = 2
    num_bins = 9
    bin_width = 360 / num_bins
    features = []
    for point in pix:
        x, y = point
        patch_size = cell_size * block_size
        if (x < patch_size // 2 or x >= gray.shape[1] - patch_size // 2 or
                y < patch_size // 2 or y >= gray.shape[0] - patch_size // 2):
            features.append(np.zeros(block_size * block_size * num_bins))
            continue
        keypoint_feature = []
        for i in range(block_size):
            for j in range(block_size):
                start_x = x - patch_size // 2 + j * cell_size
                start_y = y - patch_size // 2 + i * cell_size
                cell_hist = np.zeros(num_bins)
                for cy in range(cell_size):
                    for cx in range(cell_size):
                        px = start_x + cx
                        py = start_y + cy
                        magnitude = grad_mag[py, px]
                        orientation = grad_ori[py, px]
                        bin_idx = int(orientation / bin_width)
                        bin_idx = bin_idx % num_bins
                        cell_hist[bin_idx] += magnitude
                keypoint_feature.extend(cell_hist)
        keypoint_feature = np.array(keypoint_feature)
        norm = np.linalg.norm(keypoint_feature)
        if norm > 0.1:
            keypoint_feature = keypoint_feature / norm
        features.append(keypoint_feature)
    features = np.array(features)
    return features


def feature_matching(img_1, img_2):
    R1 = harris_response(img_1, 0.04, 9)
    R2 = harris_response(img_2, 0.04, 9)
    cor1 = corner_selection(R1, 0.0005 * np.max(R1) if np.max(R1) > 0 else 0, 8)
    cor2 = corner_selection(R2, 0.0005 * np.max(R2) if np.max(R2) > 0 else 0, 8)
    if len(cor1) < 4 or len(cor2) < 4:
        return [], []
    fea1 = histogram_of_gradients(img_1, cor1)
    fea2 = histogram_of_gradients(img_2, cor2)
    dis = spatial.distance.cdist(fea1, fea2, metric='euclidean')
    matches_1to2 = []
    matches_2to1 = []
    threshold = 0.75
    for i in range(len(cor1)):
        if len(dis[i]) < 2:
            continue
        sorted_idx = np.argsort(dis[i])
        if dis[i][sorted_idx[0]] < threshold * dis[i][sorted_idx[1]]:
            matches_1to2.append((i, sorted_idx[0]))
    for j in range(len(cor2)):
        if len(dis[:, j]) < 2:
            continue
        sorted_idx = np.argsort(dis[:, j])
        if dis[sorted_idx[0], j] < threshold * dis[sorted_idx[1], j]:
            matches_2to1.append((sorted_idx[0], j))
    consistent = set(matches_1to2) & set(matches_2to1)
    pixels_1, pixels_2 = [], []
    for i, j in consistent:
        pixels_1.append(cor1[i])
        pixels_2.append(cor2[j])
    if len(pixels_1) < 6:
        all_possible = []
        for i in range(len(cor1)):
            if len(dis[i]) == 0:
                continue
            j = np.argmin(dis[i])
            all_possible.append((i, j, dis[i][j]))
        all_possible.sort(key=lambda x: x[2])
        for i, j, _ in all_possible:
            if (i, j) not in consistent:
                pixels_1.append(cor1[i])
                pixels_2.append(cor2[j])
            if len(pixels_1) >= 6:
                break
    return pixels_1, pixels_2


def test_matching():
    img_1 = cv2.imread(os.path.join(IMGDIR, '1_1.jpg'))
    img_2 = cv2.imread(os.path.join(IMGDIR, '1_2.jpg'))
    if img_1 is None or img_2 is None:
        print("⚠️ 测试图像加载失败,请检查路径")
        return
    img_gray_1 = cv2.cvtColor(img_1, cv2.COLOR_BGR2GRAY)
    img_gray_2 = cv2.cvtColor(img_2, cv2.COLOR_BGR2GRAY)
    pixels_1, pixels_2 = feature_matching(img_1, img_2)
    H_1, W_1 = img_gray_1.shape
    H_2, W_2 = img_gray_2.shape
    img = np.zeros((max(H_1, H_2), W_1 + W_2, 3))
    img[:H_1, :W_1, (2, 1, 0)] = img_1 / 255
    img[:H_2, W_1:, (2, 1, 0)] = img_2 / 255
    plt.figure(figsize=(20, 10), dpi=300)
    plt.imshow(img)
    N = len(pixels_1)
    for i in range(N):
        x1, y1 = pixels_1[i]
        x2, y2 = pixels_2[i]
        plt.plot([x1, x2 + W_1], [y1, y2])
    plt.savefig(os.path.join(OUTPUT_DIR, 'test_matching.jpg'), bbox_inches='tight')
    plt.close()


def compute_homography(pixels_1, pixels_2):
    if len(pixels_1) < 4:
        raise ValueError("Need at least 4 point pairs to compute homography")
    A = []
    for (x1, y1), (x2, y2) in zip(pixels_1, pixels_2):
        A.append([x1, y1, 1, 0, 0, 0, -x2 * x1, -x2 * y1, -x2])
        A.append([0, 0, 0, x1, y1, 1, -y2 * x1, -y2 * y1, -y2])
    A = np.array(A)
    U, S, Vt = np.linalg.svd(A)
    H = Vt[-1, :].reshape(3, 3)
    homo_matrix = H / H[2, 2]
    return homo_matrix


def align_pair(pixels_1, pixels_2):
    if len(pixels_1) < 4:
        raise ValueError("Need at least 4 point pairs for RANSAC")
    est_homo = None
    best_inliers = 0
    max_iterations = 3000
    threshold = 4.0
    for _ in range(max_iterations):
        indices = np.random.choice(len(pixels_1), 4, replace=False)
        sample_1 = [pixels_1[i] for i in indices]
        sample_2 = [pixels_2[i] for i in indices]
        try:
            H = compute_homography(sample_1, sample_2)
            inlier_count = 0
            for (x1, y1), (x2, y2) in zip(pixels_1, pixels_2):
                p1 = np.array([x1, y1, 1])
                p2_est = H @ p1
                p2_est = p2_est / p2_est[2]
                error = np.sqrt((p2_est[0] - x2) ** 2 + (p2_est[1] - y2) ** 2)
                if error < threshold:
                    inlier_count += 1
            if inlier_count > best_inliers:
                best_inliers = inlier_count
                est_homo = H
        except np.linalg.LinAlgError:
            continue
    if est_homo is None:
        est_homo = compute_homography(pixels_1, pixels_2)
    return est_homo


def stitch_blend(img_1, img_2, est_homo):
    if img_1 is None or img_2 is None:
        raise ValueError("输入图像为空")
    h1, w1, d1 = img_1.shape
    h2, w2, d2 = img_2.shape
    img_1_uint8 = img_1.copy()
    img_2_uint8 = img_2.copy()
    corners_img1 = np.array([[0, 0, 1], [w1, 0, 1], [0, h1, 1], [w1, h1, 1]], dtype=np.float32)
    corners_img1_proj = (est_homo @ corners_img1.T).T
    corners_img1_proj = corners_img1_proj[:, :2] / corners_img1_proj[:, 2:]
    all_x = np.concatenate([[0, w2], corners_img1_proj[:, 0]])
    all_y = np.concatenate([[0, h2], corners_img1_proj[:, 1]])
    x_min = int(np.floor(all_x.min()))
    x_max = int(np.ceil(all_x.max()))
    y_min = int(np.floor(all_y.min()))
    y_max = int(np.ceil(all_y.max()))
    canvas_h = y_max - y_min + 1
    canvas_w = x_max - x_min + 1
    est_img1 = np.zeros((canvas_h, canvas_w, 3), dtype=np.uint8)
    est_img2 = np.zeros_like(est_img1)
    x_range = np.arange(x_min, x_max + 1, 1, dtype=np.float64)
    y_range = np.arange(y_min, y_max + 1, 1, dtype=np.float64)
    x, y = np.meshgrid(x_range, y_range)
    try:
        homo_inv = np.linalg.inv(est_homo)
    except np.linalg.LinAlgError:
        homo_inv = np.linalg.pinv(est_homo)
    ones = np.ones_like(x, dtype=np.float64)
    coords = np.stack([x, y, ones], axis=-1)
    coords_proj = coords @ homo_inv.T
    coords_proj = coords_proj / coords_proj[..., 2:3]
    trans_x = coords_proj[..., 0].astype(np.float32)
    trans_y = coords_proj[..., 1].astype(np.float32)
    est_img1 = cv2.remap(
        img_1_uint8, trans_x, trans_y,
        interpolation=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0)
    )
    start_y = max(0, -y_min)
    end_y = start_y + h2
    start_x = max(0, -x_min)
    end_x = start_x + w2
    end_y = min(end_y, canvas_h)
    end_x = min(end_x, canvas_w)
    img2_crop_h = end_y - start_y
    img2_crop_w = end_x - start_x
    if img2_crop_h > 0 and img2_crop_w > 0:
        est_img2[start_y:end_y, start_x:end_x] = img_2_uint8[:img2_crop_h, :img2_crop_w]
    alpha1 = cv2.remap(
        np.ones((h1, w1), dtype=np.float32), trans_x, trans_y,
        cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0
    )
    alpha2 = np.zeros_like(alpha1)
    alpha2[start_y:end_y, start_x:end_x] = 1.0
    alpha_sum = alpha1 + alpha2
    alpha_sum[alpha_sum == 0] = 1.0
    alpha1_norm = alpha1 / alpha_sum
    alpha2_norm = alpha2 / alpha_sum
    alpha1_3ch = np.stack([alpha1_norm, alpha1_norm, alpha1_norm], axis=-1)
    alpha2_3ch = np.stack([alpha2_norm, alpha2_norm, alpha2_norm], axis=-1)
    est_img = (est_img1.astype(np.float32) * alpha1_3ch +
               est_img2.astype(np.float32) * alpha2_3ch)
    est_img = np.clip(est_img, 0, 255).astype(np.uint8)
    return est_img


def generate_panorama(ordered_img_seq):
    length = len(ordered_img_seq)
    mid = length // 2
    principle_img = ordered_img_seq[mid].copy()
    for j in range(mid + 1, length):
        try:
            pixels1, pixels2 = feature_matching(ordered_img_seq[j], principle_img)
            if len(pixels1) < 4:
                print(f"匹配点不足,跳过图像 {j}")
                continue
            homo_matrix = align_pair(pixels1, pixels2)
            principle_img = stitch_blend(ordered_img_seq[j], principle_img, homo_matrix)
            principle_img = np.uint8(principle_img)
        except Exception as e:
            print(f"右侧拼接失败 {j}: {e}")
    for i in range(mid - 1, -1, -1):
        try:
            pixels1, pixels2 = feature_matching(ordered_img_seq[i], principle_img)
            if len(pixels1) < 4:
                print(f"匹配点不足,跳过图像 {i}")
                continue
            homo_matrix = align_pair(pixels1, pixels2)
            principle_img = stitch_blend(ordered_img_seq[i], principle_img, homo_matrix)
            principle_img = np.uint8(principle_img)
        except Exception as e:
            print(f"左侧拼接失败 {i}: {e}")
    return principle_img


test_matching()

blend_pairs = [
    (os.path.join(IMGDIR, "1_1.jpg"), os.path.join(IMGDIR, "1_2.jpg")),
    (os.path.join(IMGDIR, "2_1.jpg"), os.path.join(IMGDIR, "2_2.jpg")),
    (os.path.join(IMGDIR, "3_1.jpg"), os.path.join(IMGDIR, "3_2.jpg"))
]
for idx, (path1, path2) in enumerate(blend_pairs, 1):
    img1 = cv2.imread(path1)
    img2 = cv2.imread(path2)
    if img1 is None:
        print(f"无法加载图像1: {path1}")
        continue
    if img2 is None:
        print(f"无法加载图像2: {path2}")
        continue
    try:
        pixels1, pixels2 = feature_matching(img1, img2)
        homo = align_pair(pixels1, pixels2)
        blended_img = stitch_blend(img1, img2, homo)
        save_path = os.path.join(OUTPUT_DIR, f"blend_{idx}.png")
        cv2.imwrite(save_path, blended_img)
        print(f"成功保存: {save_path}")
    except Exception as e:
        print(f"混合组{idx}失败:{e}")

panorama_tasks = [
    {
        "name": "grail",
        "files": [f"grail{i:02d}.jpg" for i in range(5)],
        "output": "panorama_1.png"
    },
    {
        "name": "library",
        "files": [f"{i}.jpg" for i in range(10, 15)],
        "output": "panorama_2.png"
    },
    {
        "name": "parrington",
        "files": [f"prtn{i:02d}.jpg" for i in range(5)],
        "output": "panorama_3.png"
    },
    {
        "name": "Xue-Mountain-Entrance",
        "files": [f"DSC_017{i}.jpg" for i in range(4, 9)],
        "output": "panorama_4.png"
    }
]
for task in panorama_tasks:
    img_list = []
    for file in task["files"]:
        img_path = os.path.join(IMGDIR, "panoramas", task["name"], file)
        img = cv2.imread(img_path)
        if img is None:
            print(f"无法加载图像: {img_path}")
            continue
        img_list.append(img)
    if len(img_list) < 2:
        print(f"图像数量不足,跳过任务: {task['name']}")
        continue
    try:
        pano = generate_panorama(img_list)
        if pano is not None:
            save_path = os.path.join(OUTPUT_DIR, task["output"])
            cv2.imwrite(save_path, pano)
            print(f"成功保存全景图: {save_path}")
        else:
            print(f"全景图生成失败: {task['name']}")
    except Exception as e:
        print(f"全景任务失败 {task['name']}: {e}")

Is this correct?
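One quick self-check for compute_homography above is to feed it four points related by a known translation and verify the DLT recovers it. This is a minimal sketch, not part of the submitted code; it assumes compute_homography is in scope, and the point values are arbitrary examples:

import numpy as np

# Four non-degenerate points and the same points shifted by (+10, +5).
pts1 = [(0, 0), (100, 0), (0, 100), (100, 100)]
pts2 = [(x + 10, y + 5) for (x, y) in pts1]

H = compute_homography(pts1, pts2)
print(np.round(H, 3))
# Expected, up to numerical noise:
# [[ 1.  0. 10.]
#  [ 0.  1.  5.]
#  [ 0.  0.  1.]]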
10-30
digraph main_flow {
    rankdir=TB;
    node [shape=rect, style=rounded];

    Start [label="函数入口", shape=oval];
    AuthCheck [label="SOAP上下文验证\n&用户认证"];
    ParseReq [label="解析XML请求\n提取Type/Token"];
    LoadConfig [label="加载全局配置:\nVIDEO_MAIN/MINOR\nMOTION_DETECT/OD_ALARM"];
    ProfileSelect [label="配置文件选择"];
    ProfileLoop [label="配置文件循环处理", shape=box];
    BuildComponents [label="构建配置组件"];
    VideoSource [label="视频源配置\n(token/bounds)"];
    VideoEncoder [label="视频编码器\n(编码/分辨率/码率)"];
    Analytics [label="分析模块\n(移动侦测/篡改检测)"];
    MemManage [label="内存管理\n(MALLOC_AND_REGISTER)"];
    GenResponse [label="生成XML响应"];
    FreeMem [label="释放资源\n(free_ptrs)"];
    End [label="返回响应", shape=oval];

    // 主流程
    Start -> AuthCheck;
    AuthCheck -> ParseReq;
    ParseReq -> LoadConfig;
    LoadConfig -> ProfileSelect;
    ProfileSelect -> ProfileLoop [label="确定处理范围"];
    ProfileLoop -> BuildComponents;
    BuildComponents -> VideoSource;
    VideoSource -> VideoEncoder;
    VideoEncoder -> Analytics;
    Analytics -> MemManage;
    MemManage -> ProfileLoop [label="下一个配置"];
    ProfileLoop -> GenResponse [label="全部处理完成"];
    GenResponse -> FreeMem;
    FreeMem -> End;

    // 分支说明
    ProfileSelect -> ProfileLoop [label="Type=All\n处理所有4个配置"];
    ProfileSelect -> ProfileLoop [label="指定Token\n处理匹配配置"];

    // 核心数据结构
    subgraph cluster_data {
        label="关键数据结构";
        fontcolor=gray;
        node [shape=note];
        ReqData [label="请求参数:\n- Type(All/VideoSource等)\n- Token(可选)"];
        Profile [label="配置文件结构:\n- token\n- video_source\n- video_encoder\n- analytics"];
        Response [label="响应结构:\n- profiles[]\n- configuration"];
    }

    ReqData -> ParseReq [style=dotted];
    Profile -> BuildComponents [style=dotted];
    Response -> GenResponse [style=dotted];
}

After pressing Ctrl+V in the draw.io interface, the text above was simply pasted in as raw text.
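If pasting the raw DOT text into draw.io does not produce a diagram, the same source renders directly with Graphviz. A minimal sketch, assuming the graphviz Python package and the Graphviz binaries are installed; the file name main_flow.dot is only an example for where the DOT text above might be saved:

from graphviz import Source

# Read the DOT source saved from the snippet above and render it to PNG.
with open("main_flow.dot") as f:
    Source(f.read()).render("main_flow", format="png", cleanup=True)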
09-15
Content summary: This article presents a localization system design based on multi-sensor fusion. GPS, an odometer, and an electronic compass serve as the positioning sensors, and an Extended Kalman Filter (EKF) fuses the multi-source measurements to output the filtered position of the target, with a complete Matlab implementation provided. The approach improves positioning accuracy and stability, and is particularly suited to complex environments where a single sensor drifts or loses its signal, such as autonomous driving and mobile robot navigation. The article details how each sensor is modeled, how the state-transition and observation equations are constructed, and the concrete implementation steps of the EKF, so it has solid engineering value.
Target audience: graduate students, researchers, and engineers in autonomous driving, robot navigation, and related fields who have basic Matlab skills and are familiar with sensor principles and filtering algorithms.
Use cases and goals: (1) learn the basic theory and implementation of multi-sensor fusion; (2) apply it to high-precision localization and navigation for mobile robots, autonomous vehicles, and UAVs; (3) use it as a teaching case or project reference for applying the EKF in practice.
Reading suggestions: work through the Matlab code line by line, focus on the design of the state-prediction and measurement-update modules, try feeding in real sensor data or simulated noise to verify robustness, and extend the study to more advanced filters such as the UKF or particle filter for comparison.
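The article's implementation is in Matlab; as a rough illustration of the predict/update structure it describes, here is a minimal Python sketch. The state [x, y, theta], the unicycle motion model, and the noise matrices are assumptions for illustration, not the article's exact models:

import numpy as np

def ekf_step(x, P, u, z, Q, R, dt):
    """One EKF cycle: u = (v, omega) from the odometer,
    z = (gps_x, gps_y, compass_theta). All models are illustrative."""
    v, w = u
    theta = x[2]
    # Predict: nonlinear motion model and its Jacobian F
    x_pred = x + dt * np.array([v * np.cos(theta), v * np.sin(theta), w])
    F = np.array([[1, 0, -dt * v * np.sin(theta)],
                  [0, 1,  dt * v * np.cos(theta)],
                  [0, 0, 1]])
    P_pred = F @ P @ F.T + Q
    # Update: GPS measures position, compass measures heading (linear H)
    H = np.eye(3)
    y = z - H @ x_pred                   # innovation
    S = H @ P_pred @ H.T + R             # innovation covariance
    K = P_pred @ H.T @ np.linalg.inv(S)  # Kalman gain
    x_new = x_pred + K @ y
    P_new = (np.eye(3) - K @ H) @ P_pred
    return x_new, P_new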
Content summary: Centered on the development trends of next-generation sensors for intelligent vehicles, the article explains how the BEV (bird's-eye-view) end-to-end perception-fusion architecture has become the new paradigm for intelligent-driving perception systems. Traditional late-fusion and early-fusion schemes struggle to meet the needs of higher-level autonomy because of information loss or excessive compute demands, whereas Transformer-based BEV fusion combines multi-sensor features in a unified coordinate frame, preserving perception accuracy while remaining computationally feasible and markedly improving robustness and reliability in complex scenes. The article also notes that deploying BEV models faces heavy compute requirements and high data costs, and proposes an efficient closed data loop of data collection, model training, algorithm iteration, and data feedback, using automated labeling and long-tail data feedback to keep the algorithm evolving, reduce dependence on manual annotation, and raise data efficiency. Representative company cases further validate the technical feasibility and economic value of this path.
Target audience: engineers working on automotive electronics and intelligent-driving perception algorithms, as well as product managers and technical leads following autonomous-driving trends; readers with basic autonomous-driving knowledge who want a deeper understanding of BEV architectures and data closed loops.
Use cases and goals: (1) understand why BEV + Transformer has become the mainstream perception-fusion route; (2) grasp the key role of the data closed loop in BEV model iteration and how it is engineered; (3) inform decisions on intelligent-driving system architecture, sensor selection, and algorithm optimization.
Reading suggestions: the article emphasizes trend analysis and system-level thinking; read it against your own project background, focus on the BEV fusion logic and the construction of the data closed loop, and consider studying related companies' applications in scenarios such as integrated cockpit-parking (舱泊一体).