检查图片类型 [JPEG(.jpg、.jpeg)、TIF、GIF、BMP、PNG、PDF]

/// <summary>
/// Identifies a file's format from its first two bytes (its signature).
/// </summary>
/// <param name="fileName">Path of the file to inspect.</param>
/// <returns>
/// The <see cref="FileExtension"/> member whose signature matches, or
/// <see cref="FileExtension.VALIDFILE"/> when the file does not exist, is
/// shorter than two bytes, fails while being read, or has a signature that
/// matches none of the known formats.
/// </returns>
/// <remarks>
/// Failures to open an existing file (for example a sharing violation or
/// missing permission) are not swallowed; they propagate to the caller.
/// </remarks>
public static FileExtension CheckFileExtension(string fileName)
  {
   if (!File.Exists(fileName))
   {
    return FileExtension.VALIDFILE;
   }

   int first;
   int second;
   // The using block disposes the stream on every exit path.
   using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
   {
    try
    {
     // FileStream.ReadByte returns -1 at end of stream instead of throwing.
     first = fs.ReadByte();
     second = fs.ReadByte();
    }
    catch (IOException)
    {
     // A read failure is reported as "unrecognized", never as a format.
     return FileExtension.VALIDFILE;
    }
   }

   if (first < 0 || second < 0)
   {
    // Fewer than two bytes: there is no signature to inspect.
    return FileExtension.VALIDFILE;
   }

   // Compare the two bytes as one 16-bit value. Concatenating their decimal
   // text is ambiguous: bytes (7, 173) and (71, 73) both yield "7173", so an
   // arbitrary file could be misreported as GIF.
   switch ((first << 8) | second)
   {
    case 0xFFD8: // 255, 216
     return FileExtension.JPEG;
    case 0x4949: // 73, 73  ("II")
     return FileExtension.TIF;
    case 0x4749: // 71, 73  ("GI")
     return FileExtension.GIF;
    case 0x424D: // 66, 77  ("BM")
     return FileExtension.BMP;
    case 0x8950: // 137, 80
     return FileExtension.PNG;
    case 0x2550: // 37, 80  ("%P")
     return FileExtension.PDF;
    default:
     return FileExtension.VALIDFILE;
   }
  }
  public enum FileExtension
  {
   JPEG = 255216,
   TIF = 7373,
   GIF = 7173,
   BMP = 6677,
   PNG = 13780,
   PDF = 3780,
   VALIDFILE = 9999999
  }

train: WARNING ⚠️ /root/lanyun-tmp/yolov11/datasets3/train/images/Abyssinian_34.jpg: ignoring corrupt image/label: invalid image format GIF. Supported formats are: images: {'tif', 'webp', 'dng', 'pfm', 'mpo', 'tiff', 'png', 'jpg', 'bmp', 'jpeg', 'heic'} videos: {'mpg', 'mp4', 'mpeg', 'wmv', 'mkv', 'ts', 'webm', 'gif', 'avi', 'm4v', 'mov', 'asf'} train: WARNING ⚠️ /root/lanyun-tmp/yolov11/datasets3/train/images/Egyptian_Mau_138.jpg: corrupt JPEG restored and saved train: WARNING ⚠️ /root/lanyun-tmp/yolov11/datasets3/train/images/Egyptian_Mau_139.jpg: ignoring corrupt image/label: invalid image format GIF. Supported formats are: images: {'tif', 'webp', 'dng', 'pfm', 'mpo', 'tiff', 'png', 'jpg', 'bmp', 'jpeg', 'heic'} videos: {'mpg', 'mp4', 'mpeg', 'wmv', 'mkv', 'ts', 'webm', 'gif', 'avi', 'm4v', 'mov', 'asf'} train: WARNING ⚠️ /root/lanyun-tmp/yolov11/datasets3/train/images/Egyptian_Mau_145.jpg: ignoring corrupt image/label: invalid image format GIF. Supported formats are: images: {'tif', 'webp', 'dng', 'pfm', 'mpo', 'tiff', 'png', 'jpg', 'bmp', 'jpeg', 'heic'} videos: {'mpg', 'mp4', 'mpeg', 'wmv', 'mkv', 'ts', 'webm', 'gif', 'avi', 'm4v', 'mov', 'asf'} train: WARNING ⚠️ /root/lanyun-tmp/yolov11/datasets3/train/images/Egyptian_Mau_167.jpg: ignoring corrupt image/label: invalid image format GIF. Supported formats are: images: {'tif', 'webp', 'dng', 'pfm', 'mpo', 'tiff', 'png', 'jpg', 'bmp', 'jpeg', 'heic'} videos: {'mpg', 'mp4', 'mpeg', 'wmv', 'mkv', 'ts', 'webm', 'gif', 'avi', 'm4v', 'mov', 'asf'} train: WARNING ⚠️ /root/lanyun-tmp/yolov11/datasets3/train/images/Egyptian_Mau_177.jpg: ignoring corrupt image/label: invalid image format GIF. Supported formats are: images: {'tif', 'webp', 'dng', 'pfm', 'mpo', 'tiff', 'png', 'jpg', 'bmp', 'jpeg', 'heic'} videos: {'mpg', 'mp4', 'mpeg', 'wmv', 'mkv', 'ts', 'webm', 'gif', 'avi', 'm4v', 'mov', 'asf'} train: WARNING ⚠️ /root/lanyun-tmp/yolov11/datasets3/train/images/Egyptian_Mau_191.jpg: ignoring corrupt image/label: invalid im
05-20
import os import threading from concurrent.futures import ThreadPoolExecutor, as_completed from PIL import Image import time def get_image_size_and_channels(file_path): """快速获取图片尺寸和通道数(仅读取元数据,不加载像素)""" try: with Image.open(file_path) as img: # 获取尺寸和通道信息 width, height = img.size # 获取通道数(通过bands信息) bands = img.getbands() num_channels = len(bands) return (width, height, num_channels), None except Exception as e: return None, str(e) def check_image_shapes(folder_path, max_workers=8): """高效检查指定文件夹中所有图片的尺寸和通道数并统计结果""" # 支持的图片格式 image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff', '.webp'} # 使用os.scandir高效遍历 image_paths = [] with os.scandir(folder_path) as it: for entry in it: if entry.is_file() and entry.name.lower().endswith(tuple(image_extensions)): image_paths.append(entry.path) if not image_paths: return None # 没有图片文件 # 使用线程池并行处理图片 shape_count = {} lock = threading.Lock() processed = 0 def process_image(path): nonlocal processed result, error = get_image_size_and_channels(path) with lock: processed += 1 if processed % 100 == 0: # 每100张报告进度 print(f" 已处理 {processed}/{len(image_paths)} 张图片...", end='\r') if result: return result return None # 使用线程池处理图片 with ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_path = {executor.submit(process_image, path): path for path in image_paths} for future in as_completed(future_to_path): result = future.result() if result: with lock: shape_count[result] = shape_count.get(result, 0) + 1 # 清理进度显示 if processed > 0: print(" " * 50, end='\r') # 清除进度行 return shape_count def analyze_shapes(shape_count): """分析尺寸和通道数统计数据""" if not shape_count: return 0, 0, 0, [], 0 total_images = sum(shape_count.values()) same_count = 0 distinct_count = 0 channel_distribution = {} # 直接计算统计值 for (w, h, channels), count in shape_count.items(): # 更新通道分布统计 channel_distribution[channels] = channel_distribution.get(channels, 0) + count if count > 1: same_count += count else: distinct_count += count # 生成尺寸详情(包含通道数) 
shape_details = [(f"{w}x{h} ({channels}通道)", count) for (w, h, channels), count in shape_count.items()] shape_details.sort(key=lambda x: x[1], reverse=True) # 仅返回前10个尺寸 return total_images, same_count, distinct_count, shape_details[:10], channel_distribution def check_all_subfolders(parent_folder, max_workers=4): """高效检查父文件夹下所有子文件夹的图片形状和通道数""" # 获取所有直接子文件夹 subfolders = [] with os.scandir(parent_folder) as it: for entry in it: if entry.is_dir(): subfolders.append(entry.name) if not subfolders: print(f"错误:在 {parent_folder} 中未找到任何子文件夹") return print(f"开始检查父文件夹: {parent_folder}") print(f"共发现 {len(subfolders)} 个子文件夹\n") # 为每个子文件夹创建独立线程池 for subfolder in subfolders: subfolder_path = os.path.join(parent_folder, subfolder) print(f"{'=' * 60}") print(f"正在检查子文件夹: {subfolder}") print(f"路径: {subfolder_path}") start_time = time.time() shape_count = check_image_shapes(subfolder_path, max_workers=max_workers) if shape_count is None: print(" 警告:此文件夹中未找到有效图片文件") print(f"{'=' * 60}\n") continue total, same, distinct, details, channel_dist = analyze_shapes(shape_count) elapsed = time.time() - start_time # 打印结果 print(f"\n 处理完成!耗时: {elapsed:.2f}秒 | 总图片数量: {total}") print(f" 相同尺寸+通道的图片: {same} 张 ({same / total:.1%})") print(f" 不同尺寸+通道的图片: {distinct} 张 ({distinct / total:.1%})") # 通道分布统计 print("\n 通道分布统计:") for channels, count in sorted(channel_dist.items()): channel_name = { 1: "灰度 (1通道)", 3: "RGB (3通道)", 4: "RGBA (4通道)", 2: "灰度+Alpha (2通道)", 5: "CMYK (4通道,但PIL中为5?)" }.get(channels, f"{channels}通道") print(f" {channel_name}: {count} 张 ({count / total:.1%})") print("\n 10个最常见尺寸+通道分布:") for i, (size_info, count) in enumerate(details, 1): print(f" {i}. 
{size_info}: {count} 张 ({count / total:.1%})") # 附加检查 if len(shape_count) == 1: print("\n ✅ 所有图片尺寸和通道数完全一致!") else: print(f"\n ⚠️ 共发现 {len(shape_count)} 种不同尺寸+通道组合") print(f"{'=' * 60}\n") # 主程序 if __name__ == "__main__": # 使用您提供的路径 parent_folder = r"D:\西北地区铜镍矿\多模态测试\图片训练" # 检查路径是否存在 if not os.path.exists(parent_folder): print(f"错误:指定的路径不存在\n{parent_folder}") print("请检查路径是否正确,注意使用原始字符串(前面加r)") else: print("=== 图片尺寸和通道数检查工具(高效版) ===") print("正在准备检查...") check_all_subfolders(parent_folder, max_workers=8) print("所有检查完成!") 帮我修改以上代码,我想查看的是图片的高度、宽度、通道数,
最新发布
11-22
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值