以下是qwen-image-edit单图编辑demo代码:
import os
import numpy as np
import json
import time
import random
import torch
import argparse
from PIL import Image
from utils.data_loading import get_image_name
from utils.utils import GiB, cachify
from jinn_modelhub import snapshot_download
from jinn_modelhub import login, logout
from src.cache_dit.cache_factory.cache_interface import enable_cache
from src.diffusers.models.transformers.transformer_qwenimage import QwenImageTransformer2DModel
from src.diffusers.pipelines.qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
import pdb
# NOTE: an unconditional ``pdb.set_trace()`` used to run here at import time.
# It was a debugging leftover that blocks (or crashes) every non-interactive
# run, so it has been removed; set a breakpoint manually when debugging.

# --- Model Loading ---
# Precision used when loading the model weights.
dtype = torch.bfloat16
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- UI Constants and Helpers ---
# Upper bound (inclusive) for randomly drawn seeds: the largest int32 value.
MAX_SEED = np.iinfo(np.int32).max
class QwenIamgeEdit:
    """Single-image editing demo driven by ``QwenImageEditPlusPipeline``.

    The instance is configured from two files named on the command line:
    ``args.filesInfo`` (one JSON object per line, each carrying a ``"file"``
    key) and ``args.jsonFile`` (a single JSON document holding paths and
    generation parameters). ``run()`` loads the model and then edits every
    input image once per prompt.
    """

    # Location of the weights relative to the download cache directory.
    _SNAPSHOT_SUBDIR = "models--ModelHub--QwenImageModel/snapshots/7eb6c6df5de0e613e894814fc5090dab33cf3d56/"
    _MODEL_NAME = "Qwen-Image-Edit-2509"

    def __init__(self, args):
        """Read the platform input files and cache every generation setting.

        Args:
            args: parsed command-line namespace; ``args.filesInfo`` and
                ``args.jsonFile`` are the paths that get read here.

        Raises:
            OSError: if either input file cannot be opened.
            json.JSONDecodeError: if a file holds invalid JSON.
            KeyError: if a required configuration key is missing.
        """
        # Context managers guarantee the handles are closed even when parsing
        # fails; blank lines in the manifest are skipped instead of crashing.
        with open(args.filesInfo, "r", encoding="utf-8") as file_info:
            self.info = [json.loads(line) for line in file_info if line.strip()]
        with open(args.jsonFile, mode="r", encoding="utf-8") as json_file:
            self.inputInfo = json.load(json_file)

        params = self.inputInfo["args"]
        # Root directory of the input data.
        self.input_path = self.inputInfo["materialInPath"]
        # Directory the model is pulled into. Per the original note this is a
        # temporary workflow path that is removed after the run.
        self.model_path = self.inputInfo["tempPath"]
        # Seed: a concrete value pins the seed, -1 draws a random one per
        # generation (the original note mentions 42 as the usual default).
        self.seed = params["seed"]
        # Number of generations to run for each image/prompt pair.
        self.seed_random_num = params["seed_random_num"]
        # Whether the result is resized back to the source image size.
        self.flag_resize = params["flag_resize"]
        # Number of denoising steps: more steps cost more inference time.
        self.num_inference_steps = params["num_inference_steps"]
        # Guidance strength, forwarded to the pipeline as ``true_cfg_scale``.
        self.guidance_scale = params["guidance_scale"]
        # Whether cache acceleration is enabled.
        self.cache_dit = params["cache_accelerate"]
        # Whether xFormers memory-efficient attention is enabled.
        self.xformers = params["xformers"]
        # Directory that receives the generated images.
        self.save_path = self.inputInfo["materialOutPath"]
        # Prompts used for generation.
        self.prompts = params["prompts"]["prompt"]
        # Path handed to ``load_lora_weights``; a falsy value skips LoRA.
        self.lora_model_path = params["lora_model_path"]
        # Input entries listed in the files-info manifest.
        self.path_imgs = [entry["file"] for entry in self.info]

        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
            os.chmod(self.save_path, 0o777)
        # Keep a copy of the platform-generated JSON next to the results to
        # help with debugging.
        with open(os.path.join(self.save_path, "file.json"), "w") as new_file:
            json.dump(self.inputInfo, new_file, indent=4)

    def _model_dir(self):
        """Return the local directory that holds the downloaded weights."""
        # os.path.join keeps the path valid whether or not ``model_path`` ends
        # with a separator (plain string concatenation did not).
        return os.path.join(self.model_path, self._SNAPSHOT_SUBDIR, self._MODEL_NAME)

    def init_model(self):
        """Download the model snapshot and build the editing pipeline."""
        # Pull the model from the model hub.
        # NOTE(review): the account id and access key are hard-coded below;
        # they should come from configuration instead (``args.token`` is
        # parsed by the CLI but never used).
        os.environ["MODEL_HUB_GROUP"] = "ARI"
        login("40482", "SL-HqfH249qHhmsSSJp8:CVZuPeJXzz29so12iLCq", repo_id="QwenImageModel")
        snapshot_download(repo_id="QwenImageModel", cache_dir=self.model_path)
        logout("40482", repo_id="QwenImageModel")
        model_dir = self._model_dir()

        # Decide the memory strategy before any weights are loaded.
        # NOTE(review): GiB() presumably reports GPU memory in GiB - confirm
        # against utils.utils.
        large_memory = GiB() > 48

        # Loading is identical for both memory strategies.
        self.transformer = QwenImageTransformer2DModel.from_pretrained(
            model_dir, subfolder="transformer", torch_dtype=dtype
        ).to(device)
        self.pipe = QwenImageEditPlusPipeline.from_pretrained(
            model_dir, transformer=self.transformer, torch_dtype=dtype
        ).to(device)
        if self.xformers:
            print("Enabling xFormers memory efficient attention...")
            self.pipe.enable_xformers_memory_efficient_attention()

        if large_memory:
            if self.cache_dit:
                enable_cache(
                    self.pipe,
                    max_warmup_steps=8,  # steps do not cache
                    max_cached_steps=-1,  # -1 means no limit
                    Fn_compute_blocks=8,  # Fn, F8, etc.
                    Bn_compute_blocks=8,  # Bn, B8, etc.
                    residual_diff_threshold=0.20,
                )
        else:
            # Caching has to be installed before offloading is switched on.
            if self.cache_dit:
                print("Enabled Cache before offload")
                cachify(self.pipe)
            if torch.cuda.device_count() <= 1:
                # Enable memory savings
                print("Enabled Model CPU Offload")
                self.pipe.enable_model_cpu_offload()

        if self.lora_model_path:
            self.pipe.load_lora_weights(self.lora_model_path, weight_name="pytorch_lora_weights.safetensors")

    def process_and_save(self, image_org, prompt, output_folder, base_img_name, id_prompt):
        """Generate ``seed_random_num`` edits of one image and save them.

        Args:
            image_org: source image passed to the pipeline; its ``size`` is
                used when ``flag_resize`` is set.
            prompt: text prompt for the edit.
            output_folder: directory that receives the PNG files.
            base_img_name: source file name without extension, used as the
                output file name prefix.
            id_prompt: prompt index embedded in the output file name.
        """
        num_images_per_prompt = 1
        # The negative prompt is fixed to a single space.
        negative_prompt = " "
        for idx in range(self.seed_random_num):
            # -1 requests a fresh random seed for every generation.
            seed = random.randint(0, MAX_SEED) if self.seed == -1 else self.seed
            # Seeded generator so a run can be reproduced from the file name.
            generator = torch.Generator(device=device).manual_seed(seed)
            print(f"Calling pipeline with prompt: '{prompt}'")
            print(f"Seed: {seed}, Steps: {self.num_inference_steps}, Guidance: {self.guidance_scale}")

            start_time = time.time()
            image = self.pipe(
                image_org,
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=self.num_inference_steps,
                generator=generator,
                true_cfg_scale=self.guidance_scale,
                num_images_per_prompt=num_images_per_prompt,
            ).images
            end_time = time.time()
            print("单张图片生成耗时:(s)", end_time - start_time)

            # Optionally scale the result back to the source resolution.
            result = image[0].resize(image_org.size) if self.flag_resize else image[0]
            output_img_path = os.path.join(output_folder, f"{base_img_name}_prompt{id_prompt}_seed{seed}_id{idx}.png")
            result.save(output_img_path)

    def process_image(self, image_path, prompt, output_folder, id_prompt):
        """Open one image as RGB and run generation for it."""
        base_img_name = os.path.splitext(os.path.basename(image_path))[0]
        image_org = Image.open(image_path).convert("RGB")
        self.process_and_save(image_org, prompt, output_folder, base_img_name, id_prompt)

    def process_images(self, prompts):
        """Edit every input image once for each prompt.

        Args:
            prompts: iterable of prompt strings; the position of a prompt
                becomes the ``id_prompt`` part of the output file names.
        """
        # NOTE(review): get_image_name presumably expands the manifest
        # entries into individual image paths - confirm in utils.data_loading.
        image_paths = get_image_name(self.path_imgs)
        id_prompt = 0
        for prompt in prompts:
            for image_path in image_paths:
                # Mirror the input directory layout under the output root.
                save_name = image_path.replace(self.input_path, self.save_path)
                save_folder = os.path.dirname(save_name)
                if not os.path.exists(save_folder):
                    os.makedirs(save_folder)
                    os.chmod(save_folder, 0o777)
                print(f"Processing image: {image_path}")
                self.process_image(image_path, prompt, save_folder, id_prompt)
            id_prompt += 1

    def run(self):
        """Load the model, then process all images with all prompts."""
        self.init_model()
        self.process_images(self.prompts)
def getArgs():
    """Build the command-line parser the Jinn platform uses to pass arguments.

    Returns:
        argparse.ArgumentParser: parser with three required string options,
        ``--token``, ``--jsonFile`` and ``--filesInfo``.
    """
    cli = argparse.ArgumentParser(description="局部生成")
    # Every option is a mandatory string, so register them uniformly.
    for option in ('--token', '--jsonFile', '--filesInfo'):
        cli.add_argument(option, type=str, required=True)
    return cli
if __name__ == '__main__':
    # Script entry point: parse the CLI first, then restrict the process to
    # the first GPU, and finally run the single-image editing demo.
    cli_args = getArgs().parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    QwenIamgeEdit(cli_args).run()
以下是qwen-image-edit 多图编辑demo代码:
import os
import numpy as np
import json
import time
import random
import torch
import argparse
import cache_dit
from PIL import Image
from diffusers import QwenImageEditPlusPipeline
from utils.data_loading import get_image_name
from utils.utils import GiB, cachify
from jinn_modelhub import snapshot_download
from jinn_modelhub import login, logout
import pdb
# NOTE: an unconditional ``pdb.set_trace()`` used to run here at import time.
# It was a debugging leftover that blocks (or crashes) every non-interactive
# run, so it has been removed; set a breakpoint manually when debugging.

# --- Model Loading ---
# Precision used when loading the model weights.
dtype = torch.bfloat16
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- UI Constants and Helpers ---
# Upper bound (inclusive) for randomly drawn seeds: the largest int32 value.
MAX_SEED = np.iinfo(np.int32).max
class QwenIamgeEditPlus:
    """Two-image editing demo driven by ``QwenImageEditPlusPipeline``.

    The instance is configured from two files named on the command line:
    ``args.filesInfo`` (one JSON object per line, each carrying a ``"file"``
    key) and ``args.jsonFile`` (a single JSON document holding paths and
    generation parameters). ``run()`` loads the model and then, for every
    prompt, edits every (first image, second image) pair.
    """

    # Location of the weights relative to the download cache directory.
    _SNAPSHOT_SUBDIR = "models--ModelHub--QwenImageModel/snapshots/7eb6c6df5de0e613e894814fc5090dab33cf3d56/"
    _MODEL_NAME = "Qwen-Image-Edit-2509"

    def __init__(self, args):
        """Read the platform input files and cache every generation setting.

        Args:
            args: parsed command-line namespace; ``args.filesInfo`` and
                ``args.jsonFile`` are the paths that get read here.

        Raises:
            OSError: if either input file cannot be opened.
            json.JSONDecodeError: if a file holds invalid JSON.
            KeyError: if a required configuration key is missing.
        """
        # Context managers guarantee the handles are closed even when parsing
        # fails; blank lines in the manifest are skipped instead of crashing.
        with open(args.filesInfo, "r", encoding="utf-8") as file_info:
            self.info = [json.loads(line) for line in file_info if line.strip()]
        with open(args.jsonFile, mode="r", encoding="utf-8") as json_file:
            self.inputInfo = json.load(json_file)

        params = self.inputInfo["args"]
        # Root directory of the input data.
        self.input_path = self.inputInfo["materialInPath"]
        # Directory the model is pulled into. Per the original note this is a
        # temporary workflow path that is removed after the run.
        self.model_path = self.inputInfo["tempPath"]
        # Seed: a concrete value pins the seed, -1 draws a random one per
        # generation (the original note mentions 42 as the usual default).
        self.seed = params["seed"]
        # Number of generations to run for each image pair and prompt.
        self.seed_random_num = params["seed_random_num"]
        # Whether the result is resized to the source size (read from the
        # configuration but not used by the generation code of this class).
        self.flag_resize = params["flag_resize"]
        # Network input width/height; the original note recommends 1024.
        self.net_w = params["net_w"]
        self.net_h = params["net_h"]
        # Number of denoising steps: more steps cost more inference time.
        self.num_inference_steps = params["num_inference_steps"]
        # Guidance strength, forwarded to the pipeline as ``true_cfg_scale``.
        self.guidance_scale = params["guidance_scale"]
        # Whether cache acceleration is enabled.
        self.cache_dit = params["cache_accelerate"]
        # Original-image editing mode: when set, the pipeline is called
        # without an explicit output width/height.
        self.original_img_edit = params["original_img_edit"]
        # Directory that receives the generated images.
        self.save_path = self.inputInfo["materialOutPath"]
        # Prompts used for generation.
        self.prompts = params["prompts"]["prompt"]
        # Input entries listed in the files-info manifest.
        self.path_imgs = [entry["file"] for entry in self.info]

        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
            os.chmod(self.save_path, 0o777)
        # Keep a copy of the platform-generated JSON next to the results to
        # help with debugging.
        with open(os.path.join(self.save_path, "file.json"), "w") as new_file:
            json.dump(self.inputInfo, new_file, indent=4)

    def _model_dir(self):
        """Return the local directory that holds the downloaded weights."""
        # os.path.join keeps the path valid whether or not ``model_path`` ends
        # with a separator (plain string concatenation did not).
        return os.path.join(self.model_path, self._SNAPSHOT_SUBDIR, self._MODEL_NAME)

    def init_model(self):
        """Download the model snapshot and build the editing pipeline."""
        # Pull the model from the model hub.
        # NOTE(review): the account id and access key are hard-coded below;
        # they should come from configuration instead (``args.token`` is
        # parsed by the CLI but never used).
        os.environ["MODEL_HUB_GROUP"] = "ARI"
        login("40482", "SL-HqfH249qHhmsSSJp8:CVZuPeJXzz29so12iLCq", repo_id="QwenImageModel")
        snapshot_download(repo_id="QwenImageModel", cache_dir=self.model_path)
        logout("40482", repo_id="QwenImageModel")
        model_dir = self._model_dir()

        # Build pipeline.
        # NOTE(review): GiB() presumably reports GPU memory in GiB - confirm
        # against utils.utils.
        if GiB() > 48:
            # Enough memory: keep the whole pipeline on the target device.
            self.pipe = QwenImageEditPlusPipeline.from_pretrained(model_dir, torch_dtype=dtype).to(device)
            if self.cache_dit:
                cache_dit.enable_cache(
                    self.pipe,
                    max_warmup_steps=8,  # steps do not cache
                    max_cached_steps=-1,  # -1 means no limit
                    Fn_compute_blocks=8,  # Fn, F8, etc.
                    Bn_compute_blocks=8,  # Bn, B8, etc.
                    residual_diff_threshold=0.20,
                )
        else:
            # Limited memory: load without moving to the device and rely on
            # CPU offload when at most one GPU is visible.
            self.pipe = QwenImageEditPlusPipeline.from_pretrained(model_dir, torch_dtype=dtype)
            # Caching has to be installed before offloading is switched on.
            if self.cache_dit:
                print("Enabled Cache before offload")
                cachify(self.pipe)
            if torch.cuda.device_count() <= 1:
                # Enable memory savings
                print("Enabled Model CPU Offload")
                self.pipe.enable_model_cpu_offload()

    def process_and_save(self, image1_org, image2_org, prompt, output_folder, base_img1_name, base_img2_name, id_prompt):
        """Generate ``seed_random_num`` edits of an image pair and save them.

        Args:
            image1_org: first source image passed to the pipeline.
            image2_org: second source image passed to the pipeline.
            prompt: text prompt for the edit.
            output_folder: directory that receives the PNG files.
            base_img1_name: first file name without extension.
            base_img2_name: second file name without extension.
            id_prompt: prompt index embedded in the output file name.
        """
        num_images_per_prompt = 1
        # The negative prompt is fixed to a single space.
        negative_prompt = " "
        for idx in range(self.seed_random_num):
            # -1 requests a fresh random seed for every generation.
            seed = random.randint(0, MAX_SEED) if self.seed == -1 else self.seed
            # Seeded generator so a run can be reproduced from the file name.
            generator = torch.Generator(device=device).manual_seed(seed)
            print(f"Calling pipeline with prompt: '{prompt}'")
            print(f"Seed: {seed}, Steps: {self.num_inference_steps}, Guidance: {self.guidance_scale}")

            call_kwargs = {
                "prompt": prompt,
                "negative_prompt": negative_prompt,
                "num_inference_steps": self.num_inference_steps,
                "generator": generator,
                "true_cfg_scale": self.guidance_scale,
                "num_images_per_prompt": num_images_per_prompt,
            }
            # The output size is only forced outside original-image mode.
            if not self.original_img_edit:
                call_kwargs["width"] = self.net_w
                call_kwargs["height"] = self.net_h

            start_time = time.time()
            image = self.pipe([image1_org, image2_org], **call_kwargs).images
            end_time = time.time()
            print("单张图片生成耗时:(s)", end_time - start_time)

            output_img_path = os.path.join(output_folder, f"{base_img1_name}_{base_img2_name}_prompt{id_prompt}_seed{seed}_id{idx}.png")
            image[0].save(output_img_path)

    def process_image(self, image1_path, image2_path, prompt, output_folder, id_prompt):
        """Open one image pair as RGB and run generation for it."""
        base_img1_name = os.path.splitext(os.path.basename(image1_path))[0]
        base_img2_name = os.path.splitext(os.path.basename(image2_path))[0]
        image1_org = Image.open(image1_path).convert('RGB')
        image2_org = Image.open(image2_path).convert('RGB')
        self.process_and_save(image1_org, image2_org, prompt, output_folder, base_img1_name, base_img2_name, id_prompt)

    def process_images(self, prompts):
        """Edit every (first image, second image) pair once for each prompt.

        Args:
            prompts: iterable of prompt strings; the position of a prompt
                becomes the ``id_prompt`` part of the output file names.
        """
        # NOTE(review): get_image_name is expected to return two path lists
        # here (first images, second images) - confirm in utils.data_loading.
        image1_paths, image2_paths = get_image_name(self.path_imgs)
        id_prompt = 0
        for prompt in prompts:
            for image1_path in image1_paths:
                # Mirror the input directory layout of the first image under
                # the output root.
                save_name = image1_path.replace(self.input_path, self.save_path)
                save_folder = os.path.dirname(save_name)
                if not os.path.exists(save_folder):
                    os.makedirs(save_folder)
                    os.chmod(save_folder, 0o777)
                print(f"Processing image1: {image1_path}")
                for image2_path in image2_paths:
                    print(f"Processing image2: {image2_path}")
                    self.process_image(image1_path, image2_path, prompt, save_folder, id_prompt)
            id_prompt += 1

    def run(self):
        """Load the model, then process all image pairs with all prompts."""
        self.init_model()
        self.process_images(self.prompts)
def getArgs():
    """Build the command-line parser the Jinn platform uses to pass arguments.

    Returns:
        argparse.ArgumentParser: parser with three required string options,
        ``--token``, ``--jsonFile`` and ``--filesInfo``.
    """
    cli = argparse.ArgumentParser(description="局部生成")
    # Every option is a mandatory string, so register them uniformly.
    for option in ('--token', '--jsonFile', '--filesInfo'):
        cli.add_argument(option, type=str, required=True)
    return cli
if __name__ == '__main__':
    # Script entry point: parse the CLI first, then restrict the process to
    # the first GPU, and finally run the multi-image editing demo.
    cli_args = getArgs().parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    QwenIamgeEditPlus(cli_args).run()
请根据以上代码,输出一份demo兼容单图编辑及多图编辑功能,融合以上两份代码
最新发布