def _is_valid_sample(self, scene: str, turns: List[Dict]) -> tuple[bool, str]:
    """Run every logical validation check on one generated dialogue sample.

    Checks are applied in this order and the first failure is reported:

    1. ``scene`` must match the ``scene`` attribute of an entry in
       ``self.config["scenes"]``.
    2. For each turn, a non-empty ``body_language`` must be one of the
       scene's ``user_actions`` or ``yizi_actions``.
    3. Each turn must carry a ``speaker`` key and a string ``utterance``.
    4. No utterance may contain a forbidden AI-related word.
    5. A dismissive utterance must not be paired with a caring
       ``inner_thought``.
    6. The dialogue as a whole must pass
       ``self._check_emotion_progression``.

    Args:
        scene: Name of the scene the sample was generated for.
        turns: Dialogue turns, each a dict with ``speaker`` and
            ``utterance`` and optionally ``body_language`` and
            ``inner_thought``.

    Returns:
        ``(True, "有效")`` when every check passes, otherwise
        ``(False, reason)`` with a human-readable reason.
    """
    binding = next((s for s in self.config["scenes"] if s.scene == scene), None)
    if not binding:
        return False, "未知场景"

    valid_actions = set(binding.user_actions + binding.yizi_actions)

    # Loop-invariant keyword tables, built once instead of once per turn.
    forbidden_words = ("AI", "人工智能", "模型", "系统")
    dismissive_markers = ("不在乎", "随便")
    caring_markers = ("在乎", "牵挂", "担心")

    for turn in turns:
        # A null/absent body_language is treated as "no action given".
        action = (turn.get("body_language") or "").strip()
        if action and action not in valid_actions:
            return False, f"动作与场景不符: {action}"

        # Malformed turns are reported as invalid rather than raising, so
        # callers always get the (bool, reason) contract back.
        if "speaker" not in turn:
            return False, "缺少必要字段: speaker"
        utt = turn.get("utterance")
        if not isinstance(utt, str):
            return False, "缺少必要字段: utterance"

        # Forbidden AI-related vocabulary (plain, case-sensitive substring match).
        if any(word in utt for word in forbidden_words):
            return False, f"包含禁止词: {utt}"

        # Contradiction between what is said and what is thought.
        # NOTE(review): "在乎" is a substring of "不在乎", so a thought that
        # itself says "不在乎" also counts as caring here — confirm intended.
        thought = turn.get("inner_thought") or ""
        sounds_dismissive = any(marker in utt for marker in dismissive_markers)
        if sounds_dismissive and any(kw in thought for kw in caring_markers):
            return False, "话语与心理矛盾"

    # NOTE(review): the progression check is invoked with the turns only, so
    # every scene is judged against the same keyword expectations.
    if not self._check_emotion_progression(turns):
        return False, "情感未完成递进"
    return True, "有效"
生成 parting 时很快,但是生成撒娇时一直出现"情感未完成递进"。
def _check_emotion_progression(self, turns: List[Dict], *, stage_keywords=None) -> bool:
    """Check that the dialogue moves through three emotional stages in order.

    The turns are split into an opening, a middle and a closing segment
    (roughly thirds, with the opening at least two turns long and the
    middle at least one). Each segment must contain at least one utterance
    that includes one of that stage's keywords.

    Args:
        turns: Dialogue turns; only each turn's ``utterance`` is read. A
            missing or ``None`` utterance is treated as empty text.
        stage_keywords: Optional sequence of exactly three keyword
            collections ``(opening, middle, closing)``. When omitted, the
            built-in defaults below are used; they are farewell-style
            phrases, so dialogues on other themes will normally need
            their own keywords to pass.

    Returns:
        ``True`` only if there are at least four turns and all three
        segments contain one of their keywords.

    Raises:
        ValueError: If ``stage_keywords`` is given but does not contain
            exactly three groups.
    """
    if stage_keywords is None:
        stage_keywords = (
            ("舍不得", "别走", "担心", "难过"),
            ("解释", "叮嘱", "记得", "照顾好"),
            ("等你回来", "会想你", "约定", "回来"),
        )
    opening_kws, middle_kws, closing_kws = stage_keywords

    total = len(turns)
    if total < 4:
        # Too short to hold three distinct stages.
        return False

    # Segment boundaries: [0, opening_end), [opening_end, middle_end), [middle_end, total).
    opening_end = max(2, total // 3)
    middle_end = max(opening_end + 1, 2 * total // 3)

    def mentions(segment, keywords) -> bool:
        """Return True if any utterance in the segment contains any keyword."""
        return any(
            kw in (turn.get("utterance") or "")
            for turn in segment
            for kw in keywords
        )

    return (
        mentions(turns[:opening_end], opening_kws)
        and mentions(turns[opening_end:middle_end], middle_kws)
        and mentions(turns[middle_end:], closing_kws)
    )
最新发布