最近在尝试用 JD(职位描述)匹配简历,发现 llamaindex 自带了 llama hub 的 resume screener 组件,
但其中部分方法在最新版 llamaindex 中已过期,于是尝试改造源代码。
1. 主代码如下:
"""Screen a PDF resume against a job description with Azure OpenAI.

Requires AZURE_OPENAI_KEY, AZURE_OPENAI_ENDPOINT and AZURE_API_VERSION in the
environment (loaded from a .env file via python-dotenv if present).
"""
import os

from dotenv import load_dotenv, find_dotenv
from llama_index.core import Settings
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI

from resume_screener_pack_copy.llama_index.packs.resume_screener.base import ResumeScreenerPack

# Load credentials before reading os.environ; missing keys raise KeyError early.
load_dotenv(find_dotenv())
api_key = os.environ['AZURE_OPENAI_KEY']
azure_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']
api_version = os.environ['AZURE_API_VERSION']

# Job description fed verbatim into the screening prompt (Chinese text is
# intentional — the LLM answers against it).
cust_jd = """\
1.负责企业平台相关业务的Web端、App端测试。包括但不限于功能测试、接口测试、性能测试、稳定性测试等;
2.参与项目过程质量保障工作:参与技术评审,制定测试计划,设计测试用例,组织和执行整个测试过程,协助研发定位问题,跟踪分析、推动解决测试过程中问题及线上问题,并对应用开发过程做分析和总结;
3.围绕分析业务特点对现有质量保证方案持续改进,持续提高产品质量和测试效率;
4.推动产品研发测试上线流程优化,维护测试环境及测试数据,
5.开发自动化用例或引进/开发工具来切实提高效率,降低交付周期。
"""

# You need to deploy your own embedding model as well as your own chat
# completion model in Azure; deployment_name must match the Azure deployment.
embed = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="text-embedding-ada-002",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)
llm = AzureOpenAI(
    model="gpt-4",
    deployment_name="gpt-4",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

# Register globally so llama-index components without an explicit model
# fall back to these.
Settings.embed_model = embed
Settings.llm = llm

resume_screener = ResumeScreenerPack(
    llm=llm,
    job_description=cust_jd,
    criteria=[
        "有3年以上测试经验;",
        "精通软件测试理论、方法和过程,熟悉黑盒测试、灰盒测试,有自动化经验优先。",
        "有办公产品测试相关经验优先。",
        "工作耐心细致,有较强的团队合作精神,良好的沟通能力,能承受较大的工作压力。",
    ],
)

# NOTE: ResumeScreenerPack.run() only consumes ``resume_path``; ``llm`` and
# ``embed_model`` kwargs passed here previously were silently swallowed by
# **kwargs, so they are dropped — the models are already wired in above.
response = resume_screener.run(resume_path="./data/简历.pdf")

# Per-criterion decisions, then the overall verdict.
for cd in response.criteria_decisions:
    print("### 标准决策")
    print(cd.reasoning)
    print(cd.decision)
print("#### 整体理由 ##### ")
print(str(response.overall_reasoning))
print(str(response.overall_decision))
2. 复制 resume_screener_pack 目录为 resume_screener_pack_copy
2.1 修改 __init__.py
# Re-export the pack's public entry point so callers can import it from the
# package root.
# NOTE(review): if this __init__.py sits inside the resume_screener package
# itself, a relative import (``from .base import ResumeScreenerPack``) would
# be more portable than the absolute path — confirm the file's location.
from resume_screener_pack_copy.llama_index.packs.resume_screener.base import ResumeScreenerPack
__all__ = ["ResumeScreenerPack"]
2.2 修改 base.py
from pathlib import Path
from typing import Any, Dict, List, Optional
from llama_index.core import ServiceContext, Settings
from llama_index.core.llama_pack.base import BaseLlamaPack
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.schema import NodeWithScore
from llama_index.llms.openai import OpenAI
from llama_index.readers.file import PDFReader
from pydantic import BaseModel, Field
# backwards compatibility
try:
from llama_index.core.llms.llm import LLM
except ImportError:
from llama_index.core.llms.base import LLM
# Prompt template passed to the TreeSummarize synthesizer.  The Chinese text
# is part of the runtime prompt ("You are a resume-screening expert; decide
# whether the candidate passes given the job description and the criteria")
# and must not be altered.  {job_description} and {criteria_str} are filled
# in by ResumeScreenerPack.__init__.
QUERY_TEMPLATE = """
你是简历审阅专家。
你的工作是根据职位描述和一系列标准来决定候选人是否通过简历筛选:
### 工作描述
{job_description}
### 面试标准
{criteria_str}
"""
class CriteriaDecision(BaseModel):
    """The decision made based on a single criterion."""

    # Field descriptions are emitted in the structured-output schema the LLM
    # fills in, so the Chinese text is deliberate — do not translate.
    # Whether the resume satisfies this criterion.
    decision: bool = Field(description="根据标准作出的决定")
    # The model's justification for the decision.
    reasoning: str = Field(description="决定背后的原因")
class ResumeScreenerDecision(BaseModel):
    """The decision made by the resume screener."""

    # Field descriptions are part of the structured-output schema sent to the
    # LLM, so the Chinese text is deliberate — do not translate.
    # One CriteriaDecision per screening criterion, in input order.
    criteria_decisions: List[CriteriaDecision] = Field(
        description="根据标准做出的决定"
    )
    # Free-text rationale for the final verdict.
    overall_reasoning: str = Field(
        description="整体决定背后的原因"
    )
    # Final pass/fail verdict across all criteria.
    overall_decision: bool = Field(
        description="根据标准作出的总体决定"
    )
def _format_criteria_str(criteria: List[str]) -> str:
criteria_str = ""
for criterion in criteria:
criteria_str += f"- {criterion}\n"
return criteria_str
class ResumeScreenerPack(BaseLlamaPack):
    """Llama pack that screens a PDF resume against a job description.

    The resume is read with ``PDFReader`` and a ``TreeSummarize`` synthesizer
    is asked for a structured ``ResumeScreenerDecision``.
    """

    def __init__(
        self, job_description: str, criteria: List[str], llm: Optional[LLM] = None
    ) -> None:
        """Build the screening query and the synthesizer.

        Args:
            job_description: Free-form JD text inserted into the prompt.
            criteria: Screening criteria, rendered as a bullet list.
            llm: LLM used for synthesis.  When ``None``, TreeSummarize falls
                back to the global ``Settings.llm``, so the deprecated
                ``ServiceContext`` is no longer needed.
        """
        self.reader = PDFReader()
        # Removed the no-op ``llm = llm`` and the dead commented-out
        # ServiceContext construction from the upstream pack.
        self.synthesizer = TreeSummarize(
            output_cls=ResumeScreenerDecision, llm=llm,
        )
        self.query = QUERY_TEMPLATE.format(
            job_description=job_description,
            criteria_str=_format_criteria_str(criteria),
        )

    def get_modules(self) -> Dict[str, Any]:
        """Get modules."""
        return {"reader": self.reader, "synthesizer": self.synthesizer}

    def run(self, resume_path: str, *args: Any, **kwargs: Any) -> Any:
        """Screen the resume at ``resume_path``.

        Extra ``*args``/``**kwargs`` are accepted for BaseLlamaPack
        compatibility but are ignored.

        Returns:
            The synthesizer's structured response (a
            ``ResumeScreenerDecision`` via ``output.response``).
        """
        docs = self.reader.load_data(Path(resume_path))
        output = self.synthesizer.synthesize(
            query=self.query,
            nodes=[NodeWithScore(node=doc, score=1.0) for doc in docs],
        )
        return output.response
3. 运行环境
3.1 申请azure openai.
3.2 准备一份简历(PDF 格式),放在 ./data 目录下,文件名与代码中 resume_path 指定的 "简历.pdf" 保持一致。
3.3 若代码与环境正确,能输出此简历是否满足JD及标准。
《完》