使用 Dify API 上传文件到知识库

该文章已生成可运行项目,

需要通过 API 把文档上传到 Dify 的知识库,但网上找到的示例代码有问题,请求总是返回 404。下面这份代码是我实际测试过、可以正常工作的版本。

"""
知识库操作
"""
import requests,os,json
import logging
# Connection settings for the Dify dataset (knowledge base) API.
# Each value can be supplied through an environment variable so that the real
# API key does not have to be committed to source; the literals below are the
# placeholder defaults used when the variable is not set.
DIFY_API_KEY = os.environ.get("DIFY_API_KEY", "dataset-yourapikey")
DIFY_BASE_URL = os.environ.get("DIFY_BASE_URL", "http://yourip/v1")
KNOWLEDGE_BASE_ID = os.environ.get("DIFY_KNOWLEDGE_BASE_ID", "your_base_id")
"""
上传文档到知识库
"""
def upload_doc(file_path, file_name=None):
    """
    Upload a local file to the Dify knowledge base as a new document.

    Sends a multipart POST to
    ``{DIFY_BASE_URL}/datasets/{KNOWLEDGE_BASE_ID}/document/create-by-file``.
    The file content goes in the ``file`` part and the JSON-encoded indexing
    settings go in the ``data`` form field. The raw response body is printed
    before the status is checked.

    :param file_path: path of the local file to upload
    :param file_name: optional name to upload the file under; defaults to the
        base name of ``file_path``. If the given name has no extension, the
        extension of ``file_path`` is appended to it.
    :return: the decoded JSON response on success, or ``None`` when the server
        answers with an HTTP error status (the error and body are printed)
    :raises OSError: if ``file_path`` cannot be opened for reading
    """
    source_name = os.path.basename(file_path)
    if file_name is None:
        file_name = source_name
    elif not os.path.splitext(file_name)[1]:
        # NOTE(review): the server presumably picks the document parser from
        # the uploaded name's extension, so a bare custom name such as "test"
        # inherits the source file's extension - confirm against the Dify
        # version in use.
        file_name += os.path.splitext(source_name)[1]

    url = f"{DIFY_BASE_URL}/datasets/{KNOWLEDGE_BASE_ID}/document/create-by-file"

    # No explicit Content-Type: requests generates the multipart header,
    # including the boundary, from the ``files`` argument.
    headers = {
        "Authorization": f"Bearer {DIFY_API_KEY}",
    }

    # NOTE(review): the Dify docs describe ``rules`` as applying to "custom"
    # mode; with "automatic" they are probably ignored - confirm.
    process_rule = {
        "mode": "automatic",  # automatic processing mode
        "rules": {
            "pre_processing_rules": [
                {"id": "remove_extra_spaces", "enabled": True},
                {"id": "remove_urls_emails", "enabled": True}
            ],
            "segmentation": {
                "separator": "\n",  # segment separator
                "max_tokens": 1000  # maximum tokens per segment
            }
        }
    }
    payload = {
        "indexing_technique": 'high_quality',
        "process_rule": process_rule,
    }
    # The settings travel as one JSON string in the "data" form field,
    # next to the file part.
    form_fields = {'data': json.dumps(payload)}

    # Context managers close the file handle and the HTTP session on every
    # path, including when the request raises.
    with open(file_path, 'rb') as file_obj, requests.Session() as session:
        try:
            response = session.post(
                url,
                headers=headers,
                files={'file': (file_name, file_obj)},
                data=form_fields,
            )
            print(response.text)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as err:
            print(f"HTTP错误发生: {err}")
            print(f"响应内容: {err.response.text}")
    return None

def query_datasets():
    """
    Send an authenticated GET to ``{DIFY_BASE_URL}/datasets`` and print the
    decoded JSON body of the response.

    Nothing is returned and the HTTP status is not checked.
    """
    auth_headers = {"Authorization": f"Bearer {DIFY_API_KEY}"}
    endpoint = f"{DIFY_BASE_URL}/datasets"
    reply = requests.get(endpoint, headers=auth_headers)
    body = reply.json()
    print(body)

if __name__ == '__main__':
    # Demo run: upload ./1.txt, passing 'test' as the file_name argument.
    result = upload_doc("1.txt", file_name='test')
    # Uncomment to print the response of the /datasets endpoint instead.
    #query_datasets()

本文章已经生成可运行项目
评论 1
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值