pandas: writing an Excel file stream to StringIO or BytesIO

This article shows how to use pandas to write a DataFrame to an in-memory Excel file: pick the Excel format via the engine parameter, hold the binary data in a BytesIO buffer, and return the file stream correctly with HttpResponse. It applies to projects built on the Django REST framework.
# Writing Excel files to memory

# Pandas supports writing Excel files to buffer-like objects such as StringIO or BytesIO using ExcelWriter.

import pandas as pd

# Safe BytesIO import for either Python 2.x or 3.x
try:
    from io import BytesIO
except ImportError:
    from cStringIO import StringIO as BytesIO

bio = BytesIO()

# By setting the 'engine' in the ExcelWriter constructor.
writer = pd.ExcelWriter(bio, engine='xlsxwriter')
# df is the pandas.DataFrame holding the data you want to export
df.to_excel(writer, sheet_name='Sheet1')

# Save the workbook (pandas 2.0+ removed writer.save(); use writer.close() there)
writer.save()

# Seek to the beginning and read to copy the workbook to a variable in memory
bio.seek(0)
workbook = bio.read()

"""engine is optional but recommended. Setting the engine determines the version of
 workbook produced. Setting engine='xlrd' will produce an Excel 2003-format workbook
  (xls). Using either 'openpyxl' or 'xlsxwriter' will produce an Excel 2007-format
   workbook (xlsx). If omitted, an Excel 2007-formatted workbook is produced. """

The above is taken from the pandas official documentation, "Writing Excel Files to Memory".

Following the official documentation, you end up with the file stream sitting in memory; all that is left is to return it in a response.
I am using the Django REST framework here.

One really nasty pitfall: I had been using rest_framework.response.Response, and passing the file stream straight to it kept raising errors complaining about UTF-8 encoding. The problem is not actually the encoding. Response takes a data argument, which is pushed through a renderer, whereas what we are returning is binary data. Use django.http.HttpResponse instead: it takes content, a binary bytestring.
So HttpResponse(workbook, content_type=content_type) does the job.
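
To put the pieces together, here is a minimal sketch of such a view (the class name ExportExcelView and the sample DataFrame are placeholders of mine, not from the original post), assuming pandas with the xlsxwriter engine and the Django REST framework are installed:

from io import BytesIO

import pandas as pd
from django.http import HttpResponse
from rest_framework.views import APIView


class ExportExcelView(APIView):  # hypothetical view name, for illustration only
    def get(self, request):
        df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})  # stand-in for your real data

        bio = BytesIO()
        # Context manager is equivalent to creating the writer and calling save()/close()
        with pd.ExcelWriter(bio, engine='xlsxwriter') as writer:
            df.to_excel(writer, sheet_name='Sheet1')
        bio.seek(0)
        workbook = bio.read()

        # DRF's Response(workbook) would hand the bytes to a renderer and fail
        # with a UTF-8 error; HttpResponse sends the bytestring untouched.
        content_type = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        return HttpResponse(workbook, content_type=content_type)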

Next, set the content-type, so the browser knows what kind of content it is being given.
The content-type for an Office Excel 2007 (.xlsx) file is application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.
You can look it up in an HTTP content-type reference table, or simply search for it if it is not listed there.
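
For reference, a small lookup table one might keep for the two workbook formats discussed above (the .xls entry, application/vnd.ms-excel, is my addition, not from the original post; double-check it against a MIME table):

# MIME types for the two Excel formats mentioned above
EXCEL_CONTENT_TYPES = {
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',  # Excel 2007+
    '.xls': 'application/vnd.ms-excel',  # Excel 2003 (assumed value)
}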

There is one more header to set, Content-Disposition:
attachment — download as an attachment
inline — open in the browser
For example:
response['Content-Disposition'] = 'attachment;filename=filename.txt'
response['Content-Disposition'] = 'inline;filename=filename.txt'
Another pitfall here: if you pass the filename in as a variable and build the header with % or format, and the filename happens to contain spaces, the response header comes out as Content-Disposition: attachment;filename=i love you.txt, and the client discards everything after the first space. The downloaded file then keeps only the part of the name before the first space, with no extension, and cannot be opened directly.
The fix is to wrap the filename in double quotes, Content-Disposition: attachment;filename="i love you.txt"; with the quotes in place you can safely insert the filename via string formatting.
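
A short sketch of building that header with a filename containing spaces (the filename below is just an example; response is the HttpResponse built earlier):

filename = 'i love you.xlsx'  # example filename containing spaces
# Unquoted, the header would be  attachment;filename=i love you.xlsx  and the
# client would keep only the part before the first space, losing the extension.
# Quoting the value keeps the full filename intact.
response['Content-Disposition'] = 'attachment;filename="{}"'.format(filename)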
