AI小白一枚,入门实验。
1. Pg-Hydra
颠覆数据库体验:Hydra —— 开源的列式存储 PostgreSQL-优快云博客
git clone https://github.com/hydradatabase/hydra && cd hydra
cp .env.example .env
docker compose up
psql postgres://postgres:hydra@127.0.0.1:5432
docker配置github仓库ghcr国内镜像加速_ghcr.io-优快云博客
{
"registry-mirrors": ["https://ghcr.nju.edu.cn"]
}
改一下路径,用镜像
services:
hydra:
container_name: hydra
image: ghcr.nju.edu.cn/hydradatabase/hydra:latest
ports:
- ${POSTGRES_PORT}:5432
environment:
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
volumes:
- ./volumes/db:/var/lib/postgresql/data
- ./files/postgres/postgresql.conf:/etc/postgresql/postgresql.conf
command: postgres -c 'config_file=/etc/postgresql/postgresql.conf'
2. conda 环境
- 安装conda
- 创建自定义虚拟环境
- <changeme>\anaconda3\condabin\conda_hook 文件中 conda activate XXXenv
3. jupyter lab
- 安装jupyter lab
- 快捷方式属性修改为:%windir%\system32\cmd.exe /c <changeme>\anaconda3\Scripts\jupyter.exe lab D:\<yourworkspace>
4. llamaindex / ollama
- 安装ollama, llama-index; 下载模型(随意,这里用的qwen2.5-coder,本地实验结果是代码能力确认比普通的chat要好一些)
- RAG实验数据就是网上找的DDL,
-- ===========================
-- create table ddl(s)
-- ===========================
CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL,
N_NAME CHAR(25) NOT NULL,
N_REGIONKEY INTEGER NOT NULL,
N_COMMENT VARCHAR(152));
CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL,
R_NAME CHAR(25) NOT NULL,
R_COMMENT VARCHAR(152));
CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL,
P_NAME VARCHAR(55) NOT NULL,
P_MFGR CHAR(25) NOT NULL,
P_BRAND CHAR(10) NOT NULL,
P_TYPE VARCHAR(25) NOT NULL,
P_SIZE INTEGER NOT NULL,
P_CONTAINER CHAR(10) NOT NULL,
P_RETAILPRICE DECIMAL(15,2) NOT NULL,
P_COMMENT VARCHAR(23) NOT NULL );
CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL,
S_NAME CHAR(25) NOT NULL,
S_ADDRESS VARCHAR(40) NOT NULL,
S_NATIONKEY INTEGER NOT NULL,
S_PHONE CHAR(15) NOT NULL,
S_ACCTBAL DECIMAL(15,2) NOT NULL,
S_COMMENT VARCHAR(101) NOT NULL);
CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL,
PS_SUPPKEY INTEGER NOT NULL,
PS_AVAILQTY INTEGER NOT NULL,
PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
PS_COMMENT VARCHAR(199) NOT NULL );
CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL,
C_NAME VARCHAR(25) NOT NULL,
C_ADDRESS VARCHAR(40) NOT NULL,
C_NATIONKEY INTEGER NOT NULL,
C_PHONE CHAR(15) NOT NULL,
C_ACCTBAL DECIMAL(15,2) NOT NULL,
C_MKTSEGMENT CHAR(10) NOT NULL,
C_COMMENT VARCHAR(117) NOT NULL);
CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL,
O_CUSTKEY INTEGER NOT NULL,
O_ORDERSTATUS CHAR(1) NOT NULL,
O_TOTALPRICE DECIMAL(15,2) NOT NULL,
O_ORDERDATE DATE NOT NULL,
O_ORDERPRIORITY CHAR(15) NOT NULL,
O_CLERK CHAR(15) NOT NULL,
O_SHIPPRIORITY INTEGER NOT NULL,
O_COMMENT VARCHAR(79) NOT NULL);
CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL,
L_PARTKEY INTEGER NOT NULL,
L_SUPPKEY INTEGER NOT NULL,
L_LINENUMBER INTEGER NOT NULL,
L_QUANTITY DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
L_DISCOUNT DECIMAL(15,2) NOT NULL,
L_TAX DECIMAL(15,2) NOT NULL,
L_RETURNFLAG CHAR(1) NOT NULL,
L_LINESTATUS CHAR(1) NOT NULL,
L_SHIPDATE DATE NOT NULL,
L_COMMITDATE DATE NOT NULL,
L_RECEIPTDATE DATE NOT NULL,
L_SHIPINSTRUCT CHAR(25) NOT NULL,
L_SHIPMODE CHAR(10) NOT NULL,
L_COMMENT VARCHAR(44) NOT NULL);
- RAG
实验数据代码:
```python
from dotenv import load_dotenv
load_dotenv()
from llama_index.core.agent import ReActAgent
from llama_index.llms.ollama import Ollama
from llama_index.core.tools import FunctionTool
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.core.tools import QueryEngineTool
```
```python
_model='qwen2.5-coder:3b'
```
```python
# settings
Settings.llm = Ollama(model=_model, request_timeout=120.0)
# function tools
def multiply(a: float, b: float) -> float:
"""Multiply two numbers and returns the product"""
return a * b
multiply_tool = FunctionTool.from_defaults(fn=multiply)
def add(a: float, b: float) -> float:
"""Add two numbers and returns the sum"""
return a + b
add_tool = FunctionTool.from_defaults(fn=add)
```
```python
from llama_index.embeddings.ollama import OllamaEmbedding
ollama_embedding = OllamaEmbedding(
model_name=_model,
base_url="http://localhost:11434",
ollama_additional_kwargs={"mirostat": 0},
)
Settings.embed_model = ollama_embedding
Settings.chunk_size = 512
# rag pipeline
documents = SimpleDirectoryReader("./sql").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
```
```python
# rag pipeline as a tool
sql_schema_info_tool = QueryEngineTool.from_defaults(
query_engine,
name="TPC_H_table_schema_and_query_sql",
description="tpc-h table schema DDL(s) and Query SQL(s) to perform tpc-h performance test on postgre database"
)
```
```python
agent = ReActAgent.from_tools([sql_schema_info_tool], verbose=True)
response = agent.chat('''you are database expert.
please answer the quetions or reply in below format: CREATE TABLE TBL (
PS_PARTKEY INTEGER NOT NULL,
PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
PS_COMMENT VARCHAR(199) NOT NULL );
Now the tasks are:
1. what's CUSTOMER's table schema?
2. give CUSTOMER's full create table DDL.
3. give postgre sql to query total number count of customers from CUSTOMER table. ''')
print(response)
```
运行结果(还是可以让人满意的):
> Running step 0e071386-2c0e-4230-92a7-6634a0ce02ef. Step input: you are database expert.
please answer the quetions or reply in below format: CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL,
PS_SUPPKEY INTEGER NOT NULL,
PS_AVAILQTY INTEGER NOT NULL,