一、原始需求分析
DuckDB发布1.0版本后,貌似性能很强悍的样子,它是一个单机的嵌入式数据路,支持SQL语言操作数据,所以现在的需求是:基于DuckDB实现一个数据关联碰撞分析的功能。
1、定义一套标准的JSON格式规范,让使用方按规范传入JSON实体,方便后续提供API接口让前端页面调用;
2、根据JSON格式规范,采用JAVA语言开发解释代码,生成最终的SQL查询语句;
3、在DuckDB执行最终的SQL查询语句,返回结果;
根据需要内容,首先找一下有没现成的JSON转SQL的规范和开源框架,发现还没有符合要求的,所以需要定义JSON格式规范。
总体流程为:

二、JSON格式规范设计






以上,大模型已经按提示要求,完成了JSON格式规范的定义,其实它还把规范的必填、可选、描述、每个结构的示例都输出了,只是这里没有粘贴上来。结果如下:
JSON格式规范文档
1、顶层结构

2、示例JSON数据
{
"select": ["users.name", "users.email", {"function": "COUNT", "field": "*", "alias": "post_count"}],
"from": "users",
"joins": [
{
"type": "INNER JOIN",
"table": "posts",
"on": {
"conditions": [
{"left": "users.id", "operator": "=", "right": "posts.user_id"},
{"left": "users.status", "operator": "=", "right": "posts.status"}
],
"logic": "AND"
}
}
],
"where": {
"conditions": [
{
"logic": "AND",
"conditions": [
{"field": "users.age", "operator": ">", "value": 21},
{"field": "users.status", "operator": "=", "value": "active"}
]
},
{
"logic": "OR",
"conditions": [
{"field": "users.name", "operator": "=", "value": "Alice"},
{"field": "users.name", "operator": "=", "value": "Carol"}
]
}
],
"logic": "AND"
},
"group_by": ["users.name", "users.email"],
"having": {
"conditions": [
{"field": "post_count", "operator": ">", "value": 1}
], "logic": "AND" },
"order_by": [
{"field": "users.name", "direction": "ASC"}
],
"limit": 10,
"offset": 0
}
三、JAVA代码实现JSON转SQL


一开始大模型使用org.json库来解释JSON,它可以快速按我的指示,使用fastjson库来实现。
四、JAVA代码实现DuckDB读写



1、完整的Java代码
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
public class JSONToSQL {
public static void main(String[] args) {
try {
// 示例JSON数据
String jsonString = "{\n" +
" \"select\": [\"users.name\", \"users.email\", {\"function\": \"COUNT\", \"field\": \"*\", \"alias\": \"post_count\"}],\n" +
" \"from\": \"users\",\n" +
" \"joins\": [\n" +
" {\n" +
" \"type\": \"INNER JOIN\",\n" +
" \"table\": \"posts\",\n" +
" \"on\": {\n" +
" \"conditions\": [\n" +
" {\"left\": \"users.id\", \"operator\": \"=\", \"right\": \"posts.user_id\"},\n" +
" {\"left\": \"users.status\", \"operator\": \"=\", \"right\": \"posts.status\"}\n" +
" ],\n" +
" \"logic\": \"AND\"\n" +
" }\n" +
" }\n" +
" ],\n" +
" \"where\": {\n" +
" \"conditions\": [\n" +
" {\n" +
" \"logic\": \"AND\",\n" +
" \"conditions\": [\n" +
" {\"field\": \"users.age\", \"operator\": \">\", \"value\": 21},\n" +
" {\"field\": \"users.status\", \"operator\": \"=\", \"value\": \"active\"}\n" +
" ]\n" +
" },\n" +
" {\n" +
" \"logic\": \"OR\",\n" +
" \"conditions\": [\n" +
" {\"field\": \"users.name\", \"operator\": \"=\", \"value\": \"Alice\"},\n" +
" {\"field\": \"users.name\", \"operator\": \"=\", \"value\": \"Carol\"}\n" +
" ]\n" +
" }\n" +
" ],\n" +
" \"logic\": \"AND\"\n" +
" },\n" +
" \"group_by\": [\"users.name\", \"users.email\"],\n" +
" \"having\": {\n" +
" \"conditions\": [\n" +
" {\"field\": \"post_count\", \"operator\": \">\", \"value\": 1}\n" +
" ],\n" +
" \"logic\": \"AND\"\n" +
" },\n" +
" \"order_by\": [\n" +
" {\"field\": \"users.name\", \"direction\": \"ASC\"}\n" +
" ],\n" +
" \"limit\": 10,\n" +
" \"offset\": 0\n" +
"}";
// 解析JSON并生成SQL查询
JSONObject json = JSONObject.parseObject(jsonString);
String sqlQuery = jsonToSQL(json);
System.out.println("Generated SQL Query: " + sqlQuery);
// 创建DuckDB连接
Connection conn = DriverManager.getConnection("jdbc:duckdb:");
Statement stmt = conn.createStatement();
// 创建示例数据表
stmt.execute("CREATE TABLE users (id INTEGER, name VARCHAR, email VARCHAR, age INTEGER, status VARCHAR);");
stmt.execute("CREATE TABLE posts (id INTEGER, user_id INTEGER, title VARCHAR, content VARCHAR, status VARCHAR);");
// 插入示例数据
stmt.execute("INSERT INTO users VALUES (1, 'Alice', 'alice@example.com', 25, 'active');");
stmt.execute("INSERT INTO users VALUES (2, 'Bob', 'bob@example.com', 30, 'inactive');");
stmt.execute("INSERT INTO users VALUES (3, 'Carol', 'carol@example.com', 22, 'active');");
stmt.execute("INSERT INTO posts VALUES (1, 1, 'Post 1', 'Content 1', 'active');");
stmt.execute("INSERT INTO posts VALUES (2, 1, 'Post 2', 'Content 2', 'active');");
stmt.execute("INSERT INTO posts VALUES (3, 2, 'Post 3', 'Content 3', 'inactive');");
stmt.execute("INSERT INTO posts VALUES (4, 3, 'Post 4', 'Content 4', 'active');");
stmt.execute("INSERT INTO posts VALUES (5, 3, 'Post 5', 'Content 5', 'active');");
// 执行生成的SQL查询
ResultSet rs = stmt.executeQuery(sqlQuery);
// 显示查询结果
while (rs.next()) {
System.out.println("Name: " + rs.getString("name"));
System.out.println("Email: " + rs.getString("email"));
System.out.println("Post Count: " + rs.getInt("post_count"));
System.out.println("----");
}
// 关闭连接
stmt.close();
conn.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public static String jsonToSQL(JSONObject json) {
StringBuilder sql = new StringBuilder();
// SELECT clause
sql.append("SELECT ");
JSONArray selectArray = json.getJSONArray("select");
for (int i = 0; i < selectArray.size(); i++) {
Object item = selectArray.get(i);
if (item instanceof String) {
sql.append(item);
} else if (item instanceof JSONObject) {
JSONObject functionObj = (JSONObject) item;
sql.append(functionObj.getString("function"))
.append("(")
.append(functionObj.getString("field"))
.append(") AS ")
.append(functionObj.getString("alias"));
}
if (i < selectArray.size() - 1) {
sql.append(", ");
}
}
// FROM clause
sql.append(" FROM ").append(json.getString("from"));
// JOINS clause
if (json.containsKey("joins")) {
JSONArray joinsArray = json.getJSONArray("joins");
for (int i = 0; i < joinsArray.size(); i++) {
JSONObject joinObj = joinsArray.getJSONObject(i);
sql.append(" ").append(joinObj.getString("type"))
.append(" ").append(joinObj.getString("table"))
.append(" ON ");
JSONObject onObj = joinObj.getJSONObject("on");
JSONArray conditionsArray = onObj.getJSONArray("conditions");
String logic = onObj.getString("logic");
for (int j = 0; j < conditionsArray.size(); j++) {
JSONObject condition = conditionsArray.getJSONObject(j);
sql.append(condition.getString("left"))
.append(" ").append(condition.getString("operator"))
.append(" ").append(condition.getString("right"));
if (j < conditionsArray.size() - 1) {
sql.append(" ").append(logic).append(" ");
}
}
}
}
// WHERE clause
if (json.containsKey("where")) {
sql.append(" WHERE ");
appendConditions(sql, json.getJSONObject("where"));
}
// GROUP BY clause
if (json.containsKey("group_by")) {
sql.append(" GROUP BY ");
JSONArray groupByArray = json.getJSONArray("group_by");
for (int i = 0; i < groupByArray.size(); i++) {
sql.append(groupByArray.getString(i));
if (i < groupByArray.size() - 1) {
sql.append(", ");
}
}
}
// HAVING clause
if (json.containsKey("having")) {
sql.append(" HAVING ");
appendConditions(sql, json.getJSONObject("having"));
}
// ORDER BY clause
if (json.containsKey("order_by")) {
sql.append(" ORDER BY ");
JSONArray orderByArray = json.getJSONArray("order_by");
for (int i = 0; i < orderByArray.size(); i++) {
JSONObject orderBy = orderByArray.getJSONObject(i);
sql.append(orderBy.getString("field"))
.append(" ").append(orderBy.getString("direction"));
if (i < orderByArray.size() - 1) {
sql.append(", ");
}
}
}
// LIMIT clause
if (json.containsKey("limit")) {
sql.append(" LIMIT ").append(json.getIntValue("limit"));
}
// OFFSET clause
if (json.containsKey("offset")) {
sql.append(" OFFSET ").append(json.getIntValue("offset"));
}
sql.append(";");
return sql.toString();
}
private static void appendConditions(StringBuilder sql,
JSONObject conditionObj) { JSONArray conditionsArray = conditionObj.getJSONArray("conditions");
String logic = conditionObj.getString("logic");
for (int i = 0; i < conditionsArray.size(); i++) {
Object item = conditionsArray.get(i);
if (item instanceof JSONObject && ((JSONObject) item).containsKey("conditions")) {
sql.append("(");
appendConditions(sql, (JSONObject) item);
sql.append(")");
} else {
JSONObject condition = (JSONObject) item;
sql.append(condition.getString("field"))
.append(" ").append(condition.getString("operator"))
.append(" ");
Object value = condition.get("value");
if (value instanceof String) {
sql.append("'").append(value).append("'");
} else {
sql.append(value);
}
}
if (i < conditionsArray.size() - 1) {
sql.append(" ").append(logic).append(" ");
}
}
}
}
五、单元测试


1、单元测试代码
import com.alibaba.fastjson.JSONObject;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import static org.junit.Assert.*;
public class JSONToSQLTest {
private Connection conn;
private Statement stmt;
@Before
public void setUp() throws Exception {
// 创建DuckDB连接
conn = DriverManager.getConnection("jdbc:duckdb:");
stmt = conn.createStatement();
// 创建示例数据表
stmt.execute("CREATE TABLE users (id INTEGER, name VARCHAR, email VARCHAR, age INTEGER, status VARCHAR);");
stmt.execute("CREATE TABLE posts (id INTEGER, user_id INTEGER, title VARCHAR, content VARCHAR, status VARCHAR);");
// 插入示例数据
stmt.execute("INSERT INTO users VALUES (1, 'Alice', 'alice@example.com', 25, 'active');");
stmt.execute("INSERT INTO users VALUES (2, 'Bob', 'bob@example.com', 30, 'inactive');");
stmt.execute("INSERT INTO users VALUES (3, 'Carol', 'carol@example.com', 22, 'active');");
stmt.execute("INSERT INTO posts VALUES (1, 1, 'Post 1', 'Content 1', 'active');");
stmt.execute("INSERT INTO posts VALUES (2, 1, 'Post 2', 'Content 2', 'active');");
stmt.execute("INSERT INTO posts VALUES (3, 2, 'Post 3', 'Content 3', 'inactive');");
stmt.execute("INSERT INTO posts VALUES (4, 3, 'Post 4', 'Content 4', 'active');");
stmt.execute("INSERT INTO posts VALUES (5, 3, 'Post 5', 'Content 5', 'active');");
}
@After
public void tearDown() throws Exception {
stmt.execute("DROP TABLE users;");
stmt.execute("DROP TABLE posts;");
stmt.close();
conn.close();
}
@Test
public void testJsonToSQL() throws Exception {
// 示例JSON数据
String jsonString = "{\n" +
" \"select\": [\"users.name\", \"users.email\", {\"function\": \"COUNT\", \"field\": \"*\", \"alias\": \"post_count\"}],\n" +
" \"from\": \"users\",\n" +
" \"joins\": [\n" +
" {\n" +
" \"type\": \"INNER JOIN\",\n" +
" \"table\": \"posts\",\n" +
" \"on\": {\n" +
" \"conditions\": [\n" +
" {\"left\": \"users.id\", \"operator\": \"=\", \"right\": \"posts.user_id\"},\n" +
" {\"left\": \"users.status\", \"operator\": \"=\", \"right\": \"posts.status\"}\n" +
" ],\n" +
" \"logic\": \"AND\"\n" +
" }\n" +
" }\n" +
" ],\n" +
" \"where\": {\n" +
" \"conditions\": [\n" +
" {\n" +
" \"logic\": \"AND\",\n" +
" \"conditions\": [\n" +
" {\"field\": \"users.age\", \"operator\": \">\", \"value\": 21},\n" +
" {\"field\": \"users.status\", \"operator\": \"=\", \"value\": \"active\"}\n" +
" ]\n" +
" },\n" +
" {\n" +
" \"logic\": \"OR\",\n" +
" \"conditions\": [\n" +
" {\"field\": \"users.name\", \"operator\": \"=\", \"value\": \"Alice\"},\n" +
" {\"field\": \"users.name\", \"operator\": \"=\", \"value\": \"Carol\"}\n" +
" ]\n" +
" }\n" +
" ],\n" +
" \"logic\": \"AND\"\n" +
" },\n" +
" \"group_by\": [\"users.name\", \"users.email\"],\n" +
" \"having\": {\n" +
" \"conditions\": [\n" +
" {\"field\": \"post_count\", \"operator\": \">\", \"value\": 1}\n" +
" ],\n" +
" \"logic\": \"AND\"\n" +
" },\n" +
" \"order_by\": [\n" +
" {\"field\": \"users.name\", \"direction\": \"ASC\"}\n" +
" ],\n" +
" \"limit\": 10,\n" +
" \"offset\": 0\n" +
"}";
JSONObject json = JSONObject.parseObject(jsonString);
String sqlQuery = JSONToSQL.jsonToSQL(json);
System.out.println("Generated SQL Query: " + sqlQuery);
// 验证生成的SQL语句是否正确
String expectedSQL = "SELECT users.name, users.email, COUNT(*) AS post_count FROM users " +
"INNER JOIN posts ON users.id = posts.user_id AND users.status = posts.status " +
"WHERE (users.age > 21 AND users.status = 'active') " +
"AND (users.name = 'Alice' OR users.name = 'Carol') " +
"GROUP BY users.name, users.email " +
"HAVING post_count > 1 " +
"ORDER BY users.name ASC " +
"LIMIT 10 OFFSET 0;";
assertEquals(expectedSQL, sqlQuery);
// 执行生成的SQL查询
ResultSet rs = stmt.executeQuery(sqlQuery);
// 验证查询结果
assertTrue(rs.next());
assertEquals("Alice", rs.getString("name"));
assertEquals("alice@example.com", rs.getString("email"));
assertEquals(2, rs.getInt("post_count"));
assertTrue(rs.next());
assertEquals("Carol", rs.getString("name"));
assertEquals("carol@example.com", rs.getString("email"));
assertEquals(2, rs.getInt("post_count"));
assertFalse(rs.next());
}
}
六、在Idea工具运行代码


将代码复制到Idea,直接运行成功。绝大部分程序员应该是做不到的。
我们也可以进一步让大模型优化一些细节,比如校验JSON参数是否符合规范、生成更大的数据集进行测试、生成更复杂的测试用例等等;
如何学习AI大模型?
作为一名热心肠的互联网老兵,我决定把宝贵的AI知识分享给大家。 至于能学习到多少就看你的学习毅力和能力了 。我已将重要的AI大模型资料包括AI大模型入门学习思维导图、精品AI大模型学习书籍手册、视频教程、实战学习等录播视频免费分享出来。
这份完整版的大模型 AI 学习资料已经上传优快云,朋友们如果需要可以微信扫描下方优快云官方认证二维码免费领取【保证100%免费】
一、全套AGI大模型学习路线
AI大模型时代的学习之旅:从基础到前沿,掌握人工智能的核心技能!

二、640套AI大模型报告合集
这套包含640份报告的合集,涵盖了AI大模型的理论研究、技术实现、行业应用等多个方面。无论您是科研人员、工程师,还是对AI大模型感兴趣的爱好者,这套报告合集都将为您提供宝贵的信息和启示。

三、AI大模型经典PDF籍
随着人工智能技术的飞速发展,AI大模型已经成为了当今科技领域的一大热点。这些大型预训练模型,如GPT-3、BERT、XLNet等,以其强大的语言理解和生成能力,正在改变我们对人工智能的认识。 那以下这些PDF籍就是非常不错的学习资源。

四、AI大模型商业化落地方案

作为普通人,入局大模型时代需要持续学习和实践,不断提高自己的技能和认知水平,同时也需要有责任感和伦理意识,为人工智能的健康发展贡献力量。
1441

被折叠的 条评论
为什么被折叠?



