2.3. Catalog_Exercise 2

本文介绍了一个简单的数据库目录类实现,该类用于跟踪数据库中所有可用的表及其相关模式。文章详细解释了如何添加新表、获取表信息以及读取表内容,并提供了完整的代码示例。

The catalog (class Catalog in SimpleDB) consists of a list of the tables and schemas of the tables that are currently in the database. You will need to support the ability to add a new table, as well as getting information about a particular table. Associated with each table is a TupleDesc object that allows operators to determine the types and number of fields in a table.
The global catalog is a single instance of Catalog that is allocated for the entire SimpleDB process. The global catalog can be retrieved via the method Database.getCatalog(), and the same goes for the global buffer pool (using Database.getBufferPool()).

Exercise 2. Implement the skeleton methods in:
src/java/simpledb/Catalog.java
At this point, your code should pass the unit tests in CatalogTest.

这会是写好目录,这个类里头有所有的表和表的数据。代码如下

package simpledb;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

/**
 * The Catalog keeps track of all available tables in the database and their
 * associated schemas.
 * For now, this is a stub catalog that must be populated with tables by a
 * user program before it can be used -- eventually, this should be converted
 * to a catalog that reads a catalog table from disk.
 */

public class Catalog {

    /**
     * Constructor.
     * Creates a new, empty catalog.
     */
    private List<table> tables ;
    static class table{
        public DbFile dbFile;
        public String name;
        public String pkeyField;
        public int tableId;

    }
    public Catalog() {
        // some code goes here
        tables = new ArrayList<>();
    }

    /**
     * Add a new table to the catalog.
     * This table's contents are stored in the specified DbFile.
     * @param file the contents of the table to add;  file.getId() is the identfier of
     *    this file/tupledesc param for the calls getTupleDesc and getFile
     * @param name the name of the table -- may be an empty string.  May not be null.  If a name
     * @param pkeyField the name of the primary key field
     * conflict exists, use the last table to be added as the table for a given name.
     */
    public void addTable(DbFile file, String name, String pkeyField) {
        // some code goes here
        table newtable = new table();
        newtable.dbFile = file;
        newtable.name = name;
        newtable.pkeyField = pkeyField;
        newtable.tableId = file.getId();
        for (int i=0;i<tables.size();i++){
            table t = tables.get(i);
            if (t.name==name){
                newtable.tableId = t.tableId;
                tables.set(i,newtable);
                return;
            }
        }
        this.tables.add(newtable);
        System.out.println(newtable);
    }

    public void addTable(DbFile file, String name) {
        addTable(file, name, "");
    }

    /**
     * Add a new table to the catalog.
     * This table has tuples formatted using the specified TupleDesc and its
     * contents are stored in the specified DbFile.
     * @param file the contents of the table to add;  file.getId() is the identfier of
     *    this file/tupledesc param for the calls getTupleDesc and getFile
     */
    public void addTable(DbFile file) {
        addTable(file, (UUID.randomUUID()).toString());
    }

    /**
     * Return the id of the table with a specified name,
     * @throws NoSuchElementException if the table doesn't exist
     */
    public int getTableId(String name) throws NoSuchElementException {
        // some code goes here
        for (table t:tables){
            if (t.name==name){
                return t.tableId;
            }
        }
        throw new NoSuchElementException();
    }

    /**
     * Returns the tuple descriptor (schema) of the specified table
     * @param tableid The id of the table, as specified by the DbFile.getId()
     *     function passed to addTable
     * @throws NoSuchElementException if the table doesn't exist
     */
    public TupleDesc getTupleDesc(int tableid) throws NoSuchElementException {
        // some code goes here
        for (table t :tables){
            if (t.tableId==tableid){
                return t.dbFile.getTupleDesc();
            }
        }
        return null;
    }

    /**
     * Returns the DbFile that can be used to read the contents of the
     * specified table.
     * @param tableid The id of the table, as specified by the DbFile.getId()
     *     function passed to addTable
     */
    public DbFile getDbFile(int tableid) throws NoSuchElementException {
        // some code goes here
        for (table t :tables){
            if (t.tableId==tableid){
                return t.dbFile;
            }
        }
        return null;
    }

    public String getPrimaryKey(int tableid) {
        // some code goes here
        for (table t :tables){
            if (t.tableId==tableid){
                return t.pkeyField;
            }
        }
        return null;
    }

    public Iterator<Integer> tableIdIterator() {
        // some code goes here
        return  new Iterator<Integer>() {
            private int index=0;
            @Override
            public boolean hasNext() {
                return index<tables.size();
            }

            @Override
            public Integer next() {
                int count = index;
                index++;
                return tables.get(count).tableId;
            }
        };
    }

    public String getTableName(int id) {
        // some code goes here
        for (table t :tables){
            if (t.tableId==id){
                return t.name;
            }
        }
        return null;
    }

    /** Delete all tables from the catalog */
    public void clear() {
        // some code goes here
        tables = new ArrayList<>();
    }

    /**
     * Reads the schema from a file and creates the appropriate tables in the database.
     * @param catalogFile
     */
    public void loadSchema(String catalogFile) {
        String line = "";
        String baseFolder=new File(catalogFile).getParent();
        try {
            BufferedReader br = new BufferedReader(new FileReader(new File(catalogFile)));

            while ((line = br.readLine()) != null) {
                //assume line is of the format name (field type, field type, ...)
                String name = line.substring(0, line.indexOf("(")).trim();
                //System.out.println("TABLE NAME: " + name);
                String fields = line.substring(line.indexOf("(") + 1, line.indexOf(")")).trim();
                String[] els = fields.split(",");
                ArrayList<String> names = new ArrayList<String>();
                ArrayList<Type> types = new ArrayList<Type>();
                String primaryKey = "";
                for (String e : els) {
                    String[] els2 = e.trim().split(" ");
                    names.add(els2[0].trim());
                    if (els2[1].trim().toLowerCase().equals("int"))
                        types.add(Type.INT_TYPE);
                    else if (els2[1].trim().toLowerCase().equals("string"))
                        types.add(Type.STRING_TYPE);
                    else {
                        System.out.println("Unknown type " + els2[1]);
                        System.exit(0);
                    }
                    if (els2.length == 3) {
                        if (els2[2].trim().equals("pk"))
                            primaryKey = els2[0].trim();
                        else {
                            System.out.println("Unknown annotation " + els2[2]);
                            System.exit(0);
                        }
                    }
                }
                Type[] typeAr = types.toArray(new Type[0]);
                String[] namesAr = names.toArray(new String[0]);
                TupleDesc t = new TupleDesc(typeAr, namesAr);
                HeapFile tabHf = new HeapFile(new File(baseFolder+"/"+name + ".dat"), t);
                addTable(tabHf,name,primaryKey);
                System.out.println("Added table : " + name + " with schema " + t);
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(0);
        } catch (IndexOutOfBoundsException e) {
            System.out.println ("Invalid catalog entry : " + line);
            System.exit(0);
        }
    }
}

这个可以还是不可以通过测试,因为我前面的TupleDesc出了点问题,导致第一个test过不了,就是equals方法出了个问题,得补上TDItem的equals方法

public static class TDItem implements Serializable {

        private static final long serialVersionUID = 1L;

        /**
         * The type of the field
         */
        Type fieldType;

        /**
         * The name of the field
         */
        String fieldName;

        public TDItem(Type t, String n) {
            this.fieldName = n;
            this.fieldType = t;
        }

        public String toString() {
            return fieldName + "(" + fieldType + ")";
        }

        @Override
        public boolean equals(Object obj) {
            if(obj==null)return false;
            if (obj==this)return true;
            if (getClass() !=obj.getClass()){
                return false;
            }
            TDItem tdItem = (TDItem) obj;

            return tdItem.fieldName.equals(this.fieldName)&&tdItem.fieldType.equals(this.fieldType);
        }
    }

这样三个测试都可以过了。

感谢补充!你刚才说明: > “正常是有的” —— 表示这些被注释的字段其实是 **需要输出的**,只是在测试时临时注释掉了。 这意味着我们**不能删除 `CG` 和 `CGC` 的 JOIN**,而必须保留并优化它们。之前的建议中移除了这两个表的连接是错误的前提,现在我们要重新调整策略,在 **保证正确输出的前提下进行性能优化**--- ## ✅ 目标 在以下前提下提升 SQL 性能: - 必须返回配置组信息(`CG.GROUP_NAME`) - 必须返回选中的目录项(`CGC.CATALOG_NO`, `DESCRIPTION`, `QTY` 等) - 当前查询因多层 LEFT JOIN 关联大表导致极慢 --- ## 🔍 问题根源分析 你现在有如下结构: ```sql LEFT JOIN TB_SVCEPOP_CONFIGURATION_DATA D ON ... LEFT JOIN TB_SVCEPOP_CONFIGURATION_GROUP CG ON CG.CONFIGURATION_ID = D.ID AND CG.DEL_FLAG = &#39;0&#39; LEFT JOIN TB_SVCEPOP_CONFIGURATION_GROUP_CATALOG CGC ON CGC.CONFIGURATION_GROUP_ID = CG.ID AND CGC.IS_SELECTED = &#39;0&#39; ``` ### ❌ 导致慢的原因: 1. **一对多 → 多对多膨胀** - 一个 `D.ID` 可能对应多个 `CG`(配置组) - 每个 `CG` 又可能对应多个 `CGC` 记录且 `IS_SELECTED=&#39;0&#39;` - 最终结果集行数急剧膨胀(笛卡尔积效应),即使你只想要“一行主数据” 2. **缺乏索引支持** - `CG(CONFIGURATION_ID, DEL_FLAG)` 是否存在? - `CGC(CONFIGURATION_GROUP_ID, IS_SELECTED)` 是否存在? 3. **未聚合或去重,数据库要做大量工作** 4. **最终 SELECT 中没有使用聚合函数,但实际业务可能是“取一条代表记录”或“合并所有 Catalog** --- ## ✅ 解决方案:根据业务语义优化 JOIN 方式 你需要先明确一个问题: > 💬 **每个主单据(B.MASTER_NO)应该对应多少条 CG/CGC 数据?** ### ✅ 场景一:每笔订单只有一个有效配置组和其下的多个 Catalog 项(最常见) 👉 你需要的是:**列出该订单下所有被选中的 Catalog 条目(明细级扩展)** #### ✔️ 正确做法:允许结果膨胀(即一行变多行),但确保高效 JOIN ```sql -- 示例片段(整合进主查询) SELECT ... CG.GROUP_NAME AS GROUP_NAME, &#39;&#39; AS IB_QTY, -- 若无意义可保持为空 CGC.CATALOG_NO AS CATALOG_NAME, CGC.CATALOG_NAME AS DESCRIPTION, CGC.QTY AS Qty, CGC.QTY AS TOTAL_QTY ... FROM TB_SVCEPOP_BASIC_INFO B ... LEFT JOIN TB_SVCEPOP_CONFIGURATION_DATA D ON ... LEFT JOIN TB_SVCEPOP_CONFIGURATION_GROUP CG ON CG.CONFIGURATION_ID = D.ID AND CG.DEL_FLAG = &#39;0&#39; LEFT JOIN TB_SVCEPOP_CONFIGURATION_GROUP_CATALOG CGC ON CGC.CONFIGURATION_GROUP_ID = CG.ID AND CGC.IS_SELECTED = &#39;0&#39; ``` ✅ 这种写法是对的,只要你: - 加了索引 - 接受“主表一行 → 输出多行”的事实(报表类需求常见) --- ### ✅ 场景二:只想展示“第一个”或“任意一个”配置组 + 其 catalog(避免膨胀) 如果你只是要填充字段,并不想让结果膨胀成多行,那就不能直接 LEFT JOIN 多对多! #### ❌ 错误方式: ```sql LEFT JOIN CG ... LEFT JOIN CGC ... -- → 一条变成几十条,SUM 被放大,GROUP BY 成本飙升 ``` #### ✅ 正确方式:使用子查询或 LATERAL 提前聚合 / 取样 ##### ✅ 推荐改法:用 `APPLY` (SQL Server) 或 `LATERAL` (Oracle 12c+/PostgreSQL) 取第一条 Catalog 但由于你可能是 Oracle,这里提供 **Oracle 兼容写法(使用 `OUTER APPLY` 类似逻辑 + 子查询)**: ```sql LEFT JOIN ( SELECT CG.CONFIGURATION_ID, CG.GROUP_NAME, CG.ID AS CONFIG_GROUP_ID FROM TB_SVCEPOP_CONFIGURATION_GROUP CG WHERE CG.DEL_FLAG = &#39;0&#39; AND ROW_NUMBER() OVER (PARTITION BY CG.CONFIGURATION_ID ORDER BY CG.ID) = 1 ) CG ON CG.CONFIGURATION_ID = D.ID LEFT JOIN ( SELECT CGC.CONFIGURATION_GROUP_ID, CGC.CATALOG_NO, CGC.CATALOG_NAME, CGC.QTY, ROW_NUMBER() OVER (PARTITION BY CGC.CONFIGURATION_GROUP_ID ORDER BY CGC.ID) AS RN FROM TB_SVCEPOP_CONFIGURATION_GROUP_CATALOG CGC WHERE CGC.IS_SELECTED = &#39;0&#39; ) CGC ON CGC.CONFIGURATION_GROUP_ID = CG.CONFIG_GROUP_ID AND CGC.RN = 1 ``` 📌 这样可以保证: - 每个 `CONFIGURATION_ID` 最多出一条 `CG` - 每个 `CG` 最多出一条 `CGC`(第一条) - 不会导致主表记录膨胀 --- ### ✅ 场景三:你想把多个 Catalog 合并为逗号分隔字符串(如 CATALOG_NO 列拼接) 使用 `LISTAGG`: ```sql LEFT JOIN ( SELECT CG.CONFIGURATION_ID, MAX(CG.GROUP_NAME) AS GROUP_NAME FROM TB_SVCEPOP_CONFIGURATION_GROUP CG WHERE CG.DEL_FLAG = &#39;0&#39; GROUP BY CG.CONFIGURATION_ID ) CG ON CG.CONFIGURATION_ID = D.ID LEFT JOIN ( SELECT CGC.CONFIGURATION_GROUP_ID, LISTAGG(CGC.CATALOG_NO, &#39;; &#39;) WITHIN GROUP (ORDER BY CGC.ID) AS CATALOG_NAME, LISTAGG(CGC.CATALOG_NAME, &#39;; &#39;) WITHIN GROUP (ORDER BY CGC.ID) AS DESCRIPTION, SUM(CGC.QTY) AS TOTAL_QTY FROM TB_SVCEPOP_CONFIGURATION_GROUP_CATALOG CGC WHERE CGC.IS_SELECTED = &#39;0&#39; GROUP BY CGC.CONFIGURATION_GROUP_ID ) CGC_SUM ON CGC_SUM.CONFIGURATION_GROUP_ID = CG.ID ``` 然后你在 SELECT 中就可以安全地显示汇总值而不膨胀主表。 --- ## ✅ 综合推荐方案(兼顾性能与功能) 假设你的需求是: ➡️ 展示每一个被选中的 Catalog 明细(即允许主表扩展为多行) 那么你应该保留原始 JOIN 结构,但做以下优化: ### ✅ 1. 确保创建以下索引(关键!) ```sql -- 支持 D → CG 关联 CREATE INDEX IDX_CG_CFGID_DEL ON TB_SVCEPOP_CONFIGURATION_GROUP(CONFIGURATION_ID, DEL_FLAG); -- 支持 CG → CGC 关联 + 过滤 CREATE INDEX IDX_CGC_GRPSEL ON TB_SVCEPOP_CONFIGURATION_GROUP_CATALOG(CONFIGURATION_GROUP_ID, IS_SELECTED); -- 支持配置数据基础查询 CREATE INDEX IDX_CFGDATA_RECVER ON TB_SVCEPOP_CONFIGURATION_DATA(RECORD_NO, VERSION, DEL_FLAG); ``` 如果没有这些索引,每次都是全表扫描,速度必然极慢。 --- ### ✅ 2. 使用 CTE 预处理复杂部分(提高执行计划稳定性) ```sql WITH CONFIG_DETAILS AS ( SELECT D.RECORD_NO, D.VERSION, CG.GROUP_NAME, CGC.CATALOG_NO, CGC.CATALOG_NAME, CGC.QTY FROM TB_SVCEPOP_CONFIGURATION_DATA D INNER JOIN TB_SVCEPOP_CONFIGURATION_GROUP CG ON CG.CONFIGURATION_ID = D.ID AND CG.DEL_FLAG = &#39;0&#39; INNER JOIN TB_SVCEPOP_CONFIGURATION_GROUP_CATALOG CGC ON CGC.CONFIGURATION_GROUP_ID = CG.ID AND CGC.IS_SELECTED = &#39;0&#39; WHERE D.DEL_FLAG = &#39;0&#39; ), FL_START AS ( SELECT F.NO, MIN(p.START_TIME_) AS START_TIME_ FROM ACT_HI_PROCINST p JOIN TB_BUSINESS_TO_FLOW F ON F.BUSINESS_ID = p.BUSINESS_KEY_ GROUP BY F.NO ), ORDER_DATES AS ( SELECT MODULE_NO, VERSION, MAX(CASE WHEN ORDER_STATUS = &#39;ORDER_LOCKED&#39; THEN CREATE_TIME END) AS ORDER_LOCKED_DATE, MAX(CASE WHEN ORDER_STATUS = &#39;ORDER_BOOKED&#39; THEN CREATE_TIME END) AS ORDER_BOOKED_DATE FROM SYS_PROCESS_LOG WHERE RECORD_TYPE = &#39;1&#39; AND ORDER_STATUS IN (&#39;ORDER_LOCKED&#39;, &#39;ORDER_BOOKED&#39;) GROUP BY MODULE_NO, VERSION ) SELECT B.MASTER_NO AS PACKAGE_NO, C.VERSION AS HEADER_VERSION, B.QUOTE_NO AS QUOTE_NO, B.CONTRACT_NO AS CONTRACT_NO, &#39;&#39; as OC_VERSION, CASE WHEN C.CHANNEL = &#39;Direct&#39; THEN &#39;No&#39; ELSE &#39;Yes&#39; END AS INVOLVE_DEALER, C.CUSTOMER_PROVINCE_NAME AS CUSTOMER_PROVINCE, C.BUYER_TYPE_NAME AS CUSTOMER_TYPE, B.END_USER_NAME AS END_USER_NAME, B.END_USER_PROVINCE AS END_USER_PROVINCE, B.SALES_FUNCTION AS SALES_REGION, B.SVC_SALES AS SVC_SALES, B.SALES_REP_SSO AS CREATER_SSO, B.TYPE AS TYPE, B.SEGMENTATION AS SEGMENTATION, B.OFFERING AS OFFERING, B.IB_MODALITY AS IB_MODALITY, SYS.SYSTEM_ID AS SYSTEM_ID, CD.GROUP_NAME AS GROUP_NAME, &#39;&#39; AS IB_QTY, CD.CATALOG_NO AS CATALOG_NAME, CD.CATALOG_NAME AS DESCRIPTION, CD.QTY AS Qty, CD.QTY AS TOTAL_QTY, B.PL AS P_L, &#39;&#39; AS PJM_NAME, &#39;&#39; AS PJM_SSO, B.START_TIME AS START_DATE, B.END_TIME AS END_DATE, B.CONTRACT_PRICE AS CONTRACT_PRICE, B.CURRENCY AS CURRENCY, ROUND(((TO_DATE(B.end_time, &#39;YYYY-MM-DD&#39;) - TO_DATE(B.start_time, &#39;YYYY-MM-DD&#39;) +1)/365), 5) AS SERVICE_YEAR, C.PAYMENT_CYCLE AS PAYMENT_CYCLE, OD.ORDER_LOCKED_DATE, R.ORDER_STATUS, OD.ORDER_BOOKED_DATE, &#39;&#39; AS OC_STATUS, FL.START_TIME_ FROM TB_SVCEPOP_BASIC_INFO B INNER JOIN SYS_DYNAMIC_FORM_RECORD R ON B.RECORD_NO = R.MASTER_NO AND B.VERSION = R.VERSION AND R.DEL_FLAG = &#39;0&#39; INNER JOIN TB_SVCEPOP_CUSTOMER_INFO C ON C.RECORD_NO = B.MASTER_NO AND C.VERSION = B.VERSION AND C.DEL_FLAG = &#39;0&#39; LEFT JOIN CONFIG_DETAILS CD ON CD.RECORD_NO = R.MASTER_NO AND CD.VERSION = R.VERSION LEFT JOIN ( SELECT SD.RECORD_NO, SD.VERSION, SCG.SYSTEM_ID FROM TB_SVCEPOP_CONFIGURATION_DATA SD JOIN TB_SVCEPOP_CONFIGURATION_GROUP SCG ON SD.ID = SCG.CONFIGURATION_ID UNION SELECT BAISC_NO, VERSION, SYSTEM_ID_V AS SYSTEM_ID FROM TB_SVCEPOP_BASIC_SYSTEM WHERE DEL_FLAG = &#39;0&#39; ) SYS ON B.RECORD_NO = SYS.RECORD_NO AND B.VERSION = SYS.VERSION JOIN FL_START FL ON FL.NO = B.QUOTE_NO LEFT JOIN ORDER_DATES OD ON OD.MODULE_NO = R.MASTER_NO AND OD.VERSION = R.VERSION WHERE R.RECORD_STATUS != &#39;DRAFT&#39;; ``` --- ## ✅ 小结:你现在应该怎么做? | 动作 | 建议 | |------|------| | ✅ 添加索引 | `IDX_CG_CFGID_DEL`, `IDX_CGC_GRPSEL` 等 | | ✅ 使用 CTE 分解逻辑 | 提升可读性和执行效率 | | ✅ 明确是否接受“一行变多行” | 决定是否需要聚合或限流 | | ✅ 如果不需要明细,则用子查询取单条或聚合 | 避免结果膨胀 | | ✅ 定期分析统计信息 | `ANALYZE TABLE` 或由 DBA 维护 | ---
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值