Analyzing MyBatis Pagination Plugin Errors When Connecting to Hive
Overview
The MyBatis pagination plugin PageHelper is a widely used paging tool, but it frequently fails when pointed at a Hive database. Hive is a Hadoop-based data warehouse whose SQL dialect and execution model differ from those of relational databases, so the plugin runs into compatibility problems in a Hive environment.
Common Error Types
1. SQL Syntax Errors
1.1 LIMIT/OFFSET syntax not supported
-- Failing example: older Hive versions do not support an OFFSET clause
SELECT * FROM users LIMIT 10 OFFSET 20;
-- Error message
Error: Error while compiling statement: FAILED: ParseException line 1:31
mismatched input 'OFFSET' expecting EOF near '10'
1.2 Subquery syntax errors
-- Failing example: Hive's subquery restrictions (older versions reject a bare * mixed with other select expressions; qualify it, e.g. t.*)
SELECT * FROM (
SELECT ROW_NUMBER() OVER() as rn, * FROM users
) t WHERE rn BETWEEN 1 AND 10;
-- Error message
Error: Error while compiling statement: FAILED: ParseException line 1:1
mismatched input 'SELECT' expecting 'EOF'
1.3 Window functions not supported
-- Failing example: very old Hive versions (pre-0.11) lack window functions
SELECT ROW_NUMBER() OVER(ORDER BY id) as rn, * FROM users;
-- Error message
Error: Error while compiling statement: FAILED: ParseException line 1:1
mismatched input 'OVER' expecting 'EOF'
2. Data Type Errors
2.1 Pagination parameter type mismatch
// Failing example: pagination parameter and entity field types do not line up
PageHelper.startPage(1, 10); // parameter types must match what the mapper and entity expect
List<User> users = userMapper.selectUsers();
// Possible error message
java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Long
2.2 Hive type conversion errors
-- Failing example: casting a primitive column to an incompatible (complex) type; note that widening casts such as INT to BIGINT are legal
SELECT CAST(id AS ARRAY<INT>) as user_id FROM users;
-- Error message (exact wording varies by Hive version)
Error: Error while compiling statement: FAILED: SemanticException
Invalid type cast from INT to ARRAY<INT>
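When mapping Hive numeric columns into Java entities, reading through plain JDBC first helps isolate type issues; a minimal sketch (URL, credentials and table are illustrative):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class HiveTypeReadDemo {
    public static void main(String[] args) throws SQLException {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:hive2://localhost:10000/default", "hive", "");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     "SELECT CAST(id AS BIGINT) AS user_id FROM users")) {
            while (rs.next()) {
                // getLong handles both INT and BIGINT columns (widening is safe)
                long userId = rs.getLong("user_id");
                System.out.println(userId);
            }
        }
    }
}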
3. Connection Configuration Errors
3.1 Driver class / classpath errors
<!-- Failing example: the driver class name below is correct, but the hive-jdbc jar is missing from the classpath -->
<property name="driverClassName" value="org.apache.hive.jdbc.HiveDriver"/>
<property name="url" value="jdbc:hive2://localhost:10000/default"/>
<!-- Error message -->
java.lang.ClassNotFoundException: org.apache.hive.jdbc.HiveDriver
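A quick way to tell a wrong driver class name apart from a missing jar is to probe the classpath directly (a sketch):

public class DriverCheck {
    public static void main(String[] args) {
        try {
            // This class name is correct for HiveServer2; if this throws,
            // the hive-jdbc jar (and its transitive Hadoop dependencies) is missing
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            System.out.println("Hive JDBC driver found on the classpath");
        } catch (ClassNotFoundException e) {
            System.err.println("hive-jdbc is not on the classpath: " + e.getMessage());
        }
    }
}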
3.2 Connection parameter errors
# Failing example: Kerberos settings that do not match the cluster configuration
hive.server2.proxy.user=admin
hive.server2.authentication=KERBEROS
hive.server2.kerberos.principal=hive/_HOST@REALM
# Error message
Authentication failed: GSS initiate failed
4. Pagination Plugin Configuration Errors
4.1 Incompatible plugin configuration
<!-- Failing example: PageHelper configured against a Hive data source -->
<plugin interceptor="com.github.pagehelper.PageInterceptor">
<property name="helperDialect" value="mysql"/>
<property name="reasonable" value="true"/>
<property name="supportMethodsArguments" value="true"/>
</plugin>
<!-- Error message -->
java.lang.RuntimeException: Unsupported database type: hive
4.2 Dialect configuration errors
# Failing example: 'hive' is not a dialect PageHelper recognizes
pagehelper.helper-dialect=hive
pagehelper.reasonable=true
pagehelper.support-methods-arguments=true
# Error message
java.lang.IllegalArgumentException: Unsupported database type: hive
Root Cause Analysis
1. Architectural Differences
1.1 SQL dialect differences
-- Hive SQL characteristics
1. No OFFSET support (before Hive 2.0)
2. Restricted subquery syntax
3. Window function support varies by version
4. Several standard SQL features are missing
-- Traditional relational databases
1. Full LIMIT/OFFSET support
2. Complex subqueries supported
3. Full window function support
4. Good standard SQL compliance
1.2 Execution engine differences
// Hive execution characteristics
1. Runs on MapReduce or Tez
2. Lazy evaluation
3. Limited transaction support (full ACID only on transactional tables)
4. Comparatively limited query optimization
// Traditional database execution characteristics
1. B+-tree indexes
2. Immediate execution
3. ACID transactions
4. Mature query optimizers
2. Pagination Plugin Compatibility
2.1 Supported database dialects
// Database dialects PageHelper recognizes (illustrative list; the real implementation uses a dialect registry rather than an enum)
public enum Dialect {
mysql, mariadb, oracle, db2, postgresql,
sqlserver, sqlite, hsqldb, informix,
h2, phoenix, derby, kingbasees,
mongo, sybase, db2_390, clickhouse
}
// Note: there is no hive dialect
2.2 SQL rewriting mechanism
// How PageHelper rewrites SQL
1. Parse the original SQL
2. Identify the database type
3. Rewrite the SQL for that dialect
4. Append the pagination logic
// What goes wrong on Hive
1. The Hive dialect cannot be identified
2. SQL rewriting fails
3. No pagination logic can be appended
3. Version Compatibility Issues
3.1 Hive version differences
# Hive version differences
Hive 1.x: basic functionality, limited SQL support
Hive 2.x: improved SQL support, partial window functions
Hive 3.x: better SQL compliance, LLAP support
# Pagination plugin version differences
PageHelper 4.x: basic pagination
PageHelper 5.x: richer features, better compatibility
PageHelper 6.x: latest line, supports more databases
3.2 JDBC driver versions
<!-- JDBC driver version differences -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>2.3.9</version> <!-- older version -->
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>3.1.3</version> <!-- newer version -->
</dependency>
Solutions
1. Custom Pagination
1.1 Manual in-memory pagination
// Custom pagination helper (imports added for completeness)
import com.github.pagehelper.PageInfo;
import java.util.ArrayList;
import java.util.List;

public class HivePaginationHelper {
/**
* Paginate in memory over a fully loaded result list
*/
public static <T> PageInfo<T> getPageInfo(List<T> allData, int pageNum, int pageSize) {
PageInfo<T> pageInfo = new PageInfo<>();
int total = allData.size();
int startIndex = (pageNum - 1) * pageSize;
int endIndex = Math.min(startIndex + pageSize, total);
if (startIndex < total) {
List<T> pageData = allData.subList(startIndex, endIndex);
pageInfo.setList(pageData);
} else {
pageInfo.setList(new ArrayList<>());
}
pageInfo.setTotal(total);
pageInfo.setPageNum(pageNum);
pageInfo.setPageSize(pageSize);
pageInfo.setPages((total + pageSize - 1) / pageSize);
return pageInfo;
}
/**
* Build a paginated Hive SQL statement using ROW_NUMBER() (the window is 1-based and inclusive)
*/
public static String buildPaginationSQL(String originalSQL, int pageNum, int pageSize) {
StringBuilder sql = new StringBuilder();
sql.append("SELECT * FROM (");
sql.append("SELECT ROW_NUMBER() OVER() as rn, t.* FROM (");
sql.append(originalSQL);
sql.append(") t");
sql.append(") paginated WHERE rn BETWEEN ");
sql.append((pageNum - 1) * pageSize + 1);
sql.append(" AND ");
sql.append(pageNum * pageSize);
return sql.toString();
}
}
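Hypothetical usage of the helper above, paging a small list in memory and building a Hive-compatible paginated statement:

import com.github.pagehelper.PageInfo;
import java.util.Arrays;
import java.util.List;

public class HivePaginationHelperDemo {
    public static void main(String[] args) {
        List<String> rows = Arrays.asList("a", "b", "c", "d", "e");
        PageInfo<String> page = HivePaginationHelper.getPageInfo(rows, 2, 2);
        System.out.println(page.getList());   // [c, d]
        System.out.println(page.getTotal());  // 5

        String sql = HivePaginationHelper.buildPaginationSQL("SELECT * FROM users", 2, 10);
        // SELECT * FROM (SELECT ROW_NUMBER() OVER() as rn, t.* FROM (SELECT * FROM users) t) paginated WHERE rn BETWEEN 11 AND 20
        System.out.println(sql);
    }
}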
1.2 Pagination with RowBounds
// Paging with RowBounds
@Service
public class UserService {
@Autowired
private UserMapper userMapper;
public List<User> getUsersByPage(int pageNum, int pageSize) {
int offset = (pageNum - 1) * pageSize;
RowBounds rowBounds = new RowBounds(offset, pageSize);
// Note: MyBatis applies RowBounds client-side, so Hive still computes and returns all preceding rows
return userMapper.selectUsers(rowBounds);
}
public PageInfo<User> getUsersPageInfo(int pageNum, int pageSize) {
// Query the total count first
int total = userMapper.countUsers();
// Then fetch just one page of data
List<User> users = getUsersByPage(pageNum, pageSize);
// Build the PageInfo directly: users already holds a single page, so re-running
// HivePaginationHelper.getPageInfo over it would report the wrong total
PageInfo<User> pageInfo = new PageInfo<>();
pageInfo.setList(users);
pageInfo.setTotal(total);
pageInfo.setPageNum(pageNum);
pageInfo.setPageSize(pageSize);
pageInfo.setPages((total + pageSize - 1) / pageSize);
return pageInfo;
}
}
2. Configuration Adjustments
2.1 Disable the PageHelper plugin
<!-- Disable the PageHelper plugin in Hive environments -->
<!--
<plugin interceptor="com.github.pagehelper.PageInterceptor">
<property name="helperDialect" value="mysql"/>
<property name="reasonable" value="true"/>
<property name="supportMethodsArguments" value="true"/>
</plugin>
-->
2.2 Custom pagination interceptor
// Custom pagination interceptor for Hive
@Intercepts({
@Signature(type = Executor.class, method = "query",
args = {MappedStatement.class, Object.class, RowBounds.class, ResultHandler.class})
})
public class HivePaginationInterceptor implements Interceptor {
@Override
public Object intercept(Invocation invocation) throws Throwable {
Object[] args = invocation.getArgs();
MappedStatement ms = (MappedStatement) args[0];
Object parameter = args[1];
RowBounds rowBounds = (RowBounds) args[2];
// Check whether pagination was requested
if (rowBounds != RowBounds.DEFAULT) {
// Get the original SQL
BoundSql boundSql = ms.getBoundSql(parameter);
String originalSQL = boundSql.getSql();
// Build the paginated SQL
String paginationSQL = buildHivePaginationSQL(originalSQL, rowBounds);
// Create a new BoundSql
BoundSql newBoundSql = new BoundSql(ms.getConfiguration(), paginationSQL,
boundSql.getParameterMappings(), parameter);
// Create a new MappedStatement that uses it
MappedStatement newMs = copyFromMappedStatement(ms, newBoundSql);
args[0] = newMs;
}
return invocation.proceed();
}
private String buildHivePaginationSQL(String originalSQL, RowBounds rowBounds) {
StringBuilder sql = new StringBuilder();
sql.append("SELECT * FROM (");
sql.append("SELECT ROW_NUMBER() OVER() as rn, t.* FROM (");
sql.append(originalSQL);
sql.append(") t");
sql.append(") paginated WHERE rn BETWEEN ");
sql.append(rowBounds.getOffset() + 1);
sql.append(" AND ");
sql.append(rowBounds.getOffset() + rowBounds.getLimit());
return sql.toString();
}
private MappedStatement copyFromMappedStatement(MappedStatement ms, BoundSql newBoundSql) {
// Wrap the new BoundSql in a SqlSource so the rewritten SQL is actually used
// (reusing ms.getSqlSource() here would silently discard the pagination)
MappedStatement.Builder builder = new MappedStatement.Builder(ms.getConfiguration(),
ms.getId(), parameterObject -> newBoundSql,
ms.getSqlCommandType());
builder.resource(ms.getResource());
builder.fetchSize(ms.getFetchSize());
builder.statementType(ms.getStatementType());
builder.keyGenerator(ms.getKeyGenerator());
builder.timeout(ms.getTimeout());
builder.parameterMap(ms.getParameterMap());
builder.resultMaps(ms.getResultMaps());
builder.resultSetType(ms.getResultSetType());
builder.cache(ms.getCache());
builder.flushCacheRequired(ms.isFlushCacheRequired());
builder.useCache(ms.isUseCache());
return builder.build();
}
}
3. Connection Tuning
3.1 A working Hive connection configuration
<!-- A working Hive connection configuration -->
<bean id="hiveDataSource" class="com.alibaba.druid.pool.DruidDataSource">
<property name="driverClassName" value="org.apache.hive.jdbc.HiveDriver"/>
<property name="url" value="jdbc:hive2://localhost:10000/default"/>
<property name="username" value="hive"/>
<property name="password" value=""/>
<!-- Connection pool settings -->
<property name="initialSize" value="5"/>
<property name="minIdle" value="5"/>
<property name="maxActive" value="20"/>
<property name="maxWait" value="60000"/>
<!-- Hive-specific settings -->
<property name="connectionProperties" value="hive.server2.proxy.user=admin"/>
</bean>
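For Spring Boot or Java-config projects, an equivalent of the XML above might look like this (a sketch; the URL, credentials and pool sizes simply mirror the XML and are assumptions):

import com.alibaba.druid.pool.DruidDataSource;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import javax.sql.DataSource;

@Configuration
public class HiveDataSourceConfig {
    @Bean
    public DataSource hiveDataSource() {
        DruidDataSource ds = new DruidDataSource();
        ds.setDriverClassName("org.apache.hive.jdbc.HiveDriver");
        ds.setUrl("jdbc:hive2://localhost:10000/default");
        ds.setUsername("hive");
        ds.setPassword("");
        ds.setInitialSize(5);   // pool settings as in the XML example
        ds.setMinIdle(5);
        ds.setMaxActive(20);
        ds.setMaxWait(60000);
        return ds;
    }
}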
3.2 Hive connection parameter tuning
# Hive server-side connection parameters
hive.server2.proxy.user=admin
hive.server2.authentication=NONE
hive.server2.transport.mode=binary
hive.server2.thrift.min.worker.threads=5
hive.server2.thrift.max.worker.threads=500
hive.server2.thrift.port=10000
hive.server2.thrift.bind.host=localhost
Configuration Tuning
1. MyBatis Configuration
1.1 Replace automatic pagination
<!-- MyBatis configuration -->
<configuration>
<!-- Baseline settings; no pagination plugin registered -->
<settings>
<setting name="defaultExecutorType" value="SIMPLE"/>
<setting name="defaultStatementTimeout" value="300"/>
<setting name="mapUnderscoreToCamelCase" value="true"/>
</settings>
<!-- Register the custom pagination interceptor instead -->
<plugins>
<plugin interceptor="com.example.interceptor.HivePaginationInterceptor"/>
</plugins>
</configuration>
1.2 Optimize SQL execution
<!-- SQL execution tuning -->
<mapper namespace="com.example.mapper.UserMapper">
<!-- Use fetchSize to speed up large result sets -->
<select id="selectUsers" resultType="com.example.entity.User" fetchSize="1000">
SELECT * FROM users
</select>
<!-- Paged query -->
<select id="selectUsersByPage" resultType="com.example.entity.User">
SELECT * FROM (
SELECT ROW_NUMBER() OVER() as rn, t.* FROM (
SELECT * FROM users
) t
) paginated
WHERE rn BETWEEN #{startRow} AND #{endRow}
</select>
</mapper>
2. Application-Level Configuration
2.1 Pagination parameter configuration
// Pagination parameter configuration (see the PaginationProperties sketch below)
@Configuration
public class PaginationConfig {
@Bean
public PaginationProperties paginationProperties() {
PaginationProperties properties = new PaginationProperties();
properties.setDefaultPageSize(20);
properties.setMaxPageSize(1000);
properties.setEnableCountQuery(true);
return properties;
}
}
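PaginationProperties is not a framework class; a minimal hypothetical POJO matching the setters used above:

// Hypothetical holder for application-wide pagination defaults
public class PaginationProperties {
    private int defaultPageSize = 20;
    private int maxPageSize = 1000;
    private boolean enableCountQuery = true;

    public int getDefaultPageSize() { return defaultPageSize; }
    public void setDefaultPageSize(int defaultPageSize) { this.defaultPageSize = defaultPageSize; }
    public int getMaxPageSize() { return maxPageSize; }
    public void setMaxPageSize(int maxPageSize) { this.maxPageSize = maxPageSize; }
    public boolean isEnableCountQuery() { return enableCountQuery; }
    public void setEnableCountQuery(boolean enableCountQuery) { this.enableCountQuery = enableCountQuery; }
}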
2.2 Cache configuration
// Cache configuration
@Configuration
@EnableCaching
public class CacheConfig {
@Autowired
private RedisConnectionFactory redisConnectionFactory;
@Bean
public CacheManager cacheManager() {
RedisCacheManager cacheManager = RedisCacheManager.builder(redisConnectionFactory)
.cacheDefaults(defaultConfig())
.build();
return cacheManager;
}
private RedisCacheConfiguration defaultConfig() {
return RedisCacheConfiguration.defaultCacheConfig()
.entryTtl(Duration.ofMinutes(30))
.serializeKeysWith(RedisSerializationContext.SerializationPair.fromSerializer(new StringRedisSerializer()))
.serializeValuesWith(RedisSerializationContext.SerializationPair.fromSerializer(new GenericJackson2JsonRedisSerializer()));
}
}
Code Examples
1. Basic Pagination
1.1 Mapper interface
// Pagination mapper interface
public interface UserMapper {
/**
* Count all users
*/
int countUsers();
/**
* Fetch one page of users
*/
List<User> selectUsersByPage(@Param("startRow") int startRow,
@Param("endRow") int endRow);
/**
* Fetch all users (for in-memory pagination)
*/
List<User> selectAllUsers();
}
1.2 Mapper implementation
// Pagination mapper implementation backed by SqlSessionTemplate
@Repository
public class UserMapperImpl implements UserMapper {
@Autowired
private SqlSessionTemplate sqlSessionTemplate;
@Override
public int countUsers() {
return sqlSessionTemplate.selectOne("UserMapper.countUsers");
}
@Override
public List<User> selectUsersByPage(@Param("startRow") int startRow,
@Param("endRow") int endRow) {
Map<String, Object> params = new HashMap<>();
params.put("startRow", startRow);
params.put("endRow", endRow);
return sqlSessionTemplate.selectList("UserMapper.selectUsersByPage", params);
}
@Override
public List<User> selectAllUsers() {
return sqlSessionTemplate.selectList("UserMapper.selectAllUsers");
}
}
1.3 Pagination service
// Pagination service implementation
@Service
public class UserService {
@Autowired
private UserMapper userMapper;
/**
* Fetch a page of users via database-side pagination
*/
public PageInfo<User> getUsersByPage(int pageNum, int pageSize) {
// Compute the pagination window
int startRow = (pageNum - 1) * pageSize + 1;
int endRow = pageNum * pageSize;
// Query the total count
int total = userMapper.countUsers();
// Fetch the page of data
List<User> users = userMapper.selectUsersByPage(startRow, endRow);
// Build the PageInfo
PageInfo<User> pageInfo = new PageInfo<>();
pageInfo.setList(users);
pageInfo.setTotal(total);
pageInfo.setPageNum(pageNum);
pageInfo.setPageSize(pageSize);
pageInfo.setPages((total + pageSize - 1) / pageSize);
return pageInfo;
}
/**
* In-memory pagination fallback
*/
public PageInfo<User> getUsersByPageManual(int pageNum, int pageSize) {
// Load all rows
List<User> allUsers = userMapper.selectAllUsers();
// Paginate in memory
return HivePaginationHelper.getPageInfo(allUsers, pageNum, pageSize);
}
}
2. Advanced Pagination
2.1 Conditional paged queries
// Conditional paged queries
public interface UserMapper {
/**
* Fetch one page of users matching a condition
*/
List<User> selectUsersByCondition(@Param("condition") UserCondition condition,
@Param("startRow") int startRow,
@Param("endRow") int endRow);
/**
* Count users matching a condition
*/
int countUsersByCondition(@Param("condition") UserCondition condition);
}
// User query condition
public class UserCondition {
private String name;
private String email;
private String status;
private Date startDate;
private Date endDate;
// getters and setters omitted
}
2.2 Dynamic SQL
<!-- Dynamic SQL for conditional pagination (note: the < character must be escaped in mapper XML) -->
<select id="selectUsersByCondition" resultType="com.example.entity.User">
SELECT * FROM (
SELECT ROW_NUMBER() OVER() as rn, t.* FROM (
SELECT * FROM users
<where>
<if test="condition.name != null and condition.name != ''">
AND name LIKE CONCAT('%', #{condition.name}, '%')
</if>
<if test="condition.email != null and condition.email != ''">
AND email LIKE CONCAT('%', #{condition.email}, '%')
</if>
<if test="condition.status != null and condition.status != ''">
AND status = #{condition.status}
</if>
<if test="condition.startDate != null">
AND created_at >= #{condition.startDate}
</if>
<if test="condition.endDate != null">
AND created_at &lt;= #{condition.endDate}
</if>
</where>
) t
) paginated
WHERE rn BETWEEN #{startRow} AND #{endRow}
</select>
<select id="countUsersByCondition" resultType="int">
SELECT COUNT(*) FROM users
<where>
<if test="condition.name != null and condition.name != ''">
AND name LIKE CONCAT('%', #{condition.name}, '%')
</if>
<if test="condition.email != null and condition.email != ''">
AND email LIKE CONCAT('%', #{condition.email}, '%')
</if>
<if test="condition.status != null and condition.status != ''">
AND status = #{condition.status}
</if>
<if test="condition.startDate != null">
AND created_at >= #{condition.startDate}
</if>
<if test="condition.endDate != null">
AND created_at &lt;= #{condition.endDate}
</if>
</where>
</select>
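A hedged sketch of a service method driving the conditional pagination above; the window computation mirrors UserService.getUsersByPage, and userMapper is the injected mapper:

public PageInfo<User> getUsersByCondition(UserCondition condition, int pageNum, int pageSize) {
    // ROW_NUMBER() is 1-based, so the window [startRow, endRow] is inclusive
    int startRow = (pageNum - 1) * pageSize + 1;
    int endRow = pageNum * pageSize;
    int total = userMapper.countUsersByCondition(condition);
    List<User> users = userMapper.selectUsersByCondition(condition, startRow, endRow);
    PageInfo<User> pageInfo = new PageInfo<>();
    pageInfo.setList(users);
    pageInfo.setTotal(total);
    pageInfo.setPageNum(pageNum);
    pageInfo.setPageSize(pageSize);
    pageInfo.setPages((total + pageSize - 1) / pageSize);
    return pageInfo;
}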
Best Practices
1. Choosing a Pagination Strategy
1.1 Small result sets
// Small-data strategy: page in memory
public class SmallDataPaginationStrategy implements PaginationStrategy {
@Override
public PageInfo<User> paginate(List<User> allData, int pageNum, int pageSize) {
// Page directly in memory
return HivePaginationHelper.getPageInfo(allData, pageNum, pageSize);
}
}
1.2 Large result sets
// Large-data strategy: page in the database (the PaginationStrategy interface is sketched just below)
public class LargeDataPaginationStrategy implements PaginationStrategy {
@Autowired
private UserMapper userMapper;
@Override
public PageInfo<User> paginate(List<User> allData, int pageNum, int pageSize) {
// Page in the database; the allData argument is ignored by this strategy
int startRow = (pageNum - 1) * pageSize + 1;
int endRow = pageNum * pageSize;
List<User> pageData = userMapper.selectUsersByPage(startRow, endRow);
int total = userMapper.countUsers();
return buildPageInfo(pageData, total, pageNum, pageSize);
}
}
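The PaginationStrategy interface implemented by both classes is not defined in the original text; a minimal sketch consistent with the two implementations (buildPageInfo is the shared helper sketched under 2.1 below):

import com.github.pagehelper.PageInfo;
import java.util.List;

// Strategy contract: produce one page of users for the given paging inputs
public interface PaginationStrategy {
    PageInfo<User> paginate(List<User> allData, int pageNum, int pageSize);
}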
2. Performance Optimization
2.1 Query optimization
// Query optimization strategy
@Service
public class OptimizedUserService {
@Autowired
private UserMapper userMapper;
// Assumed application-level cache facade with get/put-with-TTL methods (hypothetical; not Spring's CacheManager)
@Autowired
private PageCache cacheManager;
/**
* Optimized paged query
*/
public PageInfo<User> getUsersOptimized(int pageNum, int pageSize) {
// 1. Use the cache to avoid repeated queries
String cacheKey = "users:page:" + pageNum + ":" + pageSize;
PageInfo<User> cachedResult = cacheManager.get(cacheKey);
if (cachedResult != null) {
return cachedResult;
}
// 2. Query the total count and the page data in parallel
CompletableFuture<Integer> totalFuture = CompletableFuture.supplyAsync(() ->
userMapper.countUsers());
CompletableFuture<List<User>> dataFuture = CompletableFuture.supplyAsync(() -> {
int startRow = (pageNum - 1) * pageSize + 1;
int endRow = pageNum * pageSize;
return userMapper.selectUsersByPage(startRow, endRow);
});
// 3. Wait for both results and build the PageInfo
try {
int total = totalFuture.get(10, TimeUnit.SECONDS);
List<User> users = dataFuture.get(10, TimeUnit.SECONDS);
PageInfo<User> pageInfo = buildPageInfo(users, total, pageNum, pageSize);
// 4. Cache the result
cacheManager.put(cacheKey, pageInfo, Duration.ofMinutes(5));
return pageInfo;
} catch (Exception e) {
throw new RuntimeException("Paged query failed", e);
}
}
}
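buildPageInfo is called above (and by LargeDataPaginationStrategy) but never shown; a minimal version, consistent with how PageInfo is populated elsewhere in this guide, that could live on HivePaginationHelper:

public static <T> PageInfo<T> buildPageInfo(List<T> list, int total, int pageNum, int pageSize) {
    PageInfo<T> pageInfo = new PageInfo<>();
    pageInfo.setList(list);
    pageInfo.setTotal(total);
    pageInfo.setPageNum(pageNum);
    pageInfo.setPageSize(pageSize);
    pageInfo.setPages((total + pageSize - 1) / pageSize); // ceiling division
    return pageInfo;
}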
2.2 Storage and partitioning
-- Hive storage-side optimization (partitioning and file format rather than indexes)
-- 1. Create a partitioned table
CREATE TABLE users (
id INT,
name STRING,
email STRING,
status STRING,
created_at TIMESTAMP
) PARTITIONED BY (year INT, month INT, day INT);
-- 2. Query by partition
SELECT * FROM users
WHERE year = 2024 AND month = 1 AND day = 15
ORDER BY created_at DESC;
-- 3. Store the data as ORC
CREATE TABLE users_orc (
id INT,
name STRING,
email STRING,
status STRING,
created_at TIMESTAMP
) STORED AS ORC;
3. Error Handling
3.1 Exception handlers
// Global exception handling (sketches of the referenced application classes follow this block)
@ControllerAdvice
public class GlobalExceptionHandler {
/**
* Handle pagination failures
*/
@ExceptionHandler(PaginationException.class)
public ResponseEntity<ErrorResponse> handlePaginationException(PaginationException e) {
ErrorResponse error = new ErrorResponse();
error.setCode("PAGINATION_ERROR");
error.setMessage("分页查询失败: " + e.getMessage());
error.setTimestamp(new Date());
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(error);
}
/**
* Handle Hive connection failures
*/
@ExceptionHandler(HiveConnectionException.class)
public ResponseEntity<ErrorResponse> handleHiveConnectionException(HiveConnectionException e) {
ErrorResponse error = new ErrorResponse();
error.setCode("HIVE_CONNECTION_ERROR");
error.setMessage("Hive连接失败: " + e.getMessage());
error.setTimestamp(new Date());
return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).body(error);
}
}
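PaginationException, HiveConnectionException and ErrorResponse are application-level classes assumed by the handler above, not framework types; minimal sketches:

public class PaginationException extends RuntimeException {
    public PaginationException(String message, Throwable cause) { super(message, cause); }
}

public class HiveConnectionException extends RuntimeException {
    public HiveConnectionException(String message, Throwable cause) { super(message, cause); }
}

public class ErrorResponse {
    private String code;
    private String message;
    private java.util.Date timestamp;

    public void setCode(String code) { this.code = code; }
    public void setMessage(String message) { this.message = message; }
    public void setTimestamp(java.util.Date timestamp) { this.timestamp = timestamp; }
    // getters omitted for brevity
}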
3.2 Fallback strategy
// Fallback implementation
@Service
public class UserServiceWithFallback {
private static final Logger log = LoggerFactory.getLogger(UserServiceWithFallback.class);
@Autowired
private UserMapper userMapper;
/**
* Paged query with fallback
*/
public PageInfo<User> getUsersWithFallback(int pageNum, int pageSize) {
try {
// Try normal database-side pagination first (getUsersByPage as defined in UserService above)
return getUsersByPage(pageNum, pageSize);
} catch (Exception e) {
log.warn("Paged query failed, falling back", e);
// Fall back to in-memory pagination
return getUsersByPageManual(pageNum, pageSize);
}
}
/**
* In-memory pagination fallback
*/
private PageInfo<User> getUsersByPageManual(int pageNum, int pageSize) {
try {
// Load all rows
List<User> allUsers = userMapper.selectAllUsers();
// Paginate in memory
return HivePaginationHelper.getPageInfo(allUsers, pageNum, pageSize);
} catch (Exception e) {
log.error("In-memory pagination failed as well", e);
// Return an empty result page
PageInfo<User> emptyPage = new PageInfo<>();
emptyPage.setList(new ArrayList<>());
emptyPage.setTotal(0);
emptyPage.setPageNum(pageNum);
emptyPage.setPageSize(pageSize);
emptyPage.setPages(0);
return emptyPage;
}
}
}
Troubleshooting
1. Common Issues
1.1 Connection issues
#!/bin/bash
# Hive connection troubleshooting script
echo "=== Hive connection checks ==="
# 1. Check Hive service status
echo "1. Hive service status:"
jps | grep -E "(HiveServer2|HiveMetaStore)"
# 2. Check port listening
echo -e "\n2. Listening ports:"
netstat -tlnp | grep 10000
# 3. Test the Hive connection
echo -e "\n3. Hive connection test:"
beeline -u "jdbc:hive2://localhost:10000/default" -n hive -p "" -e "SHOW TABLES;" 2>/dev/null || echo "connection failed"
# 4. Check the logs
echo -e "\n4. Recent HiveServer2 errors:"
tail -20 /var/log/hive/hiveserver2.log | grep -i error
# 5. Check network connectivity
echo -e "\n5. Network connectivity:"
ping -c 3 localhost
telnet localhost 10000
1.2 Pagination issues
// Pagination debugging helper
@Component
public class PaginationDebugger {
private static final Logger log = LoggerFactory.getLogger(PaginationDebugger.class);
/**
* Debug a paged query
*/
public void debugPagination(String sql, int pageNum, int pageSize) {
log.info("=== 分页查询调试 ===");
log.info("原始SQL: {}", sql);
log.info("页码: {}, 页大小: {}", pageNum, pageSize);
try {
// 1. Validate the SQL
validateSQL(sql);
// 2. Build the paginated SQL
String paginationSQL = buildPaginationSQL(sql, pageNum, pageSize);
log.info("分页SQL: {}", paginationSQL);
// 3. 执行查询
executeQuery(paginationSQL);
} catch (Exception e) {
log.error("分页查询调试失败", e);
handlePaginationError(e);
}
}
/**
* Validate SQL syntax
*/
private void validateSQL(String sql) {
// Check for constructs Hive may reject
if (sql.contains("OFFSET")) {
throw new IllegalArgumentException("Hive does not support OFFSET (before Hive 2.0)");
}
if (sql.contains("LIMIT") && !sql.contains("ROW_NUMBER()")) {
throw new IllegalArgumentException("Plain LIMIT cannot express a page offset on older Hive; prefer ROW_NUMBER()");
}
}
/**
* Build the paginated SQL
*/
private String buildPaginationSQL(String sql, int pageNum, int pageSize) {
StringBuilder paginationSQL = new StringBuilder();
paginationSQL.append("SELECT * FROM (");
paginationSQL.append("SELECT ROW_NUMBER() OVER() as rn, t.* FROM (");
paginationSQL.append(sql);
paginationSQL.append(") t");
paginationSQL.append(") paginated WHERE rn BETWEEN ");
paginationSQL.append((pageNum - 1) * pageSize + 1);
paginationSQL.append(" AND ");
paginationSQL.append(pageNum * pageSize);
return paginationSQL.toString();
}
/**
* Execute the query
*/
private void executeQuery(String sql) {
log.info("执行查询: {}", sql);
// 这里可以添加实际的查询执行逻辑
}
/**
* Handle pagination errors
*/
private void handlePaginationError(Exception e) {
if (e instanceof IllegalArgumentException) {
log.error("SQL语法错误: {}", e.getMessage());
} else if (e instanceof SQLException) {
log.error("数据库执行错误: {}", e.getMessage());
} else {
log.error("未知错误: {}", e.getMessage());
}
}
}
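Hypothetical usage of the debugger above, e.g. from a test or a diagnostic endpoint (the bean is injected by Spring):

@Autowired
private PaginationDebugger paginationDebugger;

public void debugExample() {
    // Logs the original SQL, the rewritten paginated SQL, and any validation errors
    paginationDebugger.debugPagination("SELECT * FROM users", 1, 20);
}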
2. Performance Issues
2.1 Query performance analysis
// Query performance analysis tool (executeQuery is left as a stub below)
@Component
public class QueryPerformanceAnalyzer {
private static final Logger log = LoggerFactory.getLogger(QueryPerformanceAnalyzer.class);
/**
* Analyze the performance of a paged query
*/
public QueryPerformanceResult analyzeQueryPerformance(String sql, int pageNum, int pageSize) {
QueryPerformanceResult result = new QueryPerformanceResult();
result.setSql(sql);
result.setPageNum(pageNum);
result.setPageSize(pageSize);
long startTime = System.currentTimeMillis();
try {
// 1. Execute the query
List<User> users = executeQuery(sql);
result.setResultCount(users.size());
// 2. Record the execution time
long executionTime = System.currentTimeMillis() - startTime;
result.setExecutionTime(executionTime);
// 3. Analyze the metrics
analyzePerformance(result, users);
} catch (Exception e) {
result.setError(e.getMessage());
log.error("查询性能分析失败", e);
}
return result;
}
/**
* Analyze performance metrics
*/
private void analyzePerformance(QueryPerformanceResult result, List<User> users) {
// 1. Memory usage
long memoryUsage = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
result.setMemoryUsage(memoryUsage);
// 2. Result-set size
if (users.size() > 1000) {
result.addWarning("查询结果数据量较大,建议优化分页策略");
}
// 3. Execution time
if (result.getExecutionTime() > 5000) {
result.addWarning("查询执行时间较长,建议优化SQL或添加索引");
}
}
private List<User> executeQuery(String sql) {
// Stub: wire in the actual query execution (e.g. via SqlSessionTemplate)
return new ArrayList<>();
}
}
// Query performance result holder
public class QueryPerformanceResult {
private String sql;
private int pageNum;
private int pageSize;
private int resultCount;
private long executionTime;
private long memoryUsage;
private String error;
private List<String> warnings = new ArrayList<>();
// getters and setters omitted
public void addWarning(String warning) {
warnings.add(warning);
}
}
Performance Optimization
1. Query Optimization
1.1 SQL optimization strategies
-- Hive SQL optimization strategies
-- 1. Query by partition
SELECT * FROM users
WHERE year = 2024 AND month = 1
ORDER BY created_at DESC;
-- 2. Use columnar storage
CREATE TABLE users_optimized (
id INT,
name STRING,
email STRING,
status STRING,
created_at TIMESTAMP
) STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY");
-- 3. Use a bucketed table
CREATE TABLE users_bucketed (
id INT,
name STRING,
email STRING,
status STRING,
created_at TIMESTAMP
) CLUSTERED BY (id) INTO 32 BUCKETS
STORED AS ORC;
-- 4. Optimize JOIN queries with map joins
SELECT /*+ MAPJOIN(u) */ u.name, o.order_id
FROM users u JOIN orders o ON u.id = o.user_id
WHERE u.status = 'active';
1.2 Index optimization
-- Hive index optimization (note: CREATE INDEX was removed in Hive 3.0; on Hive 3+ rely on partitioning, bucketing and ORC/Parquet statistics instead)
-- 1. Create an index (Hive < 3.0 only)
CREATE INDEX user_status_idx ON TABLE users (status)
AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
WITH DEFERRED REBUILD;
-- 2. Rebuild the index
ALTER INDEX user_status_idx ON users REBUILD;
-- 3. Enable index-based filtering
SET hive.optimize.index.filter=true;
SELECT * FROM users WHERE status = 'active';
2. Cache Optimization
2.1 Query result caching
// Query result caching
@Service
public class CachedUserService {
private static final Logger log = LoggerFactory.getLogger(CachedUserService.class);
@Autowired
private UserMapper userMapper;
@Autowired
private CacheManager cacheManager;
/**
* Paged query with caching
*/
@Cacheable(value = "users", key = "#pageNum + '_' + #pageSize")
public PageInfo<User> getUsersWithCache(int pageNum, int pageSize) {
return getUsersByPage(pageNum, pageSize); // as defined in UserService earlier
}
/**
* Evict the cache
*/
@CacheEvict(value = "users", allEntries = true)
public void clearCache() {
log.info("Evicting user query cache");
}
/**
* Refresh the cache
*/
@CachePut(value = "users", key = "#pageNum + '_' + #pageSize")
public PageInfo<User> updateCache(int pageNum, int pageSize) {
return getUsersByPage(pageNum, pageSize);
}
}
2.2 Connection pool tuning
<!-- Connection pool tuning -->
<bean id="hiveDataSource" class="com.alibaba.druid.pool.DruidDataSource">
<property name="driverClassName" value="org.apache.hive.jdbc.HiveDriver"/>
<property name="url" value="jdbc:hive2://localhost:10000/default"/>
<property name="username" value="hive"/>
<property name="password" value=""/>
<!-- Pool tuning parameters -->
<property name="initialSize" value="10"/>
<property name="minIdle" value="10"/>
<property name="maxActive" value="50"/>
<property name="maxWait" value="60000"/>
<property name="timeBetweenEvictionRunsMillis" value="60000"/>
<property name="minEvictableIdleTimeMillis" value="300000"/>
<property name="validationQuery" value="SELECT 1"/>
<property name="testWhileIdle" value="true"/>
<property name="testOnBorrow" value="false"/>
<property name="testOnReturn" value="false"/>
<!-- Pool monitoring -->
<property name="filters" value="stat"/>
<property name="connectionProperties" value="druid.stat.mergeSql=true;druid.stat.slowSqlMillis=5000"/>
</bean>
Summary
1. Key Takeaways
- The MyBatis pagination plugin is incompatible with Hive mainly because Hive lacks standard SQL pagination syntax
- Typical failures fall into four groups: SQL syntax errors, data type errors, connection configuration errors, and plugin configuration errors
- Remedies include custom pagination, configuration changes, connection tuning, and performance tuning
2. Recommended Practices
- Disable the PageHelper plugin in Hive environments and use a custom pagination implementation
- Tune the Hive connection configuration for stability and throughput
- Implement a fallback strategy so that a failed paged query still returns a usable result
- Cache results to cut repeated queries and improve response times
3. Longer-Term Improvements
- Build a Hive-specific pagination plugin to close the compatibility gap
- Adapt the pagination algorithm to Hive's query characteristics
- Set up monitoring to catch and resolve issues early
- Keep tuning pagination performance
Following these practices resolves the common errors seen when the MyBatis pagination plugin meets Hive and yields a stable, efficient paged-query setup.