1.启动gpfdist服务:相关参数
gpfdist -d /export/gpdata/gpfdist/ -p 8001 -l /home/gpadmin/gpAdminLogs/gpfdist.log &
-d:存放外部表的目录
-p:端口号
-l:日志文件
gpfdist服务的验证:使用jobs命令,验证结果如下:
[gpadmin@kt_bigdata2 qiyf]$ jobs
[2]+ Running gpfdist -d /export/gpdata/gpfdist/ -p 8001 -l /home/gpadmin/gpAdminLogs/gpfdist.log & (wd: /export/gpdata/gpfdist)
2.把文件上传到gpfdist所在服务器对应的目录:
可用wget或者scp等多种方式
3.创建外部表:
create external table e2
(
id serial, #(创建外部表时,此列没有意义)
fts timestamp,
day_id int,
fmacid varchar(32),
fcategory varchar(32),
fname varchar(32),
fcnt int,
ftime smallint,
f202001 decimal(8,2),
f202003 decimal(8,2),
f202005 decimal(8,2),
f202007 decimal(8,2),
f202009 decimal(8,2),
f20200j decimal(8,2),
f20200l decimal(8,2),
f20200n varchar(20),
f20200o varchar(20),
f20200d varchar(20),
f20200e varchar(20),
f20200f varchar(20),
f20200g varchar(20),
f20200h varchar(20),
f20200i varchar(20),
f20200jj varchar(20),
f2000zt varchar(20),
f2000zu varchar(20),
f2000zv varchar(20),
f2000zw varchar(20),
f2000zx varchar(20),
f2000zy varchar(20),
f2000zz varchar(20),
f602001 varchar(20),
f602002 varchar(20),
f602003 varchar(20),
f602004 varchar(20),
f602007 varchar(20),
f602008 varchar(20),
f602009 varchar(20),
f6020zv varchar(20)
) location ('gpfdist://10.159.161.33:8001/e2.txt') format 'text' (DELIMITER ',');
4.把外部表中的数据导入到内部表:
insert into XXXXXXX(fts,day_id,fmacid,fcategory,fname,fcnt,ftime,f202001,f202003,f202005,f202007,f202009,f20200j,f20200l,f20200n,f20200o,f20200d,f20200e,f20200f,f20200g,f20200h,f20200i,f20200jj,f2000zt,f2000zu,f2000zv,f2000zw,f2000zx,f2000zy,f2000zz,f602001,f602002,f602003,f602004,f602007,f602008,f602009,f6020zv)
select fts,day_id,fmacid,fcategory,fname,fcnt,ftime,f202001,f202003,f202005,f202007,f202009,f20200j,f20200l,f20200n,f20200o,f20200d,f20200e,f20200f,f20200g,f20200h,f20200i,f20200jj,f2000zt,f2000zu,f2000zv,f2000zw,f2000zx,f2000zy,f2000zz,f602001,f602002,f602003,f602004,f602007,f602008,f602009,f6020zv
from e2;