一、概述
在数据处理过程中,分页是一个常见的需求,尤其是在处理大数据集时。Kettle(Pentaho Data Integration)提供了强大的数据集成能力,通过开发自定义的分页组件,可以实现对数据的高效抽取和管理。本文将介绍如何开发一个Kettle分页组件,包括主要的元数据类、数据类、处理类和对话框类的实现。
二、组件结构
一个完整的Kettle分页组件通常包括以下几个部分:
- 元数据类(SplitPageMeta):定义组件的配置参数和字段信息。
- 数据类(SplitPageData):保存组件在运行时的数据和状态信息。
- 处理类(SplitPage):实现组件的具体处理逻辑。
- 对话框类(SplitPageDialog):提供组件的图形化配置界面。
三、组件实现
1. 元数据类(SplitPageMeta)
元数据类用于定义组件的配置参数,包括开始字段、结束字段、分页大小等。以下是SplitPageMeta类的主要部分:
/**
 * Configuration (metadata) of the page-splitting step.
 *
 * <p>Holds the names of the generated start/end fields, the page size, the
 * pagination dialect and the name of the input field that carries the total
 * row count. It also creates the step and step-data instances and serializes
 * the settings to XML.
 *
 * <p>NOTE(review): no {@code loadXML} or {@code getFields} override is visible
 * in this excerpt. Saved settings can only be restored if a matching
 * {@code loadXML} reads the same tag names written by {@link #getXML()} —
 * confirm against the full source.
 */
public class SplitPageMeta extends BaseStepMeta implements StepMetaInterface {
    /** Name of the output field holding the first row number of a page. */
    private String startFiledName;
    /** Name of the output field holding the last row number of a page. */
    private String endFiledName;
    /** Name of the page-size field as entered in the configuration dialog. */
    private String pageSizeName;
    /** Number of rows per page. */
    private long pageSize;
    /** Pagination dialect selected in the dialog ("Mysql" or "Oracle"). */
    private String split_type;
    /** Name of the input field that contains the total row count. */
    private String countName;

    /** Resets every setting to its initial value. */
    @Override
    public void setDefault() {
        startFiledName = "start_Field";
        endFiledName = "end_Field";
        pageSizeName = "";
        pageSize = 1000L;
        split_type = "Mysql";
        countName = "";
    }

    /** Creates the runtime step that executes this configuration. */
    @Override
    public StepInterface getStep(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr,
            TransMeta transMeta, Trans trans) {
        return new SplitPage(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    /** Creates the runtime state holder used by the step. */
    @Override
    public StepDataInterface getStepData() {
        return new SplitPageData();
    }

    /**
     * Serializes the settings as a sequence of XML tags, one per setting.
     *
     * @return the XML fragment describing this step's configuration
     */
    @Override
    public String getXML() {
        StringBuilder retval = new StringBuilder();
        retval.append(" ").append(XMLHandler.addTagValue("startFiledName", startFiledName));
        retval.append(" ").append(XMLHandler.addTagValue("endFiledName", endFiledName));
        retval.append(" ").append(XMLHandler.addTagValue("pageSizeName", pageSizeName));
        retval.append(" ").append(XMLHandler.addTagValue("pageSize", pageSize));
        retval.append(" ").append(XMLHandler.addTagValue("split_type", split_type));
        retval.append(" ").append(XMLHandler.addTagValue("countName", countName));
        return retval.toString();
    }

    // Accessors used by SplitPage and SplitPageDialog.

    public String getStartFiledName() {
        return startFiledName;
    }

    public void setStartFiledName(String startFiledName) {
        this.startFiledName = startFiledName;
    }

    public String getEndFiledName() {
        return endFiledName;
    }

    public void setEndFiledName(String endFiledName) {
        this.endFiledName = endFiledName;
    }

    public String getPageSizeName() {
        return pageSizeName;
    }

    public void setPageSizeName(String pageSizeName) {
        this.pageSizeName = pageSizeName;
    }

    public long getPageSize() {
        return pageSize;
    }

    public void setPageSize(long pageSize) {
        this.pageSize = pageSize;
    }

    public String getSplit_type() {
        return split_type;
    }

    public void setSplit_type(String split_type) {
        this.split_type = split_type;
    }

    public String getCountName() {
        return countName;
    }

    public void setCountName(String countName) {
        this.countName = countName;
    }
}
2. 数据类(SplitPageData)
数据类用于保存组件在运行时的数据和状态信息。以下是SplitPageData类的主要部分:
/**
 * Mutable runtime state of the page-splitting step.
 *
 * <p>All members are public so the step implementation can read and write
 * them directly while rows are being processed.
 */
public class SplitPageData extends BaseStepData implements StepDataInterface {
    /** Position of the count field inside the input row; starts at 0. */
    public int countindex;

    /** Rows per page; 1000 unless overwritten. */
    public long pagesize = 1000L;

    /** Layout of the rows received by the step. */
    public RowMetaInterface inputRowMeta;

    /** Layout of the rows emitted by the step. */
    public RowMetaInterface outputRowMeta;

    /** The input row most recently read. */
    public Object[] readrow;

    /** Creates an empty state holder with the defaults declared above. */
    public SplitPageData() {
        super();
    }
}
3. 处理类(SplitPage)
处理类用于实现组件的具体处理逻辑。以下是SplitPage类的主要部分:
/**
 * Step that turns a total row count into a sequence of page ranges.
 *
 * <p>The step reads one input row, takes the total count from the configured
 * count field and emits one output row per page. Each output row is
 * {@code {start, end}} with 1-based, inclusive row numbers. After the first
 * input row has been handled the step signals that it is done.
 */
public class SplitPage extends BaseStep implements StepInterface {
    private SplitPageMeta meta;
    private SplitPageData data;

    public SplitPage(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
            Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    /**
     * Reads the count row and emits one {@code {start, end}} row per page.
     *
     * @param smi the step metadata, expected to be a {@link SplitPageMeta}
     * @param sdi the step data, expected to be a {@link SplitPageData}
     * @return always {@code false}: the step finishes after the first input row
     *         (or immediately when there is no input)
     * @throws KettleException if the page size is not positive, the count field
     *         is missing from the input, or its value is null or not an integer
     */
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        this.meta = (SplitPageMeta) smi;
        this.data = (SplitPageData) sdi;

        data.readrow = getRow();
        if (data.readrow == null) {
            setOutputDone();
            return false;
        }

        if (first) {
            first = false;
            if (data.pagesize <= 0) {
                throw new KettleException("Page size must be greater than 0 but is " + data.pagesize);
            }
            data.inputRowMeta = getInputRowMeta();
            data.outputRowMeta = data.inputRowMeta.clone();
            data.countindex = data.inputRowMeta.indexOfValue(meta.getCountName());
            if (data.countindex < 0) {
                throw new KettleException(
                        "Count field '" + meta.getCountName() + "' was not found in the input row");
            }
            // NOTE(review): the rows emitted below contain exactly {start, end}.
            // This presumably relies on meta.getFields(...) replacing the cloned
            // input layout with those two fields — confirm against SplitPageMeta.
            meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);
        }

        long count = readCount(data.readrow[data.countindex]);
        emitPages(count);

        // Only the first input row is evaluated; the step ends here on purpose.
        setOutputDone();
        return false;
    }

    /**
     * Emits one row per page covering the row numbers 1..count.
     * Nothing is emitted when {@code count} is zero or negative.
     */
    private void emitPages(long count) throws KettleException {
        long start = 1;
        while (start <= count) {
            // Compare against the remaining rows instead of computing
            // start + pagesize first, so the arithmetic cannot overflow.
            long remaining = count - start;
            long end = remaining >= data.pagesize - 1 ? start + (data.pagesize - 1) : count;
            putRow(data.outputRowMeta, new Object[] {start, end});
            if (end == count) {
                break;
            }
            start = end + 1;
        }
    }

    /** Converts the raw count value to a long, accepting numbers and numeric text. */
    private long readCount(Object value) throws KettleException {
        if (value == null) {
            throw new KettleException("Count field '" + meta.getCountName() + "' is null");
        }
        if (value instanceof Number) {
            return ((Number) value).longValue();
        }
        try {
            return Long.parseLong(value.toString().trim());
        } catch (NumberFormatException e) {
            throw new KettleException(
                    "Count field '" + meta.getCountName() + "' is not a valid integer: " + value, e);
        }
    }

    /**
     * Initializes the step and copies the configured page size into the
     * runtime data.
     *
     * @return {@code true} when the base initialization succeeded
     */
    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        this.meta = (SplitPageMeta) smi;
        this.data = (SplitPageData) sdi;
        if (super.init(smi, sdi)) {
            data.pagesize = meta.getPageSize();
            return true;
        }
        return false;
    }
}
4. 对话框类(SplitPageDialog)
对话框类用于提供组件的图形化配置界面。以下是SplitPageDialog类的主要部分:
public class SplitPageDialog extends BaseStepDialog {
private SplitPageMeta input;
private Label wlType;
private CCombo wType;
private Label wlCountName;
private CCombo wCountName;
private Label wlStartFiledName;
private TextVar wStartFiledName;
private Label wlEndFiledName;
private TextVar wEndFiledName;
private Label wlPageSizeName;
private TextVar wPageSizeName;
private Label wlPageSize;
private TextVar wPageSize;
public SplitPageDialog(Shell parent, Object in, TransMeta transMeta, String sname) {
super(parent, (BaseStepMeta) in, transMeta, sname);
input = (SplitPageMeta) in;
}
@Override
public String open() {
Shell parent = getParent();
Display display = parent.getDisplay();
shell = new Shell(parent, SWT.DIALOG_TRIM | SWT.RESIZE | SWT.MAX | SWT.MIN);
props.setLook(shell);
setShellImage(shell, input);
// Layout and components initialization
FormLayout formLayout = new FormLayout();
formLayout.marginWidth = Const.FORM_MARGIN;
formLayout.marginHeight = Const.FORM_MARGIN;
shell.setLayout(formLayout);
shell.setText("分页设置");
// Step name field
wlStepname = new Label(shell, SWT.RIGHT);
wlStepname.setText("步骤名称 ");
props.setLook(wlStepname);
FormData fdlStepname = new FormData();
fdlStepname.left = new FormAttachment(0, 0);
fdlStepname.right = new FormAttachment(middle, -margin);
fdlStepname.top = new FormAttachment(0, margin);
wlStepname.setLayoutData(fdlStepname);
wStepname = new Text(shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER);
wStepname.setText(stepname);
props.setLook(wStepname);
wStepname.addModifyListener(lsMod);
FormData fdStepname = new FormData();
fdStepname.left = new FormAttachment(middle, 0);
fdStepname.top = new FormAttachment(0, margin);
fdStepname.right = new FormAttachment(100, 0);
wStepname.setLayoutData(fdStepname);
// Pagination type field
wlType = new Label(shell, SWT.RIGHT);
wlType.setText("分页类型 ");
props.setLook(wlType);
FormData fdlType = new FormData();
fdlType.left = new FormAttachment(0, 0);
fdlType.right = new FormAttachment(middle, -margin);
fdlType.top = new FormAttachment(wStepname, margin * 2);
wlType.setLayoutData(fdlType);
wType = new CCombo(shell, SWT.BORDER | SWT.READ_ONLY);
wType.setEditable(true);
props.setLook(wType);
wType.addModifyListener(lsMod);
FormData fdType = new FormData();
fdType.left = new FormAttachment(middle, -margin);
fdType.top = new FormAttachment(wStepname, margin);
fdType.right = new FormAttachment(100, -margin);
wType.setLayoutData(fdType);
wType.add("Mysql");
wType.add("Oracle");
// Total count field
wlCountName = new Label(shell, SWT.RIGHT);
wlCountName.setText("总行数字段 ");
props.setLook(wlCountName);
FormData fdlCountName = new FormData();
fdlCountName.left = new FormAttachment(0, 0);
fdlCountName.right = new FormAttachment(middle, -margin);
fdlCountName.top = new FormAttachment(wType, margin * 2);
wlCountName.setLayoutData(fdlCountName);
wCountName = new CCombo(shell, SWT.BORDER | SWT.READ_ONLY);
wCountName.setEditable(true);
props.setLook(wCountName);
wCountName.addModifyListener(lsMod);
FormData fdCountName = new FormData();
fdCountName.left = new FormAttachment(middle, -margin);
fdCountName.top = new FormAttachment(wType, margin);
fdCountName.right = new FormAttachment(100, -margin);
wCountName.setLayoutData(fdCountName);
// Start field name
wlStartFiledName = new Label(shell, SWT.RIGHT);
wlStartFiledName.setText("开始字段 ");
props.setLook(wlStartFiledName);
FormData fdlStartFiledName = new FormData();
fdlStartFiledName.left = new FormAttachment(0, 0);
fdlStartFiledName.right = new FormAttachment(middle, -margin);
fdlStartFiledName.top = new FormAttachment(wCountName, margin * 2);
wlStartFiledName.setLayoutData(fdlStartFiledName);
wStartFiledName = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER);
props.setLook(wStartFiledName);
wStartFiledName.addModifyListener(lsMod);
FormData fdStartFiledName = new FormData();
fdStartFiledName.left = new FormAttachment(middle, -margin);
fdStartFiledName.top = new FormAttachment(wCountName, margin);
fdStartFiledName.right = new FormAttachment(100, -margin);
wStartFiledName.setLayoutData(fdStartFiledName);
// End field name
wlEndFiledName = new Label(shell, SWT.RIGHT);
wlEndFiledName.setText("结束字段 ");
props.setLook(wlEndFiledName);
FormData fdlEndFiledName = new FormData();
fdlEndFiledName.left = new FormAttachment(0, 0);
fdlEndFiledName.right = new FormAttachment(middle, -margin);
fdlEndFiledName.top = new FormAttachment(wStartFiledName, margin * 2);
wlEndFiledName.setLayoutData(fdlEndFiledName);
wEndFiledName = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER);
props.setLook(wEndFiledName);
wEndFiledName.addModifyListener(lsMod);
FormData fdEndFiledName = new FormData();
fdEndFiledName.left = new FormAttachment(middle, -margin);
fdEndFiledName.top = new FormAttachment(wStartFiledName, margin);
fdEndFiledName.right = new FormAttachment(100, -margin);
wEndFiledName.setLayoutData(fdEndFiledName);
// Page size name
wlPageSizeName = new Label(shell, SWT.RIGHT);
wlPageSizeName.setText("分页字段 ");
props.setLook(wlPageSizeName);
FormData fdlPageSizeName = new FormData();
fdlPageSizeName.left = new FormAttachment(0, 0);
fdlPageSizeName.right = new FormAttachment(middle, -margin);
fdlPageSizeName.top = new FormAttachment(wEndFiledName, margin * 2);
wlPageSizeName.setLayoutData(fdlPageSizeName);
wPageSizeName = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER);
props.setLook(wPageSizeName);
wPageSizeName.addModifyListener(lsMod);
FormData fdPageSizeName = new FormData();
fdPageSizeName.left = new FormAttachment(middle, -margin);
fdPageSizeName.top = new FormAttachment(wEndFiledName, margin);
fdPageSizeName.right = new FormAttachment(100, -margin);
wPageSizeName.setLayoutData(fdPageSizeName);
// Page size
wlPageSize = new Label(shell, SWT.RIGHT);
wlPageSize.setText("分页大小 ");
props.setLook(wlPageSize);
FormData fdlPageSize = new FormData();
fdlPageSize.left = new FormAttachment(0, 0);
fdlPageSize.right = new FormAttachment(middle, -margin);
fdlPageSize.top = new FormAttachment(wPageSizeName, margin * 2);
wlPageSize.setLayoutData(fdlPageSize);
wPageSize = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER);
props.setLook(wPageSize);
wPageSize.addModifyListener(lsMod);
FormData fdPageSize = new FormData();
fdPageSize.left = new FormAttachment(middle, -margin);
fdPageSize.top = new FormAttachment(wPageSizeName, margin);
fdPageSize.right = new FormAttachment(100, -margin);
wPageSize.setLayoutData(fdPageSize);
// Buttons
wOK = new Button(shell, SWT.PUSH);
wOK.setText("确定");
wCancel = new Button(shell, SWT.PUSH);
wCancel.setText("取消");
setButtonPositions(new Button[]{wOK, wCancel}, margin, wPageSize);
// Listeners
lsOK = new Listener() {
public void handleEvent(Event e) {
ok();
}
};
lsCancel = new Listener() {
public void handleEvent(Event e) {
cancel();
}
};
wOK.addListener(SWT.Selection, lsOK);
wCancel.addListener(SWT.Selection, lsCancel);
// Set size and getData
setSize();
getData();
input.setChanged(backupChanged);
shell.open();
while (!shell.isDisposed()) {
if (!display.readAndDispatch()) {
display.sleep();
}
}
return stepname;
}
private void getData() {
if (input.getStartFiledName() != null) {
wStartFiledName.setText(input.getStartFiledName());
}
if (input.getEndFiledName() != null) {
wEndFiledName.setText(input.getEndFiledName());
}
if (input.getPageSizeName() != null) {
wPageSizeName.setText(input.getPageSizeName());
}
if (input.getPageSize() > 0) {
wPageSize.setText(input.getPageSize() + "");
}
if (input.getSplit_type() != null) {
wType.setText(input.getSplit_type());
}
if (input.getCountName() != null) {
wCountName.setText(input.getCountName());
}
}
private void ok() {
if (Utils.isEmpty(wStepname.getText())) {
return;
}
long n = Long.parseLong(wPageSize.getText());
if (n <= 0) {
MessageBox mb = new MessageBox(shell, SWT.OK | SWT.ICON_ERROR);
mb.setMessage("分页大小不能小于1");
mb.setText("错误");
mb.open();
return;
}
stepname = wStepname.getText();
getInfo(input);
dispose();
}
private void getInfo(SplitPageMeta info) {
input.setStartFiledName(wStartFiledName.getText());
input.setEndFiledName(wEndFiledName.getText());
input.setPageSizeName(wPageSizeName.getText());
input.setPageSize(Long.parseLong(wPageSize.getText()));
input.setCountName(wCountName.getText());
input.setSplit_type(wType.getText());
}
private void cancel() {
stepname = null;
input.setChanged(backupChanged);
dispose();
}
}
四、总结
通过上述实现,我们开发了一个Kettle分页组件,包括元数据类、数据类、处理类和对话框类。这个组件可以用于在Kettle中实现分页逻辑,支持MySQL和Oracle数据库。通过图形化界面配置分页参数,用户可以轻松地实现数据的分页抽取和处理。组件会生成分页信息并传递给后续组件;后续组件可以根据这些分页信息执行分页查询,并结合并发设置,将一个大的查询任务拆分成多个小任务并行执行。