由于Supersonic本身不提供查询(SQL)解析器,要执行一条查询,就必须自己调用Supersonic提供的各种操作(Operation)函数,通过编程来实现。下面是我实现的一个简单查询示例:
Query:
select
l_returnflag,
sum(l_quantity) as sum_qty
from
lineitem
where
l_shipdate <= date '1998-12-01'
group by
l_returnflag
order by
l_returnflag;
Table:
Column Name Datatype
L_ORDERKEY INT32
L_PARTKEY INT32
L_SUPPKEY INT32
L_LINENUMBER INT32
L_QUANTITY FLOAT
L_EXTENDEDPRICE FLOAT
L_DISCOUNT FLOAT
L_TAX FLOAT
L_RETURNFLAG STRING
L_LINESTATUS STRING
L_SHIPDATE DATE
L_COMMITDATE DATE
L_RECEIPTDATE DATE
L_SHIPINSTRUCT STRING
L_SHIPMODE STRING
L_COMMENT STRING
实现代码如下:
#include <map>
using std::map;using std::multimap;
#include <set>
using std::multiset;
using std::set;
#include <utility>
using std::make_pair;
using std::pair;
#include "gtest/gtest.h"
#include "supersonic/supersonic.h"
#include "supersonic/cursor/core/sort.h"
#include "supersonic/cursor/infrastructure/ordering.h"
#include "supersonic/utils/strings/stringpiece.h"
// Include some map utilities to use for result verification.
#include "supersonic/utils/map-util.h"
using supersonic::Attribute;
using supersonic::Block;
using supersonic::Cursor;
using supersonic::Operation;
using supersonic::FailureOr;
using supersonic::FailureOrOwned;
using supersonic::GetConstantExpressionValue;
using supersonic::TupleSchema;
using supersonic::Table;
using supersonic::TableRowWriter;
using supersonic::View;
using supersonic::ViewCopier;
using supersonic::HashJoinOperation;
using supersonic::HeapBufferAllocator;
using supersonic::JoinType;
using supersonic::ProjectNamedAttribute;
using supersonic::ProjectNamedAttributeAs;
using supersonic::rowid_t;
using supersonic::SingleSourceProjector;
using supersonic::MultiSourceProjector;
using supersonic::CompoundSingleSourceProjector;
using supersonic::CompoundMultiSourceProjector;
using supersonic::ResultView;
using supersonic::ScanView;
using supersonic::SucceedOrDie;
using supersonic::NamedAttribute;
using supersonic::AggregationSpecification;
using supersonic::SortOrder;
using supersonic::Sort;
using supersonic::Filter;
using supersonic::ProjectAllAttributes;
using supersonic::If;
using supersonic::IfNull;
using supersonic::Less;
using supersonic::CompoundExpression;
using supersonic::Expression;
using supersonic::Compute;
using supersonic::Generate;
using supersonic::ParseStringNulling;
using supersonic::ConstBool;
using supersonic::ConstString;
using supersonic::ConstInt32;
using supersonic::ConstDate;
using supersonic::Null;
using supersonic::BoundExpressionTree;
using supersonic::EvaluationResult;
using supersonic::ParseStringNulling;
using supersonic::GetConstantExpressionValue;
using supersonic::INNER;
using supersonic::UNIQUE;
using supersonic::SUM;
using supersonic::ASCENDING;
using supersonic::INT32;
using supersonic::NOT_NULLABLE;
using supersonic::NULLABLE;
using supersonic::STRING;
using supersonic::DATE;
using supersonic::BOOL;
using supersonic::FLOAT;
using supersonic::rowcount_t;
class QueryOneTest {
public:
void SetUp() {
lineitem_schema.add_attribute(Attribute("L_ORDERKEY", INT32, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_PARTKEY", INT32, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_SUPPKEY", INT32, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_LINENUMBER", INT32, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_QUANTITY", FLOAT, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_EXTENDEDPRICE", FLOAT, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_DISCOUNT", FLOAT, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_TAX", FLOAT, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_RETURNFLAG", STRING, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_LINESTATUS", STRING, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_SHIPDATE", DATE, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_COMMITDATE", DATE, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_RECEIPTDATE", DATE, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_SHIPINSTRUCT", STRING, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_SHIPMODE", STRING, NOT_NULLABLE));
lineitem_schema.add_attribute(Attribute("L_COMMENT", STRING, NOT_NULLABLE));
lineitem_table.reset(new Table(lineitem_schema,
HeapBufferAllocator::Get()));
lineitem_table_writer.reset(new TableRowWriter(lineitem_table.get()));
}
int32 AddData(int32 L_ORDERKEY,int32 L_PARTKEY,int32 L_SUPPKEY,int32 L_LINENUMBER,
float L_QUANTITY,float L_EXTENDEDPRICE,float L_DISCOUNT,float L_TAX,
const StringPiece& L_RETURNFLAG,const StringPiece& L_LINESTATUS,
const StringPiece& L_SHIPDATE,const StringPiece& L_COMMITDATE,const StringPiece& L_RECEIPTDATE,
const StringPiece& L_SHIPINSTRUCT,const StringPiece& L_SHIPMODE,
const StringPiece& L_COMMENT) {
scoped_ptr<const Expression> date_or_null1(
ParseStringNulling(DATE, ConstString(L_SHIPDATE)));
bool L_SHIPDATE_is_null = false;
FailureOr<int32> L_SHIPDATE_as_int32 =
GetConstantExpressionValue<DATE>(*date_or_null1,
&L_SHIPDATE_is_null);
scoped_ptr<const Expression> date_or_null2(
ParseStringNulling(DATE, ConstString(L_COMMITDATE)));
bool L_COMMITDATE_is_null = false;
FailureOr<int32> L_COMMITDATE_as_int32 =
GetConstantExpressionValue<DATE>(*date_or_null2,
&L_COMMITDATE_is_null);
scoped_ptr<const Expression> date_or_null3(
ParseStringNulling(DATE, ConstString(L_RECEIPTDATE)));
bool L_RECEIPTDATE_is_null = false;
FailureOr<int32> L_RECEIPTDATE_as_int32 =
GetConstantExpressionValue<DATE>(*date_or_null3,
&L_RECEIPTDATE_is_null);
lineitem_table_writer
->AddRow().Int32(L_ORDERKEY).Int32(L_PARTKEY).Int32(L_SUPPKEY).Int32(L_LINENUMBER)
.Float(L_QUANTITY).Float(L_EXTENDEDPRICE).Float(L_DISCOUNT).Float(L_TAX)
.String(L_RETURNFLAG).String(L_LINESTATUS)
.Date(L_SHIPDATE_as_int32.get()).Date(L_COMMITDATE_as_int32.get()).Date(L_RECEIPTDATE_as_int32.get())
.String(L_SHIPINSTRUCT).String(L_SHIPMODE).String(L_COMMENT)
.CheckSuccess();
return L_LINENUMBER;
}
void TestResults() {
//检查结果是否满足需求,首先,我们必须把轮询rows,将它们放到一个内存块里。
Operation * scan = ScanView(lineitem_table->view());
result_cursor.reset(SucceedOrDie(scan->CreateCursor()));
/*Filter Start*/
const Expression * LOE = LessOrEqual( NamedAttribute("L_SHIPDATE"),ConstDate(10561));
scoped_ptr<Operation> filter(
Filter(LOE,ProjectAllAttributes(), scan));
FailureOrOwned<Cursor> cursor = filter.release()->CreateCursor();
ResultView FiterResult(cursor->Next(-1));//数据量大之后不确定是否有问题。
std::cout<<"The Result of Filter is :"<<std::endl;
std::cout<<"column count is:"<<FiterResult.view().column_count()<<std::endl;
std::cout<<"row count is:"<<FiterResult.view().row_count()<<std::endl;
// std::cout<<result.view().column_count()<<"\t";
std::cout<<std::endl;
/*Filter End*/
/*Group函数*/
Operation * ScanforGroup = ScanView(FiterResult.view());
scoped_ptr<AggregationSpecification> specification(
new AggregationSpecification());
specification-> AddAggregation(SUM, "L_QUANTITY", "data_sums");//data是要做相加运算的列的名称,data_sums是输出名称
scoped_ptr< const SingleSourceProjector >
key_projector(ProjectNamedAttribute( "L_RETURNFLAG"));
//对表input_view以key为关键字分组,做specification运算。NULL是为分组聚集的选项,可具体看参数。
scoped_ptr< Operation> aggregation(GroupAggregate(key_projector.release(),
specification.release(),
NULL,
ScanforGroup));
scoped_ptr< Cursor> bound_aggregation(
SucceedOrDie(aggregation->CreateCursor()));
ResultView result(bound_aggregation->Next(-1));
std::cout<<"The Result of Group is :"<<std::endl;
std::cout<<"column count is:"<<result.view().column_count()<<std::endl;
std::cout<<"row count is:"<<result.view().row_count()<<std::endl;
for (int32 k=0;k<result.view().column_count();k++)
{
std::cout<<result.view().schema().attribute(k).name()<<"\t";
}
std::cout<<std::endl;
for(int j=0; j<result.view().row_count();j++)
{
std::cout<<result.view().column(0).typed_data<STRING>()[j]<<"\t";
std::cout<<result.view().column(1).typed_data<FLOAT>()[j]<<"\t";
std::cout<<std::endl;
}
/*Sort函数*/
Operation * ScanforSort = ScanView(result.view());
scoped_ptr< const SingleSourceProjector >projector(ProjectNamedAttribute( "L_RETURNFLAG"));//选出排序属性
scoped_ptr< SortOrder> sort_order(new SortOrder());
sort_order-> add(projector.release(), ASCENDING);//指定排序顺序
const size_t mem_limit = 128;//限定内存大小,
scoped_ptr< Operation> sort(Sort(sort_order.release(),
NULL,
mem_limit,
ScanforSort));//调用排序算法,其中的input_view是scoped_ptr<View>input_view。
scoped_ptr<Cursor> Tresult_cursor;
Tresult_cursor.reset(SucceedOrDie(sort->CreateCursor()));//scoped_ptr<Cursor> result_cursor;
ResultView Tresult(Tresult_cursor->Next(-1));
std::cout<<"The Result of Sort is :"<<std::endl;
std::cout<<"column count is:"<<Tresult.view().column_count()<<std::endl;
std::cout<<"row count is:"<<Tresult.view().row_count()<<std::endl;
for (int32 k=0;k<Tresult.view().column_count();k++)
{
std::cout<<Tresult.view().schema().attribute(k).name()<<"\t";
}
std::cout<<std::endl;
for(int j=0; j<Tresult.view().row_count();j++)
{
std::cout<<Tresult.view().column(0).typed_data<STRING>()[j]<<"\t";
std::cout<<Tresult.view().column(1).typed_data<FLOAT>()[j]<<"\t";
std::cout<<std::endl;
}
}
// Supersonic objects.
scoped_ptr<Cursor> result_cursor;
TupleSchema lineitem_schema;
scoped_ptr<Table> lineitem_table;
scoped_ptr<TableRowWriter> lineitem_table_writer;
};
// Entry point: loads six sample lineitem rows into the test fixture and runs
// the filter / group / sort pipeline, which prints its results to stdout.
int main(void) {
  // One sample row, with fields in the same order as AddData()'s parameters.
  struct LineItemRow {
    int order_key;
    int part_key;
    int supp_key;
    int line_number;
    float quantity;
    float extended_price;
    float discount;
    float tax;
    const char* return_flag;
    const char* line_status;
    const char* ship_date;
    const char* commit_date;
    const char* receipt_date;
    const char* ship_instruct;
    const char* ship_mode;
    const char* comment;
  };
  const LineItemRow rows[] = {
    {1, 155190, 7706, 1, 17, 21168.23, 0.04, 0.02, "N", "O",
     "1996/03/13", "1996/03/13", "1996/03/13",
     "DELIVER IN PERSON", "TRUCK", "egular courts above the"},
    {1, 67310, 7311, 5, 36, 45983.16, 0.09, 0.06, "Y", "O",
     "1996/04/12", "1996/03/13", "1996/03/13",
     "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold"},
    {1, 67310, 7311, 1, 8, 13309.60, 0.10, 0.02, "N", "O",
     "1998/12/01", "1996/03/13", "1996/03/13",
     "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold"},
    {7, 67310, 7311, 3, 17, 21168.23, 0.04, 0.02, "Y", "O",
     "2001/04/13", "1996/03/13", "1996/03/13",
     "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold"},
    {9, 67310, 7311, 1, 36, 45983.16, 0.09, 0.06, "Z", "O",
     "2001/04/15", "1996/03/13", "1996/03/13",
     "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold"},
    {5, 67310, 7311, 1, 8, 13309.60, 0.10, 0.02, "A", "O",
     "2002/01/01", "1996/03/13", "1996/03/13",
     "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold"},
  };

  QueryOneTest test;
  test.SetUp();
  for (size_t i = 0; i < sizeof(rows) / sizeof(rows[0]); ++i) {
    const LineItemRow& r = rows[i];
    test.AddData(r.order_key, r.part_key, r.supp_key, r.line_number,
                 r.quantity, r.extended_price, r.discount, r.tax,
                 r.return_flag, r.line_status,
                 r.ship_date, r.commit_date, r.receipt_date,
                 r.ship_instruct, r.ship_mode, r.comment);
  }
  test.TestResults();
  return 0;
}
执行结果:
bash-4.1# ./tmp
The Result of Filter is :
column count is:16
row count is:3
The Result of Group is :
column count is:2
row count is:2
L_RETURNFLAG data_sums
N 25
Y 36
The Result of Sort is :
column count is:2
row count is:2
L_RETURNFLAG data_sums
N 25
Y 36