// A block containing several rows of a sparse matrix; suited to streaming algorithms.
/*!
* \brief a block of data, containing several rows in sparse matrix
* This is useful for (streaming-style) algorithms that scan through rows of data
* examples include: SGD, GD, L-BFGS, kmeans
*
* The size of batch is usually large enough so that parallelizing over the rows
* can give significant speedup
* \tparam IndexType type to store the index used in row batch
*/
template<typename IndexType>
struct RowBlock {
  /*! \brief batch size, i.e. the number of rows in this block */
  size_t size;
  /*! \brief array[size+1], row pointer to the beginning of each row */
  const size_t *offset;
  /*! \brief array[size] label of each instance */
  const real_t *label;
  /*! \brief With weight: array[size] weight of each instance, otherwise NULL */
  const real_t *weight;
  /*! \brief feature index */
  const IndexType *index;
  /*! \brief feature value, can be NULL, indicating all values are 1 */
  const real_t *value;
  /*!
   * \brief get a specific row in the batch
   * \param rowid the index of the row within this block
   * \return the instance corresponding to the row
   */
  inline Row<IndexType> operator[](size_t rowid) const;
  /*!
   * \brief compute the memory cost of the block
   *
   * Counts one size_t and one real_t (the label) per row, one more real_t
   * per row when weights are present, and the index / value storage of the
   * offset[size] - offset[0] entries referenced by this block.
   * \return memory cost of the block in bytes
   */
  inline size_t MemCostBytes(void) const {
    size_t cost = size * (sizeof(size_t) + sizeof(real_t));
    if (weight != NULL) cost += size * sizeof(real_t);
    // number of stored entries spanned by the rows of this block
    size_t ndata = offset[size] - offset[0];
    if (index != NULL) cost += ndata * sizeof(IndexType);
    if (value != NULL) cost += ndata * sizeof(real_t);
    return cost;
  }
  /*!
   * \brief slice a RowBlock to get rows in [begin, end)
   *
   * No data is copied: the per-row arrays (label, weight, offset) are
   * advanced by begin, while index and value keep pointing at the same base,
   * because the values stored in offset are left untouched and therefore
   * still refer to positions relative to the original index / value arrays.
   * \param begin the begin row index (inclusive)
   * \param end the end row index (exclusive), may be equal to size
   * \return the sliced RowBlock
   */
  inline RowBlock Slice(size_t begin, size_t end) const {
    // The range is half-open and offset holds size + 1 entries, so
    // end == size is valid and is required to include the last row.
    CHECK(begin <= end && end <= size);
    RowBlock ret;
    ret.size = end - begin;
    ret.label = label + begin;
    if (weight != NULL) {
      ret.weight = weight + begin;
    } else {
      ret.weight = NULL;
    }
    ret.offset = offset + begin;
    ret.index = index;
    ret.value = value;
    return ret;
  }
};