打开数据库文件
初始化BtShared和游标。
int sqlite3BtreeOpen(
sqlite3_vfs *pVfs, /* VFS to use for this b-tree */
const char *zFilename, /* Name of the file containing the BTree database */
sqlite3 *db, /* Associated database handle */
Btree **ppBtree, /* Pointer to new Btree object written here */
int flags, /* Options */
int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */
){
BtShared *pBt = 0; /* Shared part of btree structure */
Btree *p; /* Handle to return */
sqlite3_mutex *mutexOpen = 0; /* Prevents a race condition. Ticket #3537 */
int rc = SQLITE_OK; /* Result code from this function */
u8 nReserve; /* Byte of unused space on each page */
unsigned char zDbHeader[100]; /* Database header content */
if( pBt==0 ){
/* Suppress false-positive compiler warning from PVS-Studio */
memset(&zDbHeader[16], 0, 8);
pBt = sqlite3MallocZero( sizeof(*pBt) );
if( pBt==0 ){
rc = SQLITE_NOMEM_BKPT;
goto btree_open_out;
}
rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
sizeof(MemPage), flags, vfsFlags, pageReinit);
if( rc==SQLITE_OK ){
sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap);
rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
}
if( rc!=SQLITE_OK ){
goto btree_open_out;
}
pBt->openFlags = (u8)flags;
pBt->db = db;
sqlite3PagerSetBusyHandler(pBt->pPager, btreeInvokeBusyHandler, pBt);
p->pBt = pBt;
pBt->pCursor = 0;
pBt->pPage1 = 0;
if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY;
/* EVIDENCE-OF: R-51873-39618 The page size for a database file is
** determined by the 2-byte integer located at an offset of 16 bytes from
** the beginning of the database file. */
pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16);
if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
|| ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
pBt->pageSize = 0;
nReserve = 0;
}else{
/* EVIDENCE-OF: R-37497-42412 The size of the reserved region is
** determined by the one-byte unsigned integer found at an offset of 20
** into the database file header. */
nReserve = zDbHeader[20];
pBt->btsFlags |= BTS_PAGESIZE_FIXED;
}
rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
if( rc ) goto btree_open_out;
pBt->usableSize = pBt->pageSize - nReserve;
assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
/* Add the new BtShared object to the linked list sharable BtShareds.
*/
pBt->nRef = 1;
if( p->sharable ){
MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN);)
if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){
pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST);
if( pBt->mutex==0 ){
rc = SQLITE_NOMEM_BKPT;
goto btree_open_out;
}
}
sqlite3_mutex_enter(mutexShared);
pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList);
GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt;
sqlite3_mutex_leave(mutexShared);
}
#endif
}
#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
/* If the new Btree uses a sharable pBtShared, then link the new
** Btree into the list of all sharable Btrees for the same connection.
** The list is kept in ascending order by pBt address.
*/
if( p->sharable ){
int i;
Btree *pSib;
for(i=0; i<db->nDb; i++){
if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){
while( pSib->pPrev ){ pSib = pSib->pPrev; }
if( (uptr)p->pBt<(uptr)pSib->pBt ){
p->pNext = pSib;
p->pPrev = 0;
pSib->pPrev = p;
}else{
while( pSib->pNext && (uptr)pSib->pNext->pBt<(uptr)p->pBt ){
pSib = pSib->pNext;
}
p->pNext = pSib->pNext;
p->pPrev = pSib;
if( p->pNext ){
p->pNext->pPrev = p;
}
pSib->pNext = p;
}
break;
}
}
}
#endif
*ppBtree = p;
btree_open_out:
if( rc!=SQLITE_OK ){
if( pBt && pBt->pPager ){
sqlite3PagerClose(pBt->pPager, 0);
}
sqlite3_free(pBt);
sqlite3_free(p);
*ppBtree = 0;
}else{
sqlite3_file *pFile;
/* If the B-Tree was successfully opened, set the pager-cache size to the
** default value. Except, when opening on an existing shared pager-cache,
** do not change the pager-cache size.
*/
if( sqlite3BtreeSchema(p, 0, 0)==0 ){
sqlite3BtreeSetCacheSize(p, SQLITE_DEFAULT_CACHE_SIZE);
}
pFile = sqlite3PagerFile(pBt->pPager);
if( pFile->pMethods ){
sqlite3OsFileControlHint(pFile, SQLITE_FCNTL_PDB, (void*)&pBt->db);
}
}
if( mutexOpen ){
assert( sqlite3_mutex_held(mutexOpen) );
sqlite3_mutex_leave(mutexOpen);
}
assert( rc!=SQLITE_OK || sqlite3BtreeConnectionCount(*ppBtree)>0 );
return rc;
}
创建一张BTree表
写入新表的root页的页号。
static int btreeCreateTable(Btree *p, Pgno *piTable, int createTabFlags){
BtShared *pBt = p->pBt;
MemPage *pRoot;
Pgno pgnoRoot;
int rc;
int ptfFlags; /* Page-type flags for the root page of new table */
assert( sqlite3BtreeHoldsMutex(p) );
assert( pBt->inTransaction==TRANS_WRITE );
assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
#ifdef SQLITE_OMIT_AUTOVACUUM
rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
if( rc ){
return rc;
}
#else
if( pBt->autoVacuum ){
Pgno pgnoMove; /* Move a page here to make room for the root-page */
MemPage *pPageMove; /* The page to move to. */
/* Creating a new table may probably require moving an existing database
** to make room for the new tables root page. In case this page turns
** out to be an overflow page, delete all overflow page-map caches
** held by open cursors.
*/
invalidateAllOverflowCache(pBt);
/* Read the value of meta[3] from the database to determine where the
** root page of the new table should go. meta[3] is the largest root-page
** created so far, so the new root-page is (meta[3]+1).
*/
sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot);
if( pgnoRoot>btreePagecount(pBt) ){
return SQLITE_CORRUPT_PGNO(pgnoRoot);
}
pgnoRoot++;
/* The new root-page may not be allocated on a pointer-map page, or the
** PENDING_BYTE page.
*/
while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
pgnoRoot++;
}
assert( pgnoRoot>=3 );
/* Allocate a page. The page that currently resides at pgnoRoot will
** be moved to the allocated page (unless the allocated page happens
** to reside at pgnoRoot).
*/
rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT);
if( rc!=SQLITE_OK ){
return rc;
}
if( pgnoMove!=pgnoRoot ){
/* pgnoRoot is the page that will be used for the root-page of
** the new table (assuming an error did not occur). But we were
** allocated pgnoMove. If required (i.e. if it was not allocated
** by extending the file), the current page at position pgnoMove
** is already journaled.
*/
u8 eType = 0;
Pgno iPtrPage = 0;
/* Save the positions of any open cursors. This is required in
** case they are holding a reference to an xFetch reference
** corresponding to page pgnoRoot. */
rc = saveAllCursors(pBt, 0, 0);
releasePage(pPageMove);
if( rc!=SQLITE_OK ){
return rc;
}
/* Move the page currently at pgnoRoot to pgnoMove. */
rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
if( rc!=SQLITE_OK ){
return rc;
}
rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
rc = SQLITE_CORRUPT_PGNO(pgnoRoot);
}
if( rc!=SQLITE_OK ){
releasePage(pRoot);
return rc;
}
assert( eType!=PTRMAP_ROOTPAGE );
assert( eType!=PTRMAP_FREEPAGE );
rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0);
releasePage(pRoot);
/* Obtain the page at pgnoRoot */
if( rc!=SQLITE_OK ){
return rc;
}
rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
if( rc!=SQLITE_OK ){
return rc;
}
rc = sqlite3PagerWrite(pRoot->pDbPage);
if( rc!=SQLITE_OK ){
releasePage(pRoot);
return rc;
}
}else{
pRoot = pPageMove;
}
/* Update the pointer-map and meta-data with the new root-page number. */
ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0, &rc);
if( rc ){
releasePage(pRoot);
return rc;
}
/* When the new root page was allocated, page 1 was made writable in
** order either to increase the database filesize, or to decrement the
** freelist count. Hence, the sqlite3BtreeUpdateMeta() call cannot fail.
*/
assert( sqlite3PagerIswriteable(pBt->pPage1->pDbPage) );
rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
if( NEVER(rc) ){
releasePage(pRoot);
return rc;
}
}else{
rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
if( rc ) return rc;
}
#endif
assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
if( createTabFlags & BTREE_INTKEY ){
ptfFlags = PTF_INTKEY | PTF_LEAFDATA | PTF_LEAF;
}else{
ptfFlags = PTF_ZERODATA | PTF_LEAF;
}
zeroPage(pRoot, ptfFlags);
sqlite3PagerUnref(pRoot->pDbPage);
assert( (pBt->openFlags & BTREE_SINGLE)==0 || pgnoRoot==2 );
*piTable = pgnoRoot;
return SQLITE_OK;
}
分配Btree的页
从数据库文件分配一个新的页。
会在freelist中就近搜索未分配的页,新的页被标记为脏页。
static int allocateBtreePage(
BtShared *pBt, /* The btree */
MemPage **ppPage, /* Store pointer to the allocated page here */
Pgno *pPgno, /* Store the page number here */
Pgno nearby, /* Search for a page near this one */
u8 eMode /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */
){
MemPage *pPage1;
int rc;
u32 n; /* Number of pages on the freelist */
u32 k; /* Number of leaves on the trunk of the freelist */
MemPage *pTrunk = 0;
MemPage *pPrevTrunk = 0;
Pgno mxPage; /* Total size of the database file */
assert( sqlite3_mutex_held(pBt->mutex) );
assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) );
pPage1 = pBt->pPage1;
mxPage = btreePagecount(pBt);
/* EVIDENCE-OF: R-21003-45125 The 4-byte big-endian integer at offset 36
** stores the total number of pages on the freelist. */
n = get4byte(&pPage1->aData[36]);
testcase( n==mxPage-1 );
if( n>=mxPage ){
return SQLITE_CORRUPT_BKPT;
}
if( n>0 ){
/* There are pages on the freelist. Reuse one of those pages. */
Pgno iTrunk;
u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
u32 nSearch = 0; /* Count of the number of search attempts */
/* If eMode==BTALLOC_EXACT and a query of the pointer-map
** shows that the page 'nearby' is somewhere on the free-list, then
** the entire-list will be searched for that page.
*/
#ifndef SQLITE_OMIT_AUTOVACUUM
if( eMode==BTALLOC_EXACT ){
if( nearby<=mxPage ){
u8 eType;
assert( nearby>0 );
assert( pBt->autoVacuum );
rc = ptrmapGet(pBt, nearby, &eType, 0);
if( rc ) return rc;
if( eType==PTRMAP_FREEPAGE ){
searchList = 1;
}
}
}else if( eMode==BTALLOC_LE ){
searchList = 1;
}
#endif
/* Decrement the free-list count by 1. Set iTrunk to the index of the
** first free-list trunk page. iPrevTrunk is initially 1.
*/
rc = sqlite3PagerWrite(pPage1->pDbPage);
if( rc ) return rc;
put4byte(&pPage1->aData[36], n-1);
/* The code within this loop is run only once if the 'searchList' variable
** is not true. Otherwise, it runs once for each trunk-page on the
** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT)
** or until a page less than 'nearby' is located (eMode==BTALLOC_LT)
*/
do {
pPrevTrunk = pTrunk;
if( pPrevTrunk ){
/* EVIDENCE-OF: R-01506-11053 The first integer on a freelist trunk page
** is the page number of the next freelist trunk page in the list or
** zero if this is the last freelist trunk page. */
iTrunk = get4byte(&pPrevTrunk->aData[0]);
}else{
/* EVIDENCE-OF: R-59841-13798 The 4-byte big-endian integer at offset 32
** stores the page number of the first page of the freelist, or zero if
** the freelist is empty. */
iTrunk = get4byte(&pPage1->aData[32]);
}
testcase( iTrunk==mxPage );
if( iTrunk>mxPage || nSearch++ > n ){
rc = SQLITE_CORRUPT_PGNO(pPrevTrunk ? pPrevTrunk->pgno : 1);
}else{
rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0);
}
if( rc ){
pTrunk = 0;
goto end_allocate_page;
}
assert( pTrunk!=0 );
assert( pTrunk->aData!=0 );
/* EVIDENCE-OF: R-13523-04394 The second integer on a freelist trunk page
** is the number of leaf page pointers to follow. */
k = get4byte(&pTrunk->aData[4]);
if( k==0 && !searchList ){
/* The trunk has no leaves and the list is not being searched.
** So extract the trunk page itself and use it as the newly
** allocated page */
assert( pPrevTrunk==0 );
rc = sqlite3PagerWrite(pTrunk->pDbPage);
if( rc ){
goto end_allocate_page;
}
*pPgno = iTrunk;
memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
*ppPage = pTrunk;
pTrunk = 0;
TRACE(("ALLOCATE: %u trunk - %u free pages left\n", *pPgno, n-1));
}else if( k>(u32)(pBt->usableSize/4 - 2) ){
/* Value of k is out of range. Database corruption */
rc = SQLITE_CORRUPT_PGNO(iTrunk);
goto end_allocate_page;
#ifndef SQLITE_OMIT_AUTOVACUUM
}else if( searchList
&& (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE))
){
/* The list is being searched and this trunk page is the page
** to allocate, regardless of whether it has leaves.
*/
*pPgno = iTrunk;
*ppPage = pTrunk;
searchList = 0;
rc = sqlite3PagerWrite(pTrunk->pDbPage);
if( rc ){
goto end_allocate_page;
}
if( k==0 ){
if( !pPrevTrunk ){
memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
}else{
rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
if( rc!=SQLITE_OK ){
goto end_allocate_page;
}
memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
}
}else{
/* The trunk page is required by the caller but it contains
** pointers to free-list leaves. The first leaf becomes a trunk
** page in this case.
*/
MemPage *pNewTrunk;
Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
if( iNewTrunk>mxPage ){
rc = SQLITE_CORRUPT_PGNO(iTrunk);
goto end_allocate_page;
}
testcase( iNewTrunk==mxPage );
rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0);
if( rc!=SQLITE_OK ){
goto end_allocate_page;
}
rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
if( rc!=SQLITE_OK ){
releasePage(pNewTrunk);
goto end_allocate_page;
}
memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
put4byte(&pNewTrunk->aData[4], k-1);
memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
releasePage(pNewTrunk);
if( !pPrevTrunk ){
assert( sqlite3PagerIswriteable(pPage1->pDbPage) );
put4byte(&pPage1->aData[32], iNewTrunk);
}else{
rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
if( rc ){
goto end_allocate_page;
}
put4byte(&pPrevTrunk->aData[0], iNewTrunk);
}
}
pTrunk = 0;
TRACE(("ALLOCATE: %u trunk - %u free pages left\n", *pPgno, n-1));
#endif
}else if( k>0 ){
/* Extract a leaf from the trunk */
u32 closest;
Pgno iPage;
unsigned char *aData = pTrunk->aData;
if( nearby>0 ){
u32 i;
closest = 0;
if( eMode==BTALLOC_LE ){
for(i=0; i<k; i++){
iPage = get4byte(&aData[8+i*4]);
if( iPage<=nearby ){
closest = i;
break;
}
}
}else{
int dist;
dist = sqlite3AbsInt32(get4byte(&aData[8]) - nearby);
for(i=1; i<k; i++){
int d2 = sqlite3AbsInt32(get4byte(&aData[8+i*4]) - nearby);
if( d2<dist ){
closest = i;
dist = d2;
}
}
}
}else{
closest = 0;
}
iPage = get4byte(&aData[8+closest*4]);
testcase( iPage==mxPage );
if( iPage>mxPage || iPage<2 ){
rc = SQLITE_CORRUPT_PGNO(iTrunk);
goto end_allocate_page;
}
testcase( iPage==mxPage );
if( !searchList
|| (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE))
){
int noContent;
*pPgno = iPage;
TRACE(("ALLOCATE: %u was leaf %u of %u on trunk %u"
": %u more free pages\n",
*pPgno, closest+1, k, pTrunk->pgno, n-1));
rc = sqlite3PagerWrite(pTrunk->pDbPage);
if( rc ) goto end_allocate_page;
if( closest<k-1 ){
memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
}
put4byte(&aData[4], k-1);
noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0;
rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, noContent);
if( rc==SQLITE_OK ){
rc = sqlite3PagerWrite((*ppPage)->pDbPage);
if( rc!=SQLITE_OK ){
releasePage(*ppPage);
*ppPage = 0;
}
}
searchList = 0;
}
}
releasePage(pPrevTrunk);
pPrevTrunk = 0;
}while( searchList );
}else{
/* There are no pages on the freelist, so append a new page to the
** database image.
**
** Normally, new pages allocated by this block can be requested from the
** pager layer with the 'no-content' flag set. This prevents the pager
** from trying to read the pages content from disk. However, if the
** current transaction has already run one or more incremental-vacuum
** steps, then the page we are about to allocate may contain content
** that is required in the event of a rollback. In this case, do
** not set the no-content flag. This causes the pager to load and journal
** the current page content before overwriting it.
**
** Note that the pager will not actually attempt to load or journal
** content for any page that really does lie past the end of the database
** file on disk. So the effects of disabling the no-content optimization
** here are confined to those pages that lie between the end of the
** database image and the end of the database file.
*/
int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0;
rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
if( rc ) return rc;
pBt->nPage++;
if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++;
#ifndef SQLITE_OMIT_AUTOVACUUM
if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){
/* If *pPgno refers to a pointer-map page, allocate two new pages
** at the end of the file instead of one. The first allocated page
** becomes a new pointer-map page, the second is used by the caller.
*/
MemPage *pPg = 0;
TRACE(("ALLOCATE: %u from end of file (pointer-map page)\n", pBt->nPage));
assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent);
if( rc==SQLITE_OK ){
rc = sqlite3PagerWrite(pPg->pDbPage);
releasePage(pPg);
}
if( rc ) return rc;
pBt->nPage++;
if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; }
}
#endif
put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage);
*pPgno = pBt->nPage;
assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent);
if( rc ) return rc;
rc = sqlite3PagerWrite((*ppPage)->pDbPage);
if( rc!=SQLITE_OK ){
releasePage(*ppPage);
*ppPage = 0;
}
TRACE(("ALLOCATE: %u from end of file\n", *pPgno));
}
assert( CORRUPT_DB || *pPgno!=PENDING_BYTE_PAGE(pBt) );
end_allocate_page:
releasePage(pTrunk);
releasePage(pPrevTrunk);
assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 );
assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 );
return rc;
}
树的平衡相关函数
static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
BtShared *const pBt = pPage->pBt; /* B-Tree Database */
MemPage *pNew; /* Newly allocated page */
int rc; /* Return Code */
Pgno pgnoNew; /* Page number of pNew */
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( sqlite3PagerIswriteable(pParent->pDbPage) );
assert( pPage->nOverflow==1 );
if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; /* dbfuzz001.test */
assert( pPage->nFree>=0 );
assert( pParent->nFree>=0 );
/* Allocate a new page. This page will become the right-sibling of
** pPage. Make the parent page writable, so that the new divider cell
** may be inserted. If both these operations are successful, proceed.
*/
rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
if( rc==SQLITE_OK ){
u8 *pOut = &pSpace[4];
u8 *pCell = pPage->apOvfl[0];
u16 szCell = pPage->xCellSize(pPage, pCell);
u8 *pStop;
CellArray b;
assert( sqlite3PagerIswriteable(pNew->pDbPage) );
assert( CORRUPT_DB || pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) );
zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF);
b.nCell = 1;
b.pRef = pPage;
b.apCell = &pCell;
b.szCell = &szCell;
b.apEnd[0] = pPage->aDataEnd;
b.ixNx[0] = 2;
b.ixNx[NB*2-1] = 0x7fffffff;
rc = rebuildPage(&b, 0, 1, pNew);
if( NEVER(rc) ){
releasePage(pNew);
return rc;
}
pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell;
/* Create a divider cell to insert into pParent. The divider cell
** consists of a 4-byte page number (the page number of pPage) and
** a variable length key value (which must be the same value as the
** largest key on pPage).
**
** To find the largest key value on pPage, first find the right-most
** cell on pPage. The first two fields of this cell are the
** record-length (a variable length integer at most 32-bits in size)
** and the key value (a variable length integer, may have any value).
** The first of the while(...) loops below skips over the record-length
** field. The second while(...) loop copies the key value from the
** cell on pPage into the pSpace buffer.
*/
pCell = findCell(pPage, pPage->nCell-1);
pStop = &pCell[9];
while( (*(pCell++)&0x80) && pCell<pStop );
pStop = &pCell[9];
while( ((*(pOut++) = *(pCell++))&0x80) && pCell<pStop );
/* Insert the new divider cell into pParent. */
if( rc==SQLITE_OK ){
rc = insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace),
0, pPage->pgno);
}
/* Set the right-child pointer of pParent to point to the new page. */
put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
/* Release the reference to the new page. */
releasePage(pNew);
}
return rc;
}
在页的cell索引上,插入一个新的cell
static int insertCell(
MemPage *pPage, /* Page into which we are copying */
int i, /* New cell becomes the i-th cell of the page */
u8 *pCell, /* Content of the new cell */
int sz, /* Bytes of content in pCell */
u8 *pTemp, /* Temp storage space for pCell, if needed */
Pgno iChild /* If non-zero, replace first 4 bytes with this value */
){
int idx = 0; /* Where to write new cell content in data[] */
int j; /* Loop counter */
u8 *data; /* The content of the whole page */
u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */
assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
assert( MX_CELL(pPage->pBt)<=10921 );
assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB );
assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) );
assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( sz==pPage->xCellSize(pPage, pCell) || CORRUPT_DB );
assert( pPage->nFree>=0 );
assert( iChild>0 );
if( pPage->nOverflow || sz+2>pPage->nFree ){
if( pTemp ){
memcpy(pTemp, pCell, sz);
pCell = pTemp;
}
put4byte(pCell, iChild);
j = pPage->nOverflow++;
/* Comparison against ArraySize-1 since we hold back one extra slot
** as a contingency. In other words, never need more than 3 overflow
** slots but 4 are allocated, just to be safe. */
assert( j < ArraySize(pPage->apOvfl)-1 );
pPage->apOvfl[j] = pCell;
pPage->aiOvfl[j] = (u16)i;
/* When multiple overflows occur, they are always sequential and in
** sorted order. This invariants arise because multiple overflows can
** only occur when inserting divider cells into the parent page during
** balancing, and the dividers are adjacent and sorted.
*/
assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */
assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */
}else{
int rc = sqlite3PagerWrite(pPage->pDbPage);
if( NEVER(rc!=SQLITE_OK) ){
return rc;
}
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
data = pPage->aData;
assert( &data[pPage->cellOffset]==pPage->aCellIdx );
rc = allocateSpace(pPage, sz, &idx);
if( rc ){ return rc; }
/* The allocateSpace() routine guarantees the following properties
** if it returns successfully */
assert( idx >= 0 );
assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB );
assert( idx+sz <= (int)pPage->pBt->usableSize );
pPage->nFree -= (u16)(2 + sz);
/* In a corrupt database where an entry in the cell index section of
** a btree page has a value of 3 or less, the pCell value might point
** as many as 4 bytes in front of the start of the aData buffer for
** the source page. Make sure this does not cause problems by not
** reading the first 4 bytes */
memcpy(&data[idx+4], pCell+4, sz-4);
put4byte(&data[idx], iChild);
pIns = pPage->aCellIdx + i*2;
memmove(pIns+2, pIns, 2*(pPage->nCell - i));
put2byte(pIns, idx);
pPage->nCell++;
/* increment the cell count */
if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++;
assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB );
#ifndef SQLITE_OMIT_AUTOVACUUM
if( pPage->pBt->autoVacuum ){
int rc2 = SQLITE_OK;
/* The cell may contain a pointer to an overflow page. If so, write
** the entry for the overflow page into the pointer map.
*/
ptrmapPutOvflPtr(pPage, pPage, pCell, &rc2);
if( rc2 ) return rc2;
}
#endif
}
return SQLITE_OK;
}
将页中的数据返回给freelist
static int freeSpace(MemPage *pPage, int iStart, int iSize){
int iPtr; /* Address of ptr to next freeblock */
int iFreeBlk; /* Address of the next freeblock */
u8 hdr; /* Page header size. 0 or 100 */
int nFrag = 0; /* Reduction in fragmentation */
int iOrigSize = iSize; /* Original value of iSize */
int x; /* Offset to cell content area */
int iEnd = iStart + iSize; /* First byte past the iStart buffer */
unsigned char *data = pPage->aData; /* Page content */
u8 *pTmp; /* Temporary ptr into data[] */
assert( pPage->pBt!=0 );
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize );
assert( CORRUPT_DB || iEnd <= (int)pPage->pBt->usableSize );
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
assert( iSize>=4 ); /* Minimum cell size is 4 */
assert( CORRUPT_DB || iStart<=(int)pPage->pBt->usableSize-4 );
/* The list of freeblocks must be in ascending order. Find the
** spot on the list where iStart should be inserted.
*/
hdr = pPage->hdrOffset;
iPtr = hdr + 1;
if( data[iPtr+1]==0 && data[iPtr]==0 ){
iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */
}else{
while( (iFreeBlk = get2byte(&data[iPtr]))<iStart ){
if( iFreeBlk<=iPtr ){
if( iFreeBlk==0 ) break; /* TH3: corrupt082.100 */
return SQLITE_CORRUPT_PAGE(pPage);
}
iPtr = iFreeBlk;
}
if( iFreeBlk>(int)pPage->pBt->usableSize-4 ){ /* TH3: corrupt081.100 */
return SQLITE_CORRUPT_PAGE(pPage);
}
assert( iFreeBlk>iPtr || iFreeBlk==0 || CORRUPT_DB );
/* At this point:
** iFreeBlk: First freeblock after iStart, or zero if none
** iPtr: The address of a pointer to iFreeBlk
**
** Check to see if iFreeBlk should be coalesced onto the end of iStart.
*/
if( iFreeBlk && iEnd+3>=iFreeBlk ){
nFrag = iFreeBlk - iEnd;
if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_PAGE(pPage);
iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]);
if( iEnd > (int)pPage->pBt->usableSize ){
return SQLITE_CORRUPT_PAGE(pPage);
}
iSize = iEnd - iStart;
iFreeBlk = get2byte(&data[iFreeBlk]);
}
/* If iPtr is another freeblock (that is, if iPtr is not the freelist
** pointer in the page header) then check to see if iStart should be
** coalesced onto the end of iPtr.
*/
if( iPtr>hdr+1 ){
int iPtrEnd = iPtr + get2byte(&data[iPtr+2]);
if( iPtrEnd+3>=iStart ){
if( iPtrEnd>iStart ) return SQLITE_CORRUPT_PAGE(pPage);
nFrag += iStart - iPtrEnd;
iSize = iEnd - iPtr;
iStart = iPtr;
}
}
if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_PAGE(pPage);
data[hdr+7] -= (u8)nFrag;
}
pTmp = &data[hdr+5];
x = get2byte(pTmp);
if( pPage->pBt->btsFlags & BTS_FAST_SECURE ){
/* Overwrite deleted information with zeros when the secure_delete
** option is enabled */
memset(&data[iStart], 0, iSize);
}
if( iStart<=x ){
/* The new freeblock is at the beginning of the cell content area,
** so just extend the cell content area rather than create another
** freelist entry */
if( iStart<x ) return SQLITE_CORRUPT_PAGE(pPage);
if( iPtr!=hdr+1 ) return SQLITE_CORRUPT_PAGE(pPage);
put2byte(&data[hdr+1], iFreeBlk);
put2byte(&data[hdr+5], iEnd);
}else{
/* Insert the new freeblock into the freelist */
put2byte(&data[iPtr], iStart);
put2byte(&data[iStart], iFreeBlk);
assert( iSize>=0 && iSize<=0xffff );
put2byte(&data[iStart+2], (u16)iSize);
}
pPage->nFree += iOrigSize;
return SQLITE_OK;
}
1089

被折叠的 条评论
为什么被折叠?



