000001  /*
000002  ** 2008 August 05
000003  **
000004  ** The author disclaims copyright to this source code.  In place of
000005  ** a legal notice, here is a blessing:
000006  **
000007  **    May you do good and not evil.
000008  **    May you find forgiveness for yourself and forgive others.
000009  **    May you share freely, never taking more than you give.
000010  **
000011  *************************************************************************
** This file implements the page cache.
000013  */
000014  #include "sqliteInt.h"
000015  
000016  /*
000017  ** A complete page cache is an instance of this structure.  Every
000018  ** entry in the cache holds a single page of the database file.  The
000019  ** btree layer only operates on the cached copy of the database pages.
000020  **
000021  ** A page cache entry is "clean" if it exactly matches what is currently
000022  ** on disk.  A page is "dirty" if it has been modified and needs to be
000023  ** persisted to disk.
000024  **
000025  ** pDirty, pDirtyTail, pSynced:
000026  **   All dirty pages are linked into the doubly linked list using
000027  **   PgHdr.pDirtyNext and pDirtyPrev. The list is maintained in LRU order
000028  **   such that p was added to the list more recently than p->pDirtyNext.
000029  **   PCache.pDirty points to the first (newest) element in the list and
000030  **   pDirtyTail to the last (oldest).
000031  **
000032  **   The PCache.pSynced variable is used to optimize searching for a dirty
000033  **   page to eject from the cache mid-transaction. It is better to eject
000034  **   a page that does not require a journal sync than one that does. 
000035  **   Therefore, pSynced is maintained so that it *almost* always points
000036  **   to either the oldest page in the pDirty/pDirtyTail list that has a
000037  **   clear PGHDR_NEED_SYNC flag or to a page that is older than this one
000038  **   (so that the right page to eject can be found by following pDirtyPrev
000039  **   pointers).
000040  */
000041  struct PCache {
000042    PgHdr *pDirty, *pDirtyTail;         /* List of dirty pages in LRU order */
000043    PgHdr *pSynced;                     /* Last synced page in dirty page list */
000044    i64 nRefSum;                        /* Sum of ref counts over all pages */
000045    int szCache;                        /* Configured cache size */
000046    int szSpill;                        /* Size before spilling occurs */
000047    int szPage;                         /* Size of every page in this cache */
000048    int szExtra;                        /* Size of extra space for each page */
000049    u8 bPurgeable;                      /* True if pages are on backing store */
000050    u8 eCreate;                         /* eCreate value for for xFetch() */
000051    int (*xStress)(void*,PgHdr*);       /* Call to try make a page clean */
000052    void *pStress;                      /* Argument to xStress */
000053    sqlite3_pcache *pCache;             /* Pluggable cache module */
000054  };
000055  
000056  /********************************** Test and Debug Logic **********************/
000057  /*
** Debug tracing macros.  Enable by changing the "0" to "1" and
** recompiling.
000060  **
000061  ** When sqlite3PcacheTrace is 1, single line trace messages are issued.
000062  ** When sqlite3PcacheTrace is 2, a dump of the pcache showing all cache entries
000063  ** is displayed for many operations, resulting in a lot of output.
000064  */
#if defined(SQLITE_DEBUG) && 0
  int sqlite3PcacheTrace = 2;       /* 0: off  1: simple  2: cache dumps */
  int sqlite3PcacheMxDump = 9999;   /* Max cache entries for pcacheDump() */
# define pcacheTrace(X) if(sqlite3PcacheTrace){sqlite3DebugPrintf X;}
  /*
  ** Print one line describing the low-level cache entry pLower, labelled
  ** with the number i: the reference count, the flags, the first 12 bytes
  ** of page content and the address of the PgHdr.  A NULL pLower is
  ** reported as "NULL".
  */
  static void pcachePageTrace(int i, sqlite3_pcache_page *pLower){
    PgHdr *pPg;
    unsigned char *a;
    int j;
    if( pLower==0 ){
      printf("%3d: NULL\n", i);
    }else{
      pPg = (PgHdr*)pLower->pExtra;
      printf("%3d: nRef %2lld flgs %02x data ", i, pPg->nRef, pPg->flags);
      a = (unsigned char *)pLower->pBuf;
      for(j=0; j<12; j++) printf("%02x", a[j]);
      printf(" ptr %p\n", pPg);
    }
  }
  /*
  ** When sqlite3PcacheTrace is 2 or more, fetch (without creating) and
  ** print each of the page numbers 1 through N, where N is the current
  ** page count limited to sqlite3PcacheMxDump.
  */
  static void pcacheDump(PCache *pCache){
    int N;
    int i;
    sqlite3_pcache_page *pLower;
  
    if( sqlite3PcacheTrace<2 ) return;
    if( pCache->pCache==0 ) return;
    N = sqlite3PcachePagecount(pCache);
    if( N>sqlite3PcacheMxDump ) N = sqlite3PcacheMxDump;
    for(i=1; i<=N; i++){
       pLower = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, i, 0);
       pcachePageTrace(i, pLower);
       /* The PgHdr for a cache entry is stored in pLower->pExtra, not at
       ** the address of the sqlite3_pcache_page itself.  A PgHdr whose
       ** pPage field is still zero has never been initialized by
       ** sqlite3PcacheFetchFinish(), so release the entry fetched above. */
       if( pLower && ((PgHdr*)pLower->pExtra)->pPage==0 ){
         sqlite3GlobalConfig.pcache2.xUnpin(pCache->pCache, pLower, 0);
       }
    }
  }
#else
# define pcacheTrace(X)
# define pcachePageTrace(PGNO, X)
# define pcacheDump(X)
#endif
000105  
000106  /*
000107  ** Return 1 if pPg is on the dirty list for pCache.  Return 0 if not.
000108  ** This routine runs inside of assert() statements only.
000109  */
#if defined(SQLITE_ENABLE_EXPENSIVE_ASSERT)
/* Scan the dirty list of pCache starting at pCache->pDirty and following
** pDirtyNext.  The result is 1 if pPg is encountered and 0 if the end of
** the list is reached first. */
static int pageOnDirtyList(PCache *pCache, PgHdr *pPg){
  PgHdr *pIter = pCache->pDirty;
  while( pIter!=0 ){
    if( pIter==pPg ) return 1;
    pIter = pIter->pDirtyNext;
  }
  return 0;
}
/* The complement of pageOnDirtyList(): the result is 1 only if the whole
** dirty list was scanned without meeting pPg. */
static int pageNotOnDirtyList(PCache *pCache, PgHdr *pPg){
  PgHdr *pIter = pCache->pDirty;
  while( pIter!=0 && pIter!=pPg ){
    pIter = pIter->pDirtyNext;
  }
  return pIter==0;
}
#else
# define pageOnDirtyList(A,B)    1
# define pageNotOnDirtyList(A,B) 1
#endif
000129  
000130  /*
000131  ** Check invariants on a PgHdr entry.  Return true if everything is OK.
000132  ** Return false if any invariant is violated.
000133  **
000134  ** This routine is for use inside of assert() statements only.  For
000135  ** example:
000136  **
000137  **          assert( sqlite3PcachePageSanity(pPg) );
000138  */
#ifdef SQLITE_DEBUG
int sqlite3PcachePageSanity(PgHdr *pPg){
  PCache *pCache;

  assert( pPg!=0 );
  assert( pPg->pgno>0 || pPg->pPager==0 );    /* Page number is 1 or more */
  pCache = pPg->pCache;
  assert( pCache!=0 );      /* Every page has an associated PCache */

  if( (pPg->flags & PGHDR_CLEAN)==0 ){
    /* A page that is not CLEAN must be DIRTY and must be correctly linked
    ** into the dirty list of its cache. */
    assert( (pPg->flags & PGHDR_DIRTY)!=0 );
    assert( pPg->pDirtyNext==0 || pPg->pDirtyNext->pDirtyPrev==pPg );
    assert( pPg->pDirtyPrev==0 || pPg->pDirtyPrev->pDirtyNext==pPg );
    assert( pPg->pDirtyPrev!=0 || pCache->pDirty==pPg );
    assert( pageOnDirtyList(pCache, pPg) );
  }else{
    /* A CLEAN page cannot also be DIRTY and is never on the dirty list */
    assert( (pPg->flags & PGHDR_DIRTY)==0 );
    assert( pageNotOnDirtyList(pCache, pPg) );
  }

  /* WRITEABLE implies DIRTY */
  assert( (pPg->flags & PGHDR_WRITEABLE)==0 || (pPg->flags & PGHDR_DIRTY)!=0 );

  /* No check is made on NEED_SYNC because it can be set independently of
  ** WRITEABLE.  This can happen, for example, when using the
  ** sqlite3PagerDontWrite() optimization:
  **    (1)  Page X is journalled, and gets WRITEABLE and NEED_SYNC.
  **    (2)  Page X moved to freelist, WRITEABLE is cleared
  **    (3)  Page X reused, WRITEABLE is set again
  ** If NEED_SYNC had been cleared in step 2, then it would not be reset
  ** in step 3, and page might be written into the database without first
  ** syncing the rollback journal, which might cause corruption on a power
  ** loss.
  **
  ** Another example is when the database page size is smaller than the
  ** disk sector size.  When any page of a sector is journalled, all pages
  ** in that sector are marked NEED_SYNC even if they are still CLEAN, just
  ** in case they are later modified, since all pages in the same sector
  ** must be journalled and synced before any of those pages can be safely
  ** written.
  */
  return 1;
}
#endif /* SQLITE_DEBUG */
000180  
000181  
000182  /********************************** Linked List Management ********************/
000183  
/* Allowed values for second argument to pcacheManageDirtyList().  The
** values are used as a bit mask: REMOVE is bit 0x01, ADD is bit 0x02 and
** FRONT is both bits together (remove, then add back at the front). */
#define PCACHE_DIRTYLIST_REMOVE   1    /* Remove pPage from dirty list */
#define PCACHE_DIRTYLIST_ADD      2    /* Add pPage to the dirty list */
#define PCACHE_DIRTYLIST_FRONT    3    /* Move pPage to the front of the list */
000188  
000189  /*
000190  ** Manage pPage's participation on the dirty list.  Bits of the addRemove
000191  ** argument determines what operation to do.  The 0x01 bit means first
000192  ** remove pPage from the dirty list.  The 0x02 means add pPage back to
000193  ** the dirty list.  Doing both moves pPage to the front of the dirty list.
000194  */
000195  static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){
000196    PCache *p = pPage->pCache;
000197  
000198    pcacheTrace(("%p.DIRTYLIST.%s %d\n", p,
000199                  addRemove==1 ? "REMOVE" : addRemove==2 ? "ADD" : "FRONT",
000200                  pPage->pgno));
000201    if( addRemove & PCACHE_DIRTYLIST_REMOVE ){
000202      assert( pPage->pDirtyNext || pPage==p->pDirtyTail );
000203      assert( pPage->pDirtyPrev || pPage==p->pDirty );
000204    
000205      /* Update the PCache1.pSynced variable if necessary. */
000206      if( p->pSynced==pPage ){
000207        p->pSynced = pPage->pDirtyPrev;
000208      }
000209    
000210      if( pPage->pDirtyNext ){
000211        pPage->pDirtyNext->pDirtyPrev = pPage->pDirtyPrev;
000212      }else{
000213        assert( pPage==p->pDirtyTail );
000214        p->pDirtyTail = pPage->pDirtyPrev;
000215      }
000216      if( pPage->pDirtyPrev ){
000217        pPage->pDirtyPrev->pDirtyNext = pPage->pDirtyNext;
000218      }else{
000219        /* If there are now no dirty pages in the cache, set eCreate to 2. 
000220        ** This is an optimization that allows sqlite3PcacheFetch() to skip
000221        ** searching for a dirty page to eject from the cache when it might
000222        ** otherwise have to.  */
000223        assert( pPage==p->pDirty );
000224        p->pDirty = pPage->pDirtyNext;
000225        assert( p->bPurgeable || p->eCreate==2 );
000226        if( p->pDirty==0 ){         /*OPTIMIZATION-IF-TRUE*/
000227          assert( p->bPurgeable==0 || p->eCreate==1 );
000228          p->eCreate = 2;
000229        }
000230      }
000231    }
000232    if( addRemove & PCACHE_DIRTYLIST_ADD ){
000233      pPage->pDirtyPrev = 0;
000234      pPage->pDirtyNext = p->pDirty;
000235      if( pPage->pDirtyNext ){
000236        assert( pPage->pDirtyNext->pDirtyPrev==0 );
000237        pPage->pDirtyNext->pDirtyPrev = pPage;
000238      }else{
000239        p->pDirtyTail = pPage;
000240        if( p->bPurgeable ){
000241          assert( p->eCreate==2 );
000242          p->eCreate = 1;
000243        }
000244      }
000245      p->pDirty = pPage;
000246  
000247      /* If pSynced is NULL and this page has a clear NEED_SYNC flag, set
000248      ** pSynced to point to it. Checking the NEED_SYNC flag is an 
000249      ** optimization, as if pSynced points to a page with the NEED_SYNC
000250      ** flag set sqlite3PcacheFetchStress() searches through all newer 
000251      ** entries of the dirty-list for a page with NEED_SYNC clear anyway.  */
000252      if( !p->pSynced 
000253       && 0==(pPage->flags&PGHDR_NEED_SYNC)   /*OPTIMIZATION-IF-FALSE*/
000254      ){
000255        p->pSynced = pPage;
000256      }
000257    }
000258    pcacheDump(p);
000259  }
000260  
000261  /*
000262  ** Wrapper around the pluggable caches xUnpin method. If the cache is
000263  ** being used for an in-memory database, this function is a no-op.
000264  */
000265  static void pcacheUnpin(PgHdr *p){
000266    if( p->pCache->bPurgeable ){
000267      pcacheTrace(("%p.UNPIN %d\n", p->pCache, p->pgno));
000268      sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0);
000269      pcacheDump(p->pCache);
000270    }
000271  }
000272  
000273  /*
000274  ** Compute the number of pages of cache requested.   p->szCache is the
000275  ** cache size requested by the "PRAGMA cache_size" statement.
000276  */
000277  static int numberOfCachePages(PCache *p){
000278    if( p->szCache>=0 ){
000279      /* IMPLEMENTATION-OF: R-42059-47211 If the argument N is positive then the
000280      ** suggested cache size is set to N. */
000281      return p->szCache;
000282    }else{
000283      i64 n;
000284      /* IMPLEMENTATION-OF: R-59858-46238 If the argument N is negative, then the
000285      ** number of cache pages is adjusted to be a number of pages that would
000286      ** use approximately abs(N*1024) bytes of memory based on the current
000287      ** page size. */
000288      n = ((-1024*(i64)p->szCache)/(p->szPage+p->szExtra));
000289      if( n>1000000000 ) n = 1000000000;
000290      return (int)n;
000291    }
000292  }
000293  
000294  /*************************************************** General Interfaces ******
000295  **
000296  ** Initialize and shutdown the page cache subsystem. Neither of these 
000297  ** functions are threadsafe.
000298  */
000299  int sqlite3PcacheInitialize(void){
000300    if( sqlite3GlobalConfig.pcache2.xInit==0 ){
000301      /* IMPLEMENTATION-OF: R-26801-64137 If the xInit() method is NULL, then the
000302      ** built-in default page cache is used instead of the application defined
000303      ** page cache. */
000304      sqlite3PCacheSetDefault();
000305      assert( sqlite3GlobalConfig.pcache2.xInit!=0 );
000306    }
000307    return sqlite3GlobalConfig.pcache2.xInit(sqlite3GlobalConfig.pcache2.pArg);
000308  }
000309  void sqlite3PcacheShutdown(void){
000310    if( sqlite3GlobalConfig.pcache2.xShutdown ){
000311      /* IMPLEMENTATION-OF: R-26000-56589 The xShutdown() method may be NULL. */
000312      sqlite3GlobalConfig.pcache2.xShutdown(sqlite3GlobalConfig.pcache2.pArg);
000313    }
000314  }
000315  
000316  /*
000317  ** Return the size in bytes of a PCache object.
000318  */
000319  int sqlite3PcacheSize(void){ return sizeof(PCache); }
000320  
000321  /*
000322  ** Create a new PCache object. Storage space to hold the object
000323  ** has already been allocated and is passed in as the p pointer. 
000324  ** The caller discovers how much space needs to be allocated by 
000325  ** calling sqlite3PcacheSize().
000326  **
000327  ** szExtra is some extra space allocated for each page.  The first
000328  ** 8 bytes of the extra space will be zeroed as the page is allocated,
000329  ** but remaining content will be uninitialized.  Though it is opaque
000330  ** to this module, the extra space really ends up being the MemPage
000331  ** structure in the pager.
000332  */
000333  int sqlite3PcacheOpen(
000334    int szPage,                  /* Size of every page */
000335    int szExtra,                 /* Extra space associated with each page */
000336    int bPurgeable,              /* True if pages are on backing store */
000337    int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */
000338    void *pStress,               /* Argument to xStress */
000339    PCache *p                    /* Preallocated space for the PCache */
000340  ){
000341    memset(p, 0, sizeof(PCache));
000342    p->szPage = 1;
000343    p->szExtra = szExtra;
000344    assert( szExtra>=8 );  /* First 8 bytes will be zeroed */
000345    p->bPurgeable = bPurgeable;
000346    p->eCreate = 2;
000347    p->xStress = xStress;
000348    p->pStress = pStress;
000349    p->szCache = 100;
000350    p->szSpill = 1;
000351    pcacheTrace(("%p.OPEN szPage %d bPurgeable %d\n",p,szPage,bPurgeable));
000352    return sqlite3PcacheSetPageSize(p, szPage);
000353  }
000354  
000355  /*
000356  ** Change the page size for PCache object. The caller must ensure that there
000357  ** are no outstanding page references when this function is called.
000358  */
000359  int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
000360    assert( pCache->nRefSum==0 && pCache->pDirty==0 );
000361    if( pCache->szPage ){
000362      sqlite3_pcache *pNew;
000363      pNew = sqlite3GlobalConfig.pcache2.xCreate(
000364                  szPage, pCache->szExtra + ROUND8(sizeof(PgHdr)),
000365                  pCache->bPurgeable
000366      );
000367      if( pNew==0 ) return SQLITE_NOMEM_BKPT;
000368      sqlite3GlobalConfig.pcache2.xCachesize(pNew, numberOfCachePages(pCache));
000369      if( pCache->pCache ){
000370        sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
000371      }
000372      pCache->pCache = pNew;
000373      pCache->szPage = szPage;
000374      pcacheTrace(("%p.PAGESIZE %d\n",pCache,szPage));
000375    }
000376    return SQLITE_OK;
000377  }
000378  
000379  /*
000380  ** Try to obtain a page from the cache.
000381  **
000382  ** This routine returns a pointer to an sqlite3_pcache_page object if
000383  ** such an object is already in cache, or if a new one is created.
000384  ** This routine returns a NULL pointer if the object was not in cache
000385  ** and could not be created.
000386  **
000387  ** The createFlags should be 0 to check for existing pages and should
000388  ** be 3 (not 1, but 3) to try to create a new page.
000389  **
000390  ** If the createFlag is 0, then NULL is always returned if the page
000391  ** is not already in the cache.  If createFlag is 1, then a new page
000392  ** is created only if that can be done without spilling dirty pages
000393  ** and without exceeding the cache size limit.
000394  **
000395  ** The caller needs to invoke sqlite3PcacheFetchFinish() to properly
000396  ** initialize the sqlite3_pcache_page object and convert it into a
000397  ** PgHdr object.  The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish()
000398  ** routines are split this way for performance reasons. When separated
000399  ** they can both (usually) operate without having to push values to
000400  ** the stack on entry and pop them back off on exit, which saves a
000401  ** lot of pushing and popping.
000402  */
000403  sqlite3_pcache_page *sqlite3PcacheFetch(
000404    PCache *pCache,       /* Obtain the page from this cache */
000405    Pgno pgno,            /* Page number to obtain */
000406    int createFlag        /* If true, create page if it does not exist already */
000407  ){
000408    int eCreate;
000409    sqlite3_pcache_page *pRes;
000410  
000411    assert( pCache!=0 );
000412    assert( pCache->pCache!=0 );
000413    assert( createFlag==3 || createFlag==0 );
000414    assert( pCache->eCreate==((pCache->bPurgeable && pCache->pDirty) ? 1 : 2) );
000415  
000416    /* eCreate defines what to do if the page does not exist.
000417    **    0     Do not allocate a new page.  (createFlag==0)
000418    **    1     Allocate a new page if doing so is inexpensive.
000419    **          (createFlag==1 AND bPurgeable AND pDirty)
000420    **    2     Allocate a new page even it doing so is difficult.
000421    **          (createFlag==1 AND !(bPurgeable AND pDirty)
000422    */
000423    eCreate = createFlag & pCache->eCreate;
000424    assert( eCreate==0 || eCreate==1 || eCreate==2 );
000425    assert( createFlag==0 || pCache->eCreate==eCreate );
000426    assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) );
000427    pRes = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate);
000428    pcacheTrace(("%p.FETCH %d%s (result: %p) ",pCache,pgno,
000429                 createFlag?" create":"",pRes));
000430    pcachePageTrace(pgno, pRes);
000431    return pRes;
000432  }
000433  
000434  /*
000435  ** If the sqlite3PcacheFetch() routine is unable to allocate a new
000436  ** page because no clean pages are available for reuse and the cache
000437  ** size limit has been reached, then this routine can be invoked to 
000438  ** try harder to allocate a page.  This routine might invoke the stress
000439  ** callback to spill dirty pages to the journal.  It will then try to
000440  ** allocate the new page and will only fail to allocate a new page on
000441  ** an OOM error.
000442  **
000443  ** This routine should be invoked only after sqlite3PcacheFetch() fails.
000444  */
000445  int sqlite3PcacheFetchStress(
000446    PCache *pCache,                 /* Obtain the page from this cache */
000447    Pgno pgno,                      /* Page number to obtain */
000448    sqlite3_pcache_page **ppPage    /* Write result here */
000449  ){
000450    PgHdr *pPg;
000451    if( pCache->eCreate==2 ) return 0;
000452  
000453    if( sqlite3PcachePagecount(pCache)>pCache->szSpill ){
000454      /* Find a dirty page to write-out and recycle. First try to find a 
000455      ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC
000456      ** cleared), but if that is not possible settle for any other 
000457      ** unreferenced dirty page.
000458      **
000459      ** If the LRU page in the dirty list that has a clear PGHDR_NEED_SYNC
000460      ** flag is currently referenced, then the following may leave pSynced
000461      ** set incorrectly (pointing to other than the LRU page with NEED_SYNC
000462      ** cleared). This is Ok, as pSynced is just an optimization.  */
000463      for(pPg=pCache->pSynced; 
000464          pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); 
000465          pPg=pPg->pDirtyPrev
000466      );
000467      pCache->pSynced = pPg;
000468      if( !pPg ){
000469        for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev);
000470      }
000471      if( pPg ){
000472        int rc;
000473  #ifdef SQLITE_LOG_CACHE_SPILL
000474        sqlite3_log(SQLITE_FULL, 
000475                    "spill page %d making room for %d - cache used: %d/%d",
000476                    pPg->pgno, pgno,
000477                    sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache),
000478                  numberOfCachePages(pCache));
000479  #endif
000480        pcacheTrace(("%p.SPILL %d\n",pCache,pPg->pgno));
000481        rc = pCache->xStress(pCache->pStress, pPg);
000482        pcacheDump(pCache);
000483        if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){
000484          return rc;
000485        }
000486      }
000487    }
000488    *ppPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2);
000489    return *ppPage==0 ? SQLITE_NOMEM_BKPT : SQLITE_OK;
000490  }
000491  
000492  /*
000493  ** This is a helper routine for sqlite3PcacheFetchFinish()
000494  **
000495  ** In the uncommon case where the page being fetched has not been
000496  ** initialized, this routine is invoked to do the initialization.
000497  ** This routine is broken out into a separate function since it
000498  ** requires extra stack manipulation that can be avoided in the common
000499  ** case.
000500  */
000501  static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit(
000502    PCache *pCache,             /* Obtain the page from this cache */
000503    Pgno pgno,                  /* Page number obtained */
000504    sqlite3_pcache_page *pPage  /* Page obtained by prior PcacheFetch() call */
000505  ){
000506    PgHdr *pPgHdr;
000507    assert( pPage!=0 );
000508    pPgHdr = (PgHdr*)pPage->pExtra;
000509    assert( pPgHdr->pPage==0 );
000510    memset(&pPgHdr->pDirty, 0, sizeof(PgHdr) - offsetof(PgHdr,pDirty));
000511    pPgHdr->pPage = pPage;
000512    pPgHdr->pData = pPage->pBuf;
000513    pPgHdr->pExtra = (void *)&pPgHdr[1];
000514    memset(pPgHdr->pExtra, 0, 8);
000515    assert( EIGHT_BYTE_ALIGNMENT( pPgHdr->pExtra ) );
000516    pPgHdr->pCache = pCache;
000517    pPgHdr->pgno = pgno;
000518    pPgHdr->flags = PGHDR_CLEAN;
000519    return sqlite3PcacheFetchFinish(pCache,pgno,pPage);
000520  }
000521  
000522  /*
000523  ** This routine converts the sqlite3_pcache_page object returned by
000524  ** sqlite3PcacheFetch() into an initialized PgHdr object.  This routine
000525  ** must be called after sqlite3PcacheFetch() in order to get a usable
000526  ** result.
000527  */
000528  PgHdr *sqlite3PcacheFetchFinish(
000529    PCache *pCache,             /* Obtain the page from this cache */
000530    Pgno pgno,                  /* Page number obtained */
000531    sqlite3_pcache_page *pPage  /* Page obtained by prior PcacheFetch() call */
000532  ){
000533    PgHdr *pPgHdr;
000534  
000535    assert( pPage!=0 );
000536    pPgHdr = (PgHdr *)pPage->pExtra;
000537  
000538    if( !pPgHdr->pPage ){
000539      return pcacheFetchFinishWithInit(pCache, pgno, pPage);
000540    }
000541    pCache->nRefSum++;
000542    pPgHdr->nRef++;
000543    assert( sqlite3PcachePageSanity(pPgHdr) );
000544    return pPgHdr;
000545  }
000546  
000547  /*
000548  ** Decrement the reference count on a page. If the page is clean and the
000549  ** reference count drops to 0, then it is made eligible for recycling.
000550  */
000551  void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
000552    assert( p->nRef>0 );
000553    p->pCache->nRefSum--;
000554    if( (--p->nRef)==0 ){
000555      if( p->flags&PGHDR_CLEAN ){
000556        pcacheUnpin(p);
000557      }else{
000558        pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT);
000559        assert( sqlite3PcachePageSanity(p) );
000560      }
000561    }
000562  }
000563  
000564  /*
000565  ** Increase the reference count of a supplied page by 1.
000566  */
000567  void sqlite3PcacheRef(PgHdr *p){
000568    assert(p->nRef>0);
000569    assert( sqlite3PcachePageSanity(p) );
000570    p->nRef++;
000571    p->pCache->nRefSum++;
000572  }
000573  
000574  /*
000575  ** Drop a page from the cache. There must be exactly one reference to the
000576  ** page. This function deletes that reference, so after it returns the
000577  ** page pointed to by p is invalid.
000578  */
000579  void sqlite3PcacheDrop(PgHdr *p){
000580    assert( p->nRef==1 );
000581    assert( sqlite3PcachePageSanity(p) );
000582    if( p->flags&PGHDR_DIRTY ){
000583      pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE);
000584    }
000585    p->pCache->nRefSum--;
000586    sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1);
000587  }
000588  
000589  /*
000590  ** Make sure the page is marked as dirty. If it isn't dirty already,
000591  ** make it so.
000592  */
000593  void sqlite3PcacheMakeDirty(PgHdr *p){
000594    assert( p->nRef>0 );
000595    assert( sqlite3PcachePageSanity(p) );
000596    if( p->flags & (PGHDR_CLEAN|PGHDR_DONT_WRITE) ){    /*OPTIMIZATION-IF-FALSE*/
000597      p->flags &= ~PGHDR_DONT_WRITE;
000598      if( p->flags & PGHDR_CLEAN ){
000599        p->flags ^= (PGHDR_DIRTY|PGHDR_CLEAN);
000600        pcacheTrace(("%p.DIRTY %d\n",p->pCache,p->pgno));
000601        assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY );
000602        pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD);
000603        assert( sqlite3PcachePageSanity(p) );
000604      }
000605      assert( sqlite3PcachePageSanity(p) );
000606    }
000607  }
000608  
000609  /*
000610  ** Make sure the page is marked as clean. If it isn't clean already,
000611  ** make it so.
000612  */
000613  void sqlite3PcacheMakeClean(PgHdr *p){
000614    assert( sqlite3PcachePageSanity(p) );
000615    assert( (p->flags & PGHDR_DIRTY)!=0 );
000616    assert( (p->flags & PGHDR_CLEAN)==0 );
000617    pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE);
000618    p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC|PGHDR_WRITEABLE);
000619    p->flags |= PGHDR_CLEAN;
000620    pcacheTrace(("%p.CLEAN %d\n",p->pCache,p->pgno));
000621    assert( sqlite3PcachePageSanity(p) );
000622    if( p->nRef==0 ){
000623      pcacheUnpin(p);
000624    }
000625  }
000626  
000627  /*
000628  ** Make every page in the cache clean.
000629  */
000630  void sqlite3PcacheCleanAll(PCache *pCache){
000631    PgHdr *p;
000632    pcacheTrace(("%p.CLEAN-ALL\n",pCache));
000633    while( (p = pCache->pDirty)!=0 ){
000634      sqlite3PcacheMakeClean(p);
000635    }
000636  }
000637  
000638  /*
000639  ** Clear the PGHDR_NEED_SYNC and PGHDR_WRITEABLE flag from all dirty pages.
000640  */
000641  void sqlite3PcacheClearWritable(PCache *pCache){
000642    PgHdr *p;
000643    pcacheTrace(("%p.CLEAR-WRITEABLE\n",pCache));
000644    for(p=pCache->pDirty; p; p=p->pDirtyNext){
000645      p->flags &= ~(PGHDR_NEED_SYNC|PGHDR_WRITEABLE);
000646    }
000647    pCache->pSynced = pCache->pDirtyTail;
000648  }
000649  
000650  /*
000651  ** Clear the PGHDR_NEED_SYNC flag from all dirty pages.
000652  */
000653  void sqlite3PcacheClearSyncFlags(PCache *pCache){
000654    PgHdr *p;
000655    for(p=pCache->pDirty; p; p=p->pDirtyNext){
000656      p->flags &= ~PGHDR_NEED_SYNC;
000657    }
000658    pCache->pSynced = pCache->pDirtyTail;
000659  }
000660  
000661  /*
000662  ** Change the page number of page p to newPgno. 
000663  */
000664  void sqlite3PcacheMove(PgHdr *p, Pgno newPgno){
000665    PCache *pCache = p->pCache;
000666    sqlite3_pcache_page *pOther;
000667    assert( p->nRef>0 );
000668    assert( newPgno>0 );
000669    assert( sqlite3PcachePageSanity(p) );
000670    pcacheTrace(("%p.MOVE %d -> %d\n",pCache,p->pgno,newPgno));
000671    pOther = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, newPgno, 0);
000672    if( pOther ){
000673      PgHdr *pXPage = (PgHdr*)pOther->pExtra;
000674      assert( pXPage->nRef==0 );
000675      pXPage->nRef++;
000676      pCache->nRefSum++;
000677      sqlite3PcacheDrop(pXPage);
000678    }
000679    sqlite3GlobalConfig.pcache2.xRekey(pCache->pCache, p->pPage, p->pgno,newPgno);
000680    p->pgno = newPgno;
000681    if( (p->flags&PGHDR_DIRTY) && (p->flags&PGHDR_NEED_SYNC) ){
000682      pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT);
000683      assert( sqlite3PcachePageSanity(p) );
000684    }
000685  }
000686  
000687  /*
000688  ** Drop every cache entry whose page number is greater than "pgno". The
000689  ** caller must ensure that there are no outstanding references to any pages
000690  ** other than page 1 with a page number greater than pgno.
000691  **
000692  ** If there is a reference to page 1 and the pgno parameter passed to this
000693  ** function is 0, then the data area associated with page 1 is zeroed, but
000694  ** the page object is not dropped.
000695  */
000696  void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){
000697    if( pCache->pCache ){
000698      PgHdr *p;
000699      PgHdr *pNext;
000700      pcacheTrace(("%p.TRUNCATE %d\n",pCache,pgno));
000701      for(p=pCache->pDirty; p; p=pNext){
000702        pNext = p->pDirtyNext;
000703        /* This routine never gets call with a positive pgno except right
000704        ** after sqlite3PcacheCleanAll().  So if there are dirty pages,
000705        ** it must be that pgno==0.
000706        */
000707        assert( p->pgno>0 );
000708        if( p->pgno>pgno ){
000709          assert( p->flags&PGHDR_DIRTY );
000710          sqlite3PcacheMakeClean(p);
000711        }
000712      }
000713      if( pgno==0 && pCache->nRefSum ){
000714        sqlite3_pcache_page *pPage1;
000715        pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0);
000716        if( ALWAYS(pPage1) ){  /* Page 1 is always available in cache, because
000717                               ** pCache->nRefSum>0 */
000718          memset(pPage1->pBuf, 0, pCache->szPage);
000719          pgno = 1;
000720        }
000721      }
000722      sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1);
000723    }
000724  }
000725  
000726  /*
000727  ** Shut down a cache by passing pCache->pCache to the xDestroy method.
000728  */
000729  void sqlite3PcacheClose(PCache *pCache){
000730    assert( 0!=pCache->pCache );
000731    pcacheTrace(("%p.CLOSE\n",pCache));
000732    (sqlite3GlobalConfig.pcache2.xDestroy)(pCache->pCache);
000733  }
000734  
000735  /*
000736  ** Empty the cache.  Equivalent to truncating it at page number zero.
000737  */
000738  void sqlite3PcacheClear(PCache *pCache){
000739    sqlite3PcacheTruncate(pCache, (Pgno)0);
000740  }
000741  
000742  /*
000743  ** Combine two pgno-ordered lists, each linked through pDirty, into one
000744  ** pgno-ordered list.  The pDirtyPrev pointers are not fixed up.
000745  */
000746  static PgHdr *pcacheMergeDirtyList(PgHdr *pA, PgHdr *pB){
000747    PgHdr *pHead = 0;          /* First page of the merged list */
000748    PgHdr **ppLink = &pHead;   /* Slot that receives the next page */
000749    assert( pA!=0 && pB!=0 );
000750    for(;;){
000751      if( pA->pgno<pB->pgno ){
000752        *ppLink = pA;
000753        ppLink = &pA->pDirty;
000754        pA = pA->pDirty;
000755        if( pA==0 ){
000756          *ppLink = pB;      /* pA exhausted: append remainder of pB */
000757          break;
000758        }
000759      }else{
000760        *ppLink = pB;
000761        ppLink = &pB->pDirty;
000762        pB = pB->pDirty;
000763        if( pB==0 ){
000764          *ppLink = pA;      /* pB exhausted: append remainder of pA */
000765          break;
000766        }
000767      }
000768    }
000769    return pHead;
000770  }
000771  
000772  /*
000773  ** Sort the list of pages in ascending order by pgno.  Pages are
000774  ** connected by pDirty pointers; pDirtyPrev pointers are not maintained.
000775  ** Each a[i] is empty or holds a sorted sublist.  An incoming page is
000776  ** merged with successive non-empty buckets, emptying each, until an
000777  ** empty bucket is found to hold the result.
000778  **
000779  ** There cannot be more than 2^31 distinct pages in a database, so at
000780  ** most 31 buckets are needed.  The extra bucket only catches overflow.
000781  */
000782  #define N_SORT_BUCKET  32
000783  static PgHdr *pcacheSortDirtyList(PgHdr *pIn){
000784    PgHdr *a[N_SORT_BUCKET], *p;
000785    int i;
000786    memset(a, 0, sizeof(a));
000787    while( pIn ){
000788      p = pIn;                 /* Detach the first page from the input */
000789      pIn = p->pDirty;
000790      p->pDirty = 0;
000791      for(i=0; ALWAYS(i<N_SORT_BUCKET-1); i++){
000792        if( a[i]==0 ){
000793          a[i] = p;
000794          break;
000795        }else{
000796          p = pcacheMergeDirtyList(a[i], p);
000797          a[i] = 0;
000798        }
000799      }
000800      if( NEVER(i==N_SORT_BUCKET-1) ){
000801        /* Overflow: buckets 0..N_SORT_BUCKET-2 were all occupied.  This is
000802        ** believed impossible.  The last bucket may still be empty, and
000803        ** pcacheMergeDirtyList() requires two non-NULL lists, so check. */
000804        a[i] = a[i] ? pcacheMergeDirtyList(a[i], p) : p;
000805      }
000806    }
000807    p = a[0];                  /* Fold all buckets into a single list */
000808    for(i=1; i<N_SORT_BUCKET; i++){
000809      if( a[i]==0 ) continue;
000810      p = p ? pcacheMergeDirtyList(p, a[i]) : a[i];
000811    }
000812    return p;
000813  }
000814  
000815  /* Return every dirty page in the cache as a list sorted by page number.
000816  ** The list is linked via PgHdr.pDirty, seeded here from pDirtyNext. */
000817  PgHdr *sqlite3PcacheDirtyList(PCache *pCache){
000818    PgHdr *pPg = pCache->pDirty;
000819    while( pPg ){
000820      pPg->pDirty = pPg->pDirtyNext;
000821      pPg = pPg->pDirtyNext;
000822    }
000823    return pcacheSortDirtyList(pCache->pDirty);
000824  }
000825  
000826  /*
000827  ** Return the sum of the reference counts of all pages in the cache.
000828  ** A page referenced N times contributes N to this total, so the
000829  ** result is not the number of distinct pages that are referenced.
000830  */
000831  i64 sqlite3PcacheRefCount(PCache *pCache){
000832    i64 nTotalRef = pCache->nRefSum;
000833    return nTotalRef;
000834  }
000835  
000836  /* Return the number of references currently held on page p, as
000837  ** recorded in p->nRef. */
000838  i64 sqlite3PcachePageRefcount(PgHdr *p){
000839    i64 nPageRef = p->nRef;
000840    return nPageRef;
000841  }
000842  
000843  /* Return the page count reported by the xPagecount method. */
000844  int sqlite3PcachePagecount(PCache *pCache){
000845    int nPage;
000846    assert( pCache->pCache!=0 );
000847    nPage = sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache);
000848    return nPage;
000849  }
000850  
000851  #ifdef SQLITE_TEST
000852  /* Report the suggested cache size as computed by numberOfCachePages().
000853  ** Compiled only when SQLITE_TEST is defined. */
000854  int sqlite3PcacheGetCachesize(PCache *pCache){
000855    int nPage = numberOfCachePages(pCache);
000856    return nPage;
000857  }
000858  #endif
000859  
000860  /* Record mxPage as the suggested cache size and pass the resulting
000861  ** value of numberOfCachePages() to the xCachesize method. */
000862  void sqlite3PcacheSetCachesize(PCache *pCache, int mxPage){
000863    int nPage;
000864    assert( pCache->pCache!=0 );
000865    pCache->szCache = mxPage;
000866    nPage = numberOfCachePages(pCache);
000867    sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache, nPage);
000868  }
000869  
000870  /*
000871  ** Change the suggested cache-spill size.  An mxPage of zero leaves the
000872  ** setting alone.  A negative mxPage is converted to a page count by
000873  ** dividing -1024*mxPage by (szPage+szExtra).  The return value is the
000874  ** effective cache-spill size, which is the larger of szSpill and the
000875  ** value of numberOfCachePages().
000876  */
000877  int sqlite3PcacheSetSpillsize(PCache *p, int mxPage){
000878    int nCache;
000879    assert( p->pCache!=0 );
000880    if( mxPage<0 ){
000881      p->szSpill = (int)((-1024*(i64)mxPage)/(p->szPage+p->szExtra));
000882    }else if( mxPage>0 ){
000883      p->szSpill = mxPage;
000884    }
000885    nCache = numberOfCachePages(p);
000886    return nCache<p->szSpill ? p->szSpill : nCache;
000887  }
000888  
000889  /*
000890  ** Ask the cache, via xShrink, to release as much memory as it can.
000891  */
000892  void sqlite3PcacheShrink(PCache *pCache){
000893    assert( 0!=pCache->pCache );
000894    (sqlite3GlobalConfig.pcache2.xShrink)(pCache->pCache);
000895  }
000896  
000897  /* Return the size of the PgHdr header that this middleware layer adds
000898  ** in the page-cache hierarchy, rounded with ROUND8(). */
000899  int sqlite3HeaderSizePcache(void){
000900    return ROUND8(sizeof(PgHdr));
000901  }
000902  
000903  /* Count the pages on the dirty list and return that count as a
000904  ** percentage of the configured cache size, numberOfCachePages().
000905  ** The result is 0 when the configured cache size is itself 0. */
000906  int sqlite3PCachePercentDirty(PCache *pCache){
000907    int nCache = numberOfCachePages(pCache);
000908    int nDirty = 0;
000909    PgHdr *pPg = pCache->pDirty;
000910    while( pPg ){ nDirty++; pPg = pPg->pDirtyNext; }
000911    if( nCache==0 ) return 0;
000912    return (int)(((i64)nDirty * 100) / nCache);
000913  }
000914  
000915  #ifdef SQLITE_DIRECT_OVERFLOW_READ
000916  /*
000917  ** Return 1 if the dirty list holds at least one page and 0 if it is empty.
000918  */
000919  int sqlite3PCacheIsDirty(PCache *pCache){
000920    return pCache->pDirty ? 1 : 0;
000921  }
000922  #endif
000923  
000924  #if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG)
000925  /*
000926  ** Invoke the xIter callback once for each page on the dirty list.
000927  ** Only compiled when SQLITE_CHECK_PAGES or SQLITE_DEBUG is defined.
000928  */
000929  void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)){
000930    PgHdr *pPg = pCache->pDirty;
000931    while( pPg ){
000932      xIter(pPg);
000933      pPg = pPg->pDirtyNext;
000934    }
000935  }
000936  #endif