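Patch for podofo 0.9.6: PdfPagesTree::GetPageNode() now always traverses the page tree (GetPageNodeFromArray() is removed) and detects cycles; adds unit tests for cyclic, empty-kids and nested-array page trees.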
diff -rupN podofo-0.9.6/src/doc/PdfPagesTree.cpp podofo-0.9.6-new/src/doc/PdfPagesTree.cpp
--- podofo-0.9.6/src/doc/PdfPagesTree.cpp	2018-02-25 23:36:48.000000000 +0100
+++ podofo-0.9.6-new/src/doc/PdfPagesTree.cpp	2019-03-13 23:34:59.093833810 +0100
@@ -51,7 +51,7 @@ PdfPagesTree::PdfPagesTree( PdfVecObject
     : PdfElement( "Pages", pParent ),
       m_cache( 0 )
 {
-    GetObject()->GetDictionary().AddKey( "Kids", PdfArray() ); // kids->Reference() 
+    GetObject()->GetDictionary().AddKey( "Kids", PdfArray() ); // kids->Reference()
     GetObject()->GetDictionary().AddKey( "Count", PdfObject( static_cast<pdf_int64>(PODOFO_LL_LITERAL(0)) ) );
 }
 
@@ -59,13 +59,13 @@ PdfPagesTree::PdfPagesTree( PdfObject* p
     : PdfElement( "Pages", pPagesRoot ),
       m_cache( GetChildCount( pPagesRoot ) )
 {
-    if( !this->GetObject() ) 
+    if( !this->GetObject() )
     {
         PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
     }
 }
 
-PdfPagesTree::~PdfPagesTree() 
+PdfPagesTree::~PdfPagesTree()
 {
     m_cache.ClearCache();
 }
@@ -90,7 +90,7 @@ PdfPage* PdfPagesTree::GetPage( int nInd
     // Not in cache -> search tree
     PdfObjectList lstParents;
     PdfObject* pObj = this->GetPageNode(nIndex, this->GetRoot(), lstParents);
-    if( pObj ) 
+    if( pObj )
     {
         pPage = new PdfPage( pObj, lstParents );
         m_cache.AddPageObject( nIndex, pPage );
@@ -105,13 +105,13 @@ PdfPage* PdfPagesTree::GetPage( const Pd
     // We have to search through all pages,
     // as this is the only way
     // to instantiate the PdfPage with a correct list of parents
-    for( int i=0;i<this->GetTotalNumberOfPages();i++ ) 
+    for( int i=0;i<this->GetTotalNumberOfPages();i++ )
     {
         PdfPage* pPage = this->GetPage( i );
-        if( pPage && pPage->GetObject()->Reference() == ref ) 
+        if( pPage && pPage->GetObject()->Reference() == ref )
             return pPage;
     }
-    
+
     return NULL;
 }
 
@@ -130,7 +130,7 @@ void PdfPagesTree::InsertPage( int nAfte
         bInsertBefore = true;
         nAfterPageIndex = 0;
     }
-    else if( nAfterPageIndex < 0 ) 
+    else if( nAfterPageIndex < 0 )
     {
         // Only ePdfPageInsertionPoint_InsertBeforeFirstPage is valid here
         PdfError::LogMessage( eLogSeverity_Information,
@@ -149,9 +149,9 @@ void PdfPagesTree::InsertPage( int nAfte
                                         lstParents );
     }
     //printf("pPageBefore=%p lstParents=%i\n", pPageBefore,lstParents.size() );
-    if( !pPageBefore || lstParents.size() == 0 ) 
+    if( !pPageBefore || lstParents.size() == 0 )
     {
-        if( this->GetTotalNumberOfPages() != 0 ) 
+        if( this->GetTotalNumberOfPages() != 0 )
         {
             PdfError::LogMessage( eLogSeverity_Critical,
                                   "Cannot find page %i or page %i has no parents. Cannot insert new page.",
@@ -188,7 +188,7 @@ void PdfPagesTree::InsertPages( int nAft
         bInsertBefore = true;
         nAfterPageIndex = 0;
     }
-    else if( nAfterPageIndex < 0 ) 
+    else if( nAfterPageIndex < 0 )
     {
         // Only ePdfPageInsertionPoint_InsertBeforeFirstPage is valid here
         PdfError::LogMessage( eLogSeverity_Information,
@@ -204,9 +204,9 @@ void PdfPagesTree::InsertPages( int nAft
         pPageBefore = this->GetPageNode( nAfterPageIndex, this->GetRoot(),
                                         lstParents );
     }
-    if( !pPageBefore || lstParents.size() == 0 ) 
+    if( !pPageBefore || lstParents.size() == 0 )
     {
-        if( this->GetTotalNumberOfPages() != 0 ) 
+        if( this->GetTotalNumberOfPages() != 0 )
         {
             PdfError::LogMessage( eLogSeverity_Critical,
                                   "Cannot find page %i or page %i has no parents. Cannot insert new page.",
@@ -239,7 +239,7 @@ PdfPage* PdfPagesTree::CreatePage( const
 
     InsertPage( this->GetTotalNumberOfPages() - 1, pPage );
     m_cache.AddPageObject( this->GetTotalNumberOfPages(), pPage );
-    
+
     return pPage;
 }
 
@@ -278,12 +278,12 @@ void PdfPagesTree::DeletePage( int nPage
 {
     // Delete from cache
     m_cache.DeletePage( nPageNumber );
-    
+
     // Delete from pages tree
     PdfObjectList lstParents;
     PdfObject* pPageNode = this->GetPageNode( nPageNumber, this->GetRoot(), lstParents );
 
-    if( !pPageNode ) 
+    if( !pPageNode )
     {
         PdfError::LogMessage( eLogSeverity_Information,
                               "Invalid argument to PdfPagesTree::DeletePage: %i - Page not found\n",
@@ -291,11 +291,11 @@ void PdfPagesTree::DeletePage( int nPage
         PODOFO_RAISE_ERROR( ePdfError_PageNotFound );
     }
 
-    if( lstParents.size() > 0 ) 
+    if( lstParents.size() > 0 )
     {
         PdfObject* pParent = lstParents.back();
         int nKidsIndex = this->GetPosInKids( pPageNode, pParent );
-        
+
         DeletePageFromNode( pParent, lstParents, nKidsIndex, pPageNode );
     }
     else
@@ -312,10 +312,10 @@ void PdfPagesTree::DeletePage( int nPage
 // Private methods
 ////////////////////////////////////////////////////
 
-PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pParent, 
-                                      PdfObjectList & rLstParents ) 
+PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pParent,
+                                      PdfObjectList & rLstParents )
 {
-    if( !pParent ) 
+    if( !pParent )
     {
         PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
     }
@@ -325,21 +325,20 @@ PdfObject* PdfPagesTree::GetPageNode( in
         PODOFO_RAISE_ERROR( ePdfError_InvalidKey );
     }
 
-    
+
     const PdfObject* pObj = pParent->GetIndirectKey( "Kids" );
     if( pObj == NULL || !pObj->IsArray() )
     {
         PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
     }
 
-    const PdfArray & rKidsArray = pObj->GetArray(); 
+    const PdfArray & rKidsArray = pObj->GetArray();
     PdfArray::const_iterator it = rKidsArray.begin();
 
-    const size_t numDirectKids = rKidsArray.size();
     const size_t numKids = GetChildCount(pParent);
 
     // use <= since nPageNum is 0-based
-    if( static_cast<int>(numKids) <= nPageNum ) 
+    if( static_cast<int>(numKids) <= nPageNum )
     {
         PdfError::LogMessage( eLogSeverity_Critical,
 	    "Cannot retrieve page %i from a document with only %i pages.",
@@ -347,92 +346,71 @@ PdfObject* PdfPagesTree::GetPageNode( in
         return NULL;
     }
 
-    //printf("Fetching: %i %i %i\n", numDirectKids, numKids, nPageNum );
-    if( numDirectKids == numKids && static_cast<size_t>(nPageNum) < numDirectKids )
-    {
-        // This node has only page nodes as kids,
-        // so we can access the array directly
-        rLstParents.push_back( pParent );
-        return GetPageNodeFromArray( nPageNum, rKidsArray, rLstParents );
-    } 
-    else
-    {
-        // We have to traverse the tree
-        while( it != rKidsArray.end() ) 
-        {
-            if( (*it).IsArray() ) 
-            { // Fixes PDFs broken by having trees with arrays nested once
-                
-                rLstParents.push_back( pParent );
-
-                // the following code is to find the reference to log this with
-                const PdfReference & rIterArrayRef = (*it).Reference();
-                PdfReference refToLog;
-                bool isDirectObject // don't worry about 0-num. indirect ones
-                    = ( !(rIterArrayRef.ObjectNumber() ) );
-                if ( isDirectObject ) 
-		{
-                    if ( !(pObj->Reference().ObjectNumber() ) ) // rKidsArray's
-		    {
-                        refToLog = pParent->Reference();
-                    }
-		    else
-                    {
-                        refToLog = pObj->Reference();
-                    }
-                }
-                else
-                {
-                    refToLog = rIterArrayRef;
-                }
-                PdfError::LogMessage( eLogSeverity_Error,
-                                    "Entry in Kids array is itself an array"
-                    "%s reference: %s\n", isDirectObject ? " (direct object)"
-                    ", in object with" : ",", refToLog.ToString().c_str() );
+    //printf("Fetching: %i %i\n", numKids, nPageNum );
 
-                    const PdfArray & rIterArray = (*it).GetArray();
+    // We have to traverse the tree
+    //
+    // BEWARE: There is no valid shortcut for tree traversal.
+    // Even if rKidsArray.size()==numKids, this does not imply that
+    // rKidsArray can be accessed with the index of the page directly.
+    // The tree could have an arbitrarily complex structure because
+    // internal nodes with no leaves (page objects) are not forbidden
+    // by the PDF spec.
+    while( it != rKidsArray.end() )
+    {
+        if(!(*it).IsReference() )
+        {
+            PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Invalid datatype in kids array: %s\n",
+                                  nPageNum, (*it).GetDataTypeString());
+            return NULL;
+        }
 
-                    // is the array large enough to potentially have the page?
-                    if( static_cast<size_t>(nPageNum) < rIterArray.GetSize() )
-                    {
-                        PdfObject* pPageNode = GetPageNodeFromArray( nPageNum,
-                                                    rIterArray, rLstParents );
-                        if ( pPageNode ) // and if not, search further
-                            return pPageNode;
-                    }
-            }
-            else if( (*it).IsReference() ) 
-            {
                 PdfObject* pChild = GetRoot()->GetOwner()->GetObject( (*it).GetReference() );
-                if (!pChild) 
+                if (!pChild)
                 {
-                    PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Child not found: %s\n", 
-                                          nPageNum, (*it).GetReference().ToString().c_str()); 
+                    PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Child not found: %s\n",
+                                          nPageNum, (*it).GetReference().ToString().c_str());
                     return NULL;
                 }
 
-                if( this->IsTypePages(pChild) ) 
+                if( this->IsTypePages(pChild) )
                 {
                     int childCount = GetChildCount( pChild );
                     if( childCount < nPageNum + 1 ) // Pages are 0 based, but count is not
                     {
-                        // skip this page node
-                        // and go to the next one
+                        // skip this page tree node
+                        // and go to the next child in rKidsArray
                         nPageNum -= childCount;
                     }
                     else
                     {
+                        // page is in the subtree of pChild
+                        // => call GetPageNode() recursively
+
                         rLstParents.push_back( pParent );
+
+                        if ( std::find( rLstParents.begin(), rLstParents.end(), pChild )
+                             != rLstParents.end() ) // cycle in parent list detected, fend
+                        { // off security vulnerability similar to CVE-2017-8054 (infinite recursion)
+                            std::ostringstream oss;
+                            oss << "Cycle in page tree: child in /Kids array of object "
+                                << ( *(rLstParents.rbegin()) )->Reference().ToString()
+                                << " back-references to object " << pChild->Reference()
+                                .ToString() << " one of whose descendants the former is.";
+                            PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, oss.str() );
+                        }
+
                         return this->GetPageNode( nPageNum, pChild, rLstParents );
                     }
                 }
-                else if( this->IsTypePage(pChild) ) 
+                else if( this->IsTypePage(pChild) )
                 {
                     if( 0 == nPageNum )
                     {
+                        // page found
                         rLstParents.push_back( pParent );
                         return pChild;
-                    } 
+                    }
 
                     // Skip a normal page
                     if(nPageNum > 0 )
@@ -448,100 +426,16 @@ PdfObject* PdfPagesTree::GetPageNode( in
                         "Invalid datatype referenced in kids array: %s\n"
                         "Reference to invalid object: %i %i R\n", nPageNum,
                         pChild->GetDataTypeString(), nLogObjNum, nLogGenNum);
+                    return NULL;
                 }
-            }
-            else
-            {
-                PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Invalid datatype in kids array: %s\n", 
-                                      nPageNum, (*it).GetDataTypeString()); 
-                return NULL;
-            }
-            
-            ++it;
-        }
-    }
-
-    return NULL;
-}
-
-PdfObject* PdfPagesTree::GetPageNodeFromArray( int nPageNum, const PdfArray & rKidsArray, PdfObjectList & rLstParents )
-{
-    if( static_cast<size_t>(nPageNum) >= rKidsArray.GetSize() )
-    {
-        PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i from array of size %i\n", 
-                              nPageNum, rKidsArray.size() );
-        return NULL;
-    }
-
-    // TODO: Fill cache immediately with all pages 
-    //       in this kids array
-    PdfVariant rVar = rKidsArray[nPageNum];
-    while( true ) 
-    {
-        if( rVar.IsArray() ) 
-        {
-            // Fixes some broken PDFs who have trees with 1 element kids arrays
-            // Recursive call removed to prevent stack overflow (CVE-2017-8054)
-            // replaced by the following inside this conditional incl. continue
-            const PdfArray & rVarArray = rVar.GetArray();
-            if (rVarArray.GetSize() == 0)
-            {
-                PdfError::LogMessage( eLogSeverity_Critical, "Trying to access"
-                    " first page index of empty array" );
-                return NULL;
-            }
-            PdfVariant rVarFirstEntry = rVarArray[0]; // avoids use-after-free
-            rVar = rVarFirstEntry; // in this line (rVar-ref'd array is freed)
-            continue;
-        }
-        else if( !rVar.IsReference() )
-        {
-            PODOFO_RAISE_ERROR_INFO( ePdfError_NotImplemented, "Cannot handle inline pages." );
-        }
-
-        PdfObject* pgObject = GetRoot()->GetOwner()->GetObject( rVar.GetReference() );
-		if(pgObject==NULL)
-        {
-			PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, "Invalid reference." );
-		}
-
-        //printf("Reading %s\n", pgObject->Reference().ToString().c_str());
-        // make sure the object is a /Page and not a /Pages with a single kid
-        if( this->IsTypePage(pgObject) ) 
-        {
-            return pgObject;
-        }
 
-        // it's a /Pages with a single kid, so dereference and try again...
-        if (this->IsTypePages(pgObject) ) 
-        {
-            if( !pgObject->GetDictionary().HasKey( "Kids" ) )
-                return NULL;
-
-            if ( std::find( rLstParents.begin(), rLstParents.end(), pgObject )
-                != rLstParents.end() ) // cycle in parent list detected, fend
-            { // off security vulnerability CVE-2017-8054 (infinite recursion)
-                std::ostringstream oss;
-                oss << "Cycle in page tree: child in /Kids array of object "
-                    << ( *(rLstParents.rbegin()) )->Reference().ToString()
-                    << " back-references to object " << pgObject->Reference()
-                    .ToString() << " one of whose descendants the former is.";
-
-                PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, oss.str() );
-            }
-
-            rLstParents.push_back( pgObject );
-            rVar = *(pgObject->GetDictionary().GetKey( "Kids" ));
-        } else {
-            // Reference to unexpected object
-            PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, "Reference to unexpected object." );
+            ++it;
         }
-    }
 
     return NULL;
 }
 
-bool PdfPagesTree::IsTypePage(const PdfObject* pObject) const 
+bool PdfPagesTree::IsTypePage(const PdfObject* pObject) const
 {
     if( !pObject )
         return false;
@@ -552,7 +446,7 @@ bool PdfPagesTree::IsTypePage(const PdfO
     return false;
 }
 
-bool PdfPagesTree::IsTypePages(const PdfObject* pObject) const 
+bool PdfPagesTree::IsTypePages(const PdfObject* pObject) const
 {
     if( !pObject )
         return false;
@@ -565,12 +459,12 @@ bool PdfPagesTree::IsTypePages(const Pdf
 
 int PdfPagesTree::GetChildCount( const PdfObject* pNode ) const
 {
-    if( !pNode ) 
+    if( !pNode )
         return 0;
 
     const PdfObject *pCount = pNode->GetIndirectKey( "Count" );
     if( pCount != 0 ) {
-        return (pCount->GetDataType() == PoDoFo::ePdfDataType_Number) ?  
+        return (pCount->GetDataType() == PoDoFo::ePdfDataType_Number) ?
             static_cast<int>( pCount->GetNumber() ):0;
     } else {
         return 0;
@@ -589,7 +483,7 @@ int PdfPagesTree::GetPosInKids( PdfObjec
     PdfArray::const_iterator it = rKids.begin();
 
     int index = 0;
-    while( it != rKids.end() ) 
+    while( it != rKids.end() )
     {
         if( (*it).GetReference() == pPageObj->Reference() )
         {
@@ -606,10 +500,10 @@ int PdfPagesTree::GetPosInKids( PdfObjec
     return -1;
 }
 
-void PdfPagesTree::InsertPageIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, 
+void PdfPagesTree::InsertPageIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents,
                                        int nIndex, PdfObject* pPage )
 {
-    if( !pParent || !pPage ) 
+    if( !pParent || !pPage )
     {
         PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
     }
@@ -625,17 +519,17 @@ void PdfPagesTree::InsertPageIntoNode( P
 
     newKids.reserve( oldKids.GetSize() + 1 );
 
-    if( nIndex < 0 ) 
+    if( nIndex < 0 )
     {
         newKids.push_back( pPage->Reference() );
     }
 
     int i = 0;
-    while( it != oldKids.end() ) 
+    while( it != oldKids.end() )
     {
         newKids.push_back( *it );
 
-        if( i == nIndex ) 
+        if( i == nIndex )
             newKids.push_back( pPage->Reference() );
 
         ++i;
@@ -650,7 +544,7 @@ void PdfPagesTree::InsertPageIntoNode( P
     */
 
     pParent->GetDictionary().AddKey( PdfName("Kids"), newKids );
- 
+
     // 2. increase count
     PdfObjectList::const_reverse_iterator itParents = rlstParents.rbegin();
     while( itParents != rlstParents.rend() )
@@ -658,16 +552,16 @@ void PdfPagesTree::InsertPageIntoNode( P
         this->ChangePagesCount( *itParents, 1 );
 
         ++itParents;
-    } 
+    }
 
     // 3. add parent key to the page
     pPage->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference() );
 }
 
-void PdfPagesTree::InsertPagesIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, 
+void PdfPagesTree::InsertPagesIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents,
                                        int nIndex, const std::vector<PdfObject*>& vecPages )
 {
-    if( !pParent || !vecPages.size() ) 
+    if( !pParent || !vecPages.size() )
     {
         PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
     }
@@ -683,7 +577,7 @@ void PdfPagesTree::InsertPagesIntoNode(
 
     bool bIsPushedIn = false;
     int i=0;
-    for (PdfArray::const_iterator it=oldKids.begin(); it!=oldKids.end(); ++it, ++i ) 
+    for (PdfArray::const_iterator it=oldKids.begin(); it!=oldKids.end(); ++it, ++i )
     {
         if ( !bIsPushedIn && (nIndex < i) )    // Pushing before
         {
@@ -697,7 +591,7 @@ void PdfPagesTree::InsertPagesIntoNode(
     }
 
     // If new kids are still not pushed in then they may be appending to the end
-    if ( !bIsPushedIn && ( (nIndex + 1) == static_cast<int>(oldKids.size())) ) 
+    if ( !bIsPushedIn && ( (nIndex + 1) == static_cast<int>(oldKids.size())) )
     {
         for (std::vector<PdfObject*>::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages)
         {
@@ -707,13 +601,13 @@ void PdfPagesTree::InsertPagesIntoNode(
     }
 
     pParent->GetDictionary().AddKey( PdfName("Kids"), newKids );
- 
+
 
     // 2. increase count
     for ( PdfObjectList::const_reverse_iterator itParents = rlstParents.rbegin(); itParents != rlstParents.rend(); ++itParents )
     {
         this->ChangePagesCount( *itParents, vecPages.size() );
-    } 
+    }
 
     // 3. add parent key to each of the pages
     for (std::vector<PdfObject*>::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages)
@@ -722,10 +616,10 @@ void PdfPagesTree::InsertPagesIntoNode(
     }
 }
 
-void PdfPagesTree::DeletePageFromNode( PdfObject* pParent, const PdfObjectList & rlstParents, 
+void PdfPagesTree::DeletePageFromNode( PdfObject* pParent, const PdfObjectList & rlstParents,
                                        int nIndex, PdfObject* pPage )
 {
-    if( !pParent || !pPage ) 
+    if( !pParent || !pPage )
     {
         PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
     }
@@ -746,14 +640,14 @@ void PdfPagesTree::DeletePageFromNode( P
         this->ChangePagesCount( *itParents, -1 );
 
         ++itParents;
-    } 
+    }
 
     // 3. Remove empty pages nodes
     itParents = rlstParents.rbegin();
     while( itParents != rlstParents.rend() )
     {
         // Never delete root node
-        if( IsEmptyPageNode( *itParents ) && *itParents != GetRoot() ) 
+        if( IsEmptyPageNode( *itParents ) && *itParents != GetRoot() )
         {
             PdfObject* pParentOfNode = *(itParents + 1);
             int nKidsIndex = this->GetPosInKids( *itParents, pParentOfNode );
@@ -764,10 +658,10 @@ void PdfPagesTree::DeletePageFromNode( P
         }
 
         ++itParents;
-    } 
+    }
 }
 
-void PdfPagesTree::DeletePageNode( PdfObject* pParent, int nIndex ) 
+void PdfPagesTree::DeletePageNode( PdfObject* pParent, int nIndex )
 {
     PdfArray kids = pParent->GetDictionary().GetKey( PdfName("Kids") )->GetArray();
     kids.erase( kids.begin() + nIndex );
@@ -779,7 +673,7 @@ int PdfPagesTree::ChangePagesCount( PdfO
     // Increment or decrement inPagesDict's Count by inDelta, and return the new count.
     // Simply return the current count if inDelta is 0.
     int	cnt = GetChildCount( pPageObj );
-    if( 0 != nDelta ) 
+    if( 0 != nDelta )
     {
         cnt += nDelta ;
         pPageObj->GetDictionary().AddKey( "Count", PdfVariant( static_cast<pdf_int64>(cnt) ) );
@@ -788,7 +682,7 @@ int PdfPagesTree::ChangePagesCount( PdfO
     return cnt ;
 }
 
-bool PdfPagesTree::IsEmptyPageNode( PdfObject* pPageNode ) 
+bool PdfPagesTree::IsEmptyPageNode( PdfObject* pPageNode )
 {
     long lCount = GetChildCount( pPageNode );
     bool bKidsEmpty = true;
@@ -802,7 +696,7 @@ bool PdfPagesTree::IsEmptyPageNode( PdfO
 }
 
 /*
-PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pPagesObject, 
+PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pPagesObject,
                                       std::deque<PdfObject*> & rListOfParents )
 {
     // recurse through the pages tree nodes
@@ -835,9 +729,9 @@ PdfObject* PdfPagesTree::GetPageNode( in
         }
 
         PdfVariant pgVar = kidsArray[ nPageNum ];
-        while ( true ) 
+        while ( true )
         {
-            if ( pgVar.IsArray() ) 
+            if ( pgVar.IsArray() )
             {
                 // Fixes some broken PDFs who have trees with 1 element kids arrays
                 return GetPageNodeFromTree( nPageNum, pgVar.GetArray(), rListOfParents );
@@ -857,8 +751,8 @@ PdfObject* PdfPagesTree::GetPageNode( in
             rListOfParents.push_back( pgObject );
             pgVar = *(pgObject->GetDictionary().GetKey( "Kids" ));
         }
-    } 
-    else 
+    }
+    else
     {
         return GetPageNodeFromTree( nPageNum, kidsArray, rListOfParents );
     }
diff -rupN podofo-0.9.6/src/doc/PdfPagesTree.h podofo-0.9.6-new/src/doc/PdfPagesTree.h
--- podofo-0.9.6/src/doc/PdfPagesTree.h	2014-06-15 14:27:46.000000000 +0200
+++ podofo-0.9.6-new/src/doc/PdfPagesTree.h	2019-03-13 23:34:59.094833809 +0100
@@ -190,7 +190,6 @@ class PODOFO_DOC_API PdfPagesTree : publ
     PdfPagesTree();	// don't allow construction from nothing!
 
     PdfObject* GetPageNode( int nPageNum, PdfObject* pParent, PdfObjectList & rLstParents );
-    PdfObject* GetPageNodeFromArray( int nPageNum, const PdfArray & rKidsArray, PdfObjectList & rLstParents );
 
     int GetChildCount( const PdfObject* pNode ) const;
 
diff -rupN podofo-0.9.6/test/unit/PagesTreeTest.cpp podofo-0.9.6-new/test/unit/PagesTreeTest.cpp
--- podofo-0.9.6/test/unit/PagesTreeTest.cpp	2016-05-12 22:08:20.000000000 +0200
+++ podofo-0.9.6-new/test/unit/PagesTreeTest.cpp	2019-03-13 23:34:59.094833809 +0100
@@ -22,6 +22,8 @@
 
 #include <podofo.h>
 
+#include <sstream>
+
 #define PODOFO_TEST_PAGE_KEY "PoDoFoTestPageNumber"
 #define PODOFO_TEST_NUM_PAGES 100
 
@@ -70,6 +72,58 @@ void PagesTreeTest::testEmptyDoc()
     CPPUNIT_ASSERT_THROW( writer.GetPage( 1 ), PdfError );
 }
 
+void PagesTreeTest::testCyclicTree()
+{ // Builds the sample tree twice (pass 0: valid, pass 1: with a cycle) and checks GetPage() for every page index.
+    for (int pass=0; pass < 2; pass++)
+    {
+        PdfMemDocument doc;
+        CreateCyclicTree( doc, pass==1); // second argument requests the cyclic variant of the tree
+        //doc.Write(pass==0?"tree_valid.pdf":"tree_cyclic.pdf");
+        for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++)
+        { // NOTE(review): if GetPageCount() returned 0 this body would never run and nothing would be asserted
+            if (pass==0)
+            {
+                // pass 0:
+                // valid tree without cycles should yield all pages
+                PdfPage* pPage = doc.GetPage( pagenum );
+                CPPUNIT_ASSERT_EQUAL( pPage != NULL, true );
+                CPPUNIT_ASSERT_EQUAL( IsPageNumber( pPage, pagenum ), true );
+            }
+            else
+            {
+                // pass 1:
+                // cyclic tree must throw exception to prevent infinite recursion
+                CPPUNIT_ASSERT_THROW( doc.GetPage( pagenum ), PdfError );
+            }
+        }
+    }
+}
+
+void PagesTreeTest::testEmptyKidsTree()
+{ // Checks that every page is still found when the root also has page tree nodes without any kids.
+    PdfMemDocument doc;
+    CreateEmptyKidsTree(doc); // root -> node0 (3 pages), node1 (empty), node2 (empty)
+    //doc.Write("tree_zerokids.pdf");
+    for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++)
+    {
+        PdfPage* pPage = doc.GetPage( pagenum );
+        CPPUNIT_ASSERT_EQUAL( pPage != NULL, true ); // the page must be found ...
+        CPPUNIT_ASSERT_EQUAL( IsPageNumber( pPage, pagenum ), true ); // ... and must be the page with this index
+    }
+}
+
+void PagesTreeTest::testNestedArrayTree()
+{ // Checks that GetPage() yields NULL for every index when the root /Kids array contains a nested array.
+    PdfMemDocument doc;
+    CreateNestedArrayTree(doc); // root /Kids is [ [ page0 page1 page2 ] ] with /Count 3
+    //doc.Write("tree_nested_array.pdf");
+    for (int pagenum=0; pagenum < doc.GetPageCount(); pagenum++)
+    {
+        PdfPage* pPage = doc.GetPage( pagenum );
+        CPPUNIT_ASSERT_EQUAL( pPage == NULL, true ); // a nested array is not a valid kid, so no page is expected
+    }
+}
+
 void PagesTreeTest::testCreateDelete()
 {
     PdfMemDocument  writer;
@@ -354,6 +408,152 @@ void PagesTreeTest::CreateTestTreeCustom
     pRoot->GetDictionary().AddKey( PdfName("Count"), static_cast<pdf_int64>(PODOFO_TEST_NUM_PAGES) );
 }
 
+std::vector<PdfPage*> PagesTreeTest::CreateSamplePages( PdfMemDocument & rDoc,
+                                                        int nPageCount)
+{ // Creates nPageCount A4 pages in rDoc's object vector and returns them; it does not add them to any /Kids array.
+    PdfFont* pFont;
+
+    // create the font used for the page labels; raise an error if it is not available
+    pFont = rDoc.CreateFont( "Arial" );
+    if( !pFont )
+    {
+        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
+    }
+    pFont->SetFontSize( 16.0 );
+
+    std::vector<PdfPage*> pPage(nPageCount);
+    for (int i = 0; i < nPageCount; ++i)
+    {
+        pPage[i] = new PdfPage( PdfPage::CreateStandardPageSize( ePdfPageSize_A4 ),
+                                &(rDoc.GetObjects()) ); // allocated with new and not freed in this function
+        pPage[i]->GetObject()->GetDictionary().AddKey( PODOFO_TEST_PAGE_KEY,
+                                                       static_cast<pdf_int64>(i) ); // tag the page with its zero-based index
+        // draw the one-based label "Page <i+1>" on the page
+        PdfPainter painter;
+        painter.SetPage( pPage[i] );
+        painter.SetFont( pFont );
+        std::ostringstream os;
+        os << "Page " << i+1;
+        painter.DrawText( 200, 200, os.str()  );
+        painter.FinishPage();
+    }
+
+    return pPage;
+}
+
+std::vector<PdfObject*> PagesTreeTest::CreateNodes( PdfMemDocument & rDoc,
+                                                    int nNodeCount)
+{ // Creates nNodeCount empty "Pages" objects in rDoc and returns them; they are not linked into the tree here.
+    std::vector<PdfObject*> pNode(nNodeCount);
+
+    for (int i = 0; i < nNodeCount; ++i)
+    {
+        pNode[i]=rDoc.GetObjects().CreateObject("Pages");
+        // init required keys: an empty /Kids array and a /Count of 0
+        pNode[i]->GetDictionary().AddKey( "Kids", PdfArray());
+        pNode[i]->GetDictionary().AddKey( "Count", PdfVariant(static_cast<pdf_int64>(0L)));
+    }
+
+    return pNode;
+}
+
+void PagesTreeTest::CreateCyclicTree( PoDoFo::PdfMemDocument & rDoc,
+                                      bool bCreateCycle )
+{ // Builds a three-page tree with two nested nodes in rDoc; if bCreateCycle is set, node0 is made to point back to root.
+    const int COUNT = 3;
+
+    std::vector<PdfPage*> pPage=CreateSamplePages( rDoc, COUNT );
+    std::vector<PdfObject*> pNode=CreateNodes( rDoc, 2 );
+
+    // manually insert pages into pagetree
+    PdfObject* pRoot = rDoc.GetPagesTree()->GetObject();
+
+    // tree layout (for !bCreateCycle):
+    //
+    //    root
+    //    +-- node0
+    //        +-- node1
+    //        |   +-- page0
+    //        |   +-- page1
+    //        \-- page2
+
+    // root node
+    AppendChildNode(pRoot, pNode[0]);
+
+    // tree node 0
+    AppendChildNode(pNode[0], pNode[1]);
+    AppendChildNode(pNode[0], pPage[2]->GetObject());
+
+    // tree node 1
+    AppendChildNode(pNode[1], pPage[0]->GetObject());
+    AppendChildNode(pNode[1], pPage[1]->GetObject());
+
+    if (bCreateCycle)
+    {
+        // invalid tree: Cycle!!!
+        // was not detected in PdfPagesTree::GetPageNode() rev. 1937
+        pNode[0]->GetIndirectKey("Kids")->GetArray()[0]=pRoot->Reference(); // node0's first kid (node1) now refers to root
+    }
+}
+
+void PagesTreeTest::CreateEmptyKidsTree( PoDoFo::PdfMemDocument & rDoc )
+{ // Builds a tree in rDoc whose root has one node holding all three pages and two nodes that stay empty.
+    const int COUNT = 3;
+
+    std::vector<PdfPage*> pPage=CreateSamplePages( rDoc, COUNT );
+    std::vector<PdfObject*> pNode=CreateNodes( rDoc, 3 );
+
+    // manually insert pages into pagetree
+    PdfObject* pRoot = rDoc.GetPagesTree()->GetObject();
+
+    // tree layout:
+    //
+    //    root
+    //    +-- node0
+    //    |   +-- page0
+    //    |   +-- page1
+    //    |   +-- page2
+    //    +-- node1
+    //    \-- node2
+
+    // root node
+    AppendChildNode(pRoot, pNode[0]);
+    AppendChildNode(pRoot, pNode[1]);
+    AppendChildNode(pRoot, pNode[2]);
+
+    // tree node 0
+    AppendChildNode(pNode[0], pPage[0]->GetObject());
+    AppendChildNode(pNode[0], pPage[1]->GetObject());
+    AppendChildNode(pNode[0], pPage[2]->GetObject());
+
+    // tree node 1 and node 2 are left empty: this is completely valid
+    // according to the PDF spec, i.e. the required keys may have the
+    // values "/Kids [ ]" and "/Count 0"
+}
+
+void PagesTreeTest::CreateNestedArrayTree( PoDoFo::PdfMemDocument & rDoc )
+{ // Builds a tree in rDoc whose root /Kids holds a single nested array with the three page references.
+    const int COUNT = 3;
+
+    std::vector<PdfPage*> pPage=CreateSamplePages( rDoc, COUNT );
+    PdfObject* pRoot = rDoc.GetPagesTree()->GetObject();
+
+    // create kids array with one reference per page and make root the /Parent of each page
+    PdfArray kids;
+    for (int i=0; i < COUNT; i++)
+    {
+        kids.push_back( pPage[i]->GetObject()->Reference() );
+        pPage[i]->GetObject()->GetDictionary().AddKey( PdfName("Parent"), pRoot->Reference());
+    }
+
+    // create nested kids array: its only element is the kids array itself
+    PdfArray nested;
+    nested.push_back(kids);
+
+    // manually insert pages into pagetree: /Count is the number of pages, /Kids is the nested array
+    pRoot->GetDictionary().AddKey( PdfName("Count"), static_cast<pdf_int64>(COUNT) );
+    pRoot->GetDictionary().AddKey( PdfName("Kids"), nested);
+}
 
 bool PagesTreeTest::IsPageNumber( PoDoFo::PdfPage* pPage, int nNumber )
 {
@@ -367,3 +567,33 @@ bool PagesTreeTest::IsPageNumber( PoDoFo
     else
         return true;
 }
+
+void PagesTreeTest::AppendChildNode(PdfObject* pParent, PdfObject* pChild)
+{
+    // Step 1: append the child's reference to pParent's Kids array
+    //         (starting from an empty array if pParent has no Kids array)
+    PdfObject* pCurrentKids = pParent->GetIndirectKey("Kids");
+    PdfArray updatedKids;
+    if( pCurrentKids && pCurrentKids->IsArray() )
+        updatedKids = pCurrentKids->GetArray();
+    updatedKids.push_back( pChild->Reference() );
+    pParent->GetDictionary().AddKey( PdfName("Kids"), updatedKids );
+
+    // Step 2: a page (leaf node) adds one to the Count of pParent and of
+    //         every node reachable from it by following the Parent keys
+    const bool bIsPage =
+        pChild->GetDictionary().GetKeyAsName( PdfName("Type") ) == PdfName("Page");
+    if( bIsPage )
+    {
+        for( PdfObject* pAncestor = pParent; pAncestor;
+             pAncestor = pAncestor->GetIndirectKey("Parent") )
+        {
+            PdfObject* pCountObj = pAncestor->GetIndirectKey("Count");
+            const pdf_int64 nNewCount = ( pCountObj ? pCountObj->GetNumber() : 0 ) + 1;
+            pAncestor->GetDictionary().AddKey( PdfName("Count"), nNewCount );
+        }
+    }
+
+    // Step 3: finally record pParent as the Parent of the child
+    pChild->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference() );
+}
diff -rupN podofo-0.9.6/test/unit/PagesTreeTest.h podofo-0.9.6-new/test/unit/PagesTreeTest.h
--- podofo-0.9.6/test/unit/PagesTreeTest.h	2009-05-08 19:45:52.000000000 +0200
+++ podofo-0.9.6-new/test/unit/PagesTreeTest.h	2019-03-13 23:34:59.094833809 +0100
@@ -21,11 +21,14 @@
 #ifndef _PAGES_TREE_TEST_H_
 #define _PAGES_TREE_TEST_H_
 
+#include <vector>
+
 #include <cppunit/extensions/HelperMacros.h>
 
 namespace PoDoFo {
 class PdfMemDocument;
 class PdfPage;
+class PdfObject;
 };
 
 /** This test tests the class PdfPagesTree
@@ -35,6 +38,9 @@ class PagesTreeTest : public CppUnit::Te
   CPPUNIT_TEST_SUITE( PagesTreeTest );
   CPPUNIT_TEST( testEmptyTree );
   CPPUNIT_TEST( testEmptyDoc );
+  CPPUNIT_TEST( testCyclicTree );
+  CPPUNIT_TEST( testEmptyKidsTree );
+  CPPUNIT_TEST( testNestedArrayTree );
   CPPUNIT_TEST( testCreateDelete );
   CPPUNIT_TEST( testGetPagesCustom );
   CPPUNIT_TEST( testGetPagesPoDoFo );
@@ -52,6 +58,9 @@ class PagesTreeTest : public CppUnit::Te
 
   void testEmptyTree();
   void testEmptyDoc();
+  void testCyclicTree();
+  void testEmptyKidsTree();
+  void testNestedArrayTree();
   void testCreateDelete();
   void testGetPagesCustom();
   void testGetPagesPoDoFo();
@@ -98,7 +107,58 @@ class PagesTreeTest : public CppUnit::Te
    */
   void CreateTestTreeCustom( PoDoFo::PdfMemDocument & rDoc );
 
+  /**
+   * Create a pages tree with cycles to test prevention of endless
+   * recursion as mentioned in different CVE reports.
+   *
+   * \param bCreateCycle if true a cyclic tree is created, otherwise a
+   *                     valid tree without cycles
+   */
+  void CreateCyclicTree( PoDoFo::PdfMemDocument & rDoc,
+                         bool bCreateCycle );
+
+  /**
+   * Create a pages tree with nodes containing empty kids.
+   *
+   * This is completely valid according to the PDF spec, i.e. the
+   * required keys may have the values "/Kids [ ]" and "/Count 0"
+   * Such a tree must still be parsable by a conforming reader:
+   *
+   * <BLOCKQUOTE>The tree contains nodes of two types—intermediate
+   * nodes, called page tree nodes, and leaf nodes, called page
+   * objects—whose form is described in the subsequent subclauses.
+   * Conforming products shall be prepared to handle any form
+   * of tree structure built of such nodes.</BLOCKQUOTE>
+   */
+  void CreateEmptyKidsTree( PoDoFo::PdfMemDocument & rDoc );
+
+  /**
+  * Create a pages tree with a nested kids array.
+  *
+  * Such a tree is not valid according to the PDF spec, which requires the key
+  * "Kids" to be an array of indirect references. And the children shall
+  * only be page objects or other page tree nodes.
+  */
+  void CreateNestedArrayTree( PoDoFo::PdfMemDocument & rDoc );
+
+ /**
+  * Create page object nodes (leaf nodes),
+  * where every page object has an additional
+  * key PoDoFoTestPageNumber with the original
+  * page number of the page.
+  */
+  std::vector<PoDoFo::PdfPage*> CreateSamplePages( PoDoFo::PdfMemDocument & rDoc,
+                                                   int nPageCount);
+
+  /**
+  * Create page tree nodes (internal nodes)
+  */
+  std::vector<PoDoFo::PdfObject*> CreateNodes( PoDoFo::PdfMemDocument & rDoc,
+                                               int nNodeCount);
+
   bool IsPageNumber( PoDoFo::PdfPage* pPage, int nNumber );
+
+  void AppendChildNode(PoDoFo::PdfObject* pParent, PoDoFo::PdfObject* pChild);
 };
 
 #endif // _PAGES_TREE_TEST_H_