From 8f971b576adf3b88c1a1117ceba2cd69b312e05e Mon Sep 17 00:00:00 2001 From: Paul Koch Date: Wed, 25 Dec 2024 11:38:21 -0800 Subject: [PATCH] REMOVE flatten --- .../PartitionOneDimensionalBoosting.cpp | 287 ++---------------- 1 file changed, 18 insertions(+), 269 deletions(-) diff --git a/shared/libebm/PartitionOneDimensionalBoosting.cpp b/shared/libebm/PartitionOneDimensionalBoosting.cpp index bfc0210cd..be88be18b 100644 --- a/shared/libebm/PartitionOneDimensionalBoosting.cpp +++ b/shared/libebm/PartitionOneDimensionalBoosting.cpp @@ -119,279 +119,28 @@ static ErrorEbm Flatten(BoosterShell* const pBoosterShell, const TreeNode* pMissingValueTreeNode, const size_t cSlices, const size_t cBins) { - LOG_0(Trace_Verbose, "Entered Flatten"); - EBM_ASSERT(nullptr != pBoosterShell); - EBM_ASSERT(iDimension <= k_cDimensionsMax); - EBM_ASSERT(nullptr != apBins); - EBM_ASSERT(1 <= cSlices); - EBM_ASSERT(2 <= cBins); - EBM_ASSERT(cSlices <= cBins); - EBM_ASSERT(!bNominal || cSlices == cBins); - ErrorEbm error; + UNUSED(bMissing); + UNUSED(bNominal); + UNUSED(flags); + UNUSED(regAlpha); + UNUSED(regLambda); + UNUSED(deltaStepMax); + UNUSED(iDimension); + UNUSED(apBins); + UNUSED(pMissingValueTreeNode); + UNUSED(cSlices); + UNUSED(cBins); -#ifndef NDEBUG - auto* const pRootTreeNodeDebug = pBoosterShell->GetTreeNodesTemp(); - size_t cSamplesExpectedDebug = static_cast(pRootTreeNodeDebug->GetBin()->GetCountSamples()); - size_t cSamplesTotalDebug = 0; -#endif // NDEBUG - const Bin* pMissingBin = nullptr; + BoosterCore* const pBoosterCore = pBoosterShell->GetBoosterCore(); + const size_t cScores = pBoosterCore->GetCountScores(); Tensor* const pInnerTermUpdate = pBoosterShell->GetInnerTermUpdate(); - - error = pInnerTermUpdate->SetCountSlices(iDimension, cSlices); - if(UNLIKELY(Error_None != error)) { - // already logged - return error; - } - - const BoosterCore* const pBoosterCore = pBoosterShell->GetBoosterCore(); - const size_t cScores = pBoosterCore->GetCountScores(); - - EBM_ASSERT(!IsMultiplyError(cScores, cSlices)); - error = pInnerTermUpdate->EnsureTensorScoreCapacity(cScores * cSlices); - if(UNLIKELY(Error_None != error)) { - // already logged - return error; - } - - UIntSplit* pSplit = pInnerTermUpdate->GetSplitPointer(iDimension); - - FloatScore* const aUpdateScore = pInnerTermUpdate->GetTensorScoresPointer(); - FloatScore* pUpdateScore; - - UIntSplit prev = 0; - const Bin* const* ppBinCur = nullptr; - if(bNominal) { - UIntSplit iSplit = 1; - while(cSlices != iSplit) { - pSplit[iSplit - 1] = iSplit; - ++iSplit; - } - ppBinCur = reinterpret_cast* const*>(apBins); - } else { - pUpdateScore = aUpdateScore; - - if(bMissing) { - EBM_ASSERT(2 <= cSlices); // no cuts if there was only missing bin - - // always put a split on the missing bin - *pSplit = 1; - ++pSplit; - prev = 1; - - // pUpdateScore is overwritten later if bNominal - pUpdateScore += cScores; - } - } - - const size_t cBytesPerBin = GetBinSize(true, true, bHessian, cScores); - auto* const aBins = pBoosterShell->GetBoostingMainBins()->Specialize(); - - EBM_ASSERT(!IsOverflowTreeNodeSize(bHessian, cScores)); // we're accessing allocated memory - const size_t cBytesPerTreeNode = GetTreeNodeSize(bHessian, cScores); - - auto* const pRootTreeNode = pBoosterShell->GetTreeNodesTemp(); - auto* pTreeNode = pRootTreeNode; - - const bool bUpdateWithHessian = bHessian && !(TermBoostFlags_DisableNewtonUpdate & flags); - - TreeNode* pParent = nullptr; - - while(true) { - if(UNPREDICTABLE(pTreeNode->AFTER_IsSplit())) { - auto* const pLeftChild = pTreeNode->DECONSTRUCT_TraverseLeftAndMark(pParent); - pParent = pTreeNode; - pTreeNode = pLeftChild; - } else { - const Bin* const* ppBinLast; - // if the pointer points to the space within the bins, then the TreeNode could not be split - // and this TreeNode never had children and we never wrote a pointer to the children in this memory - if(pTreeNode->AFTER_IsSplittable()) { - auto* const pChildren = pTreeNode->AFTER_GetChildren(); - - EBM_ASSERT(IndexTreeNode(pTreeNode, cBytesPerTreeNode) <= pChildren && - pChildren <= - IndexTreeNode(pRootTreeNode, pBoosterCore->GetCountBytesTreeNodes() - cBytesPerTreeNode)); - - if(pMissingValueTreeNode == GetLeftNode(pChildren)) { - EBM_ASSERT(nullptr == pMissingBin); - pMissingBin = pTreeNode->GetBin(); - } - - // the node was examined and a gain calculated, so it has left and right children. - // We can retrieve the split location by looking at where the right child would end its range - const auto* const pRightChild = GetRightNode(pChildren, cBytesPerTreeNode); - ppBinLast = pRightChild->BEFORE_GetBinLast(); - - if(pMissingValueTreeNode == pRightChild) { - EBM_ASSERT(nullptr == pMissingBin); - pMissingBin = pTreeNode->GetBin(); - } - } else { - ppBinLast = pTreeNode->BEFORE_GetBinLast(); - if(pMissingValueTreeNode == pTreeNode) { - EBM_ASSERT(nullptr == pMissingBin); - pMissingBin = pTreeNode->GetBin(); - } - } - - EBM_ASSERT(apBins <= ppBinLast); - EBM_ASSERT(ppBinLast < apBins + (cBins - (nullptr != pMissingValueTreeNode ? size_t{1} : size_t{0}))); - -#ifndef NDEBUG - cSamplesTotalDebug += static_cast(pTreeNode->GetBin()->GetCountSamples()); -#endif // NDEBUG - - size_t iEdge; - const auto* const aGradientPair = pTreeNode->GetBin()->GetGradientPairs(); - size_t iScore; - if(nullptr != ppBinCur) { - goto determine_bin; - } - EBM_ASSERT(!bNominal); - - iEdge = ppBinLast - apBins + 1 + (nullptr != pMissingValueTreeNode ? 1 : 0); - - while(true) { // not a real loop - if(bMissing) { - if(TermBoostFlags_MissingLow & flags) { - if(nullptr == pMissingBin) { - pMissingBin = pTreeNode->GetBin(); - } - if(1 == iEdge) { - // this cut would isolate the missing bin, but we handle those scores separately - break; - } - } else if(TermBoostFlags_MissingHigh & flags) { - ++iEdge; // missing is at index 0 in the model, so we are offset by one - pMissingBin = pTreeNode->GetBin(); - EBM_ASSERT(iEdge <= cBins + 1); - EBM_ASSERT(0 != prev); - if(cBins + 1 == iEdge && cBins == prev) { - // this cut would isolate the missing bin, but we handle those scores separately - break; - } - } - } - - while(true) { - iScore = 0; - do { - FloatCalc updateScore; - if(bUpdateWithHessian) { - updateScore = -CalcNegUpdate(static_cast(aGradientPair[iScore].m_sumGradients), - static_cast(aGradientPair[iScore].GetHess()), - regAlpha, - regLambda, - deltaStepMax); - } else { - updateScore = -CalcNegUpdate(static_cast(aGradientPair[iScore].m_sumGradients), - static_cast(pTreeNode->GetBin()->GetWeight()), - regAlpha, - regLambda, - deltaStepMax); - } - - *pUpdateScore = static_cast(updateScore); - ++pUpdateScore; - - ++iScore; - } while(cScores != iScore); - if(nullptr == ppBinCur) { - break; - } - EBM_ASSERT(bNominal); - ++ppBinCur; - if(ppBinLast < ppBinCur) { - break; - } - determine_bin:; - const auto* const pBinCur = *ppBinCur; - const size_t iBin = CountBins(pBinCur, aBins, cBytesPerBin); - pUpdateScore = aUpdateScore + iBin * cScores; - } - - break; - } - - pTreeNode = pParent; - - while(true) { - if(nullptr == pTreeNode) { - EBM_ASSERT(cSamplesTotalDebug == cSamplesExpectedDebug); - - EBM_ASSERT(nullptr == pMissingValueTreeNode || nullptr != pMissingBin); - if(nullptr != pMissingBin) { - EBM_ASSERT(bMissing); - - FloatScore hess = static_cast(pMissingBin->GetWeight()); - const auto* pGradientPair = pMissingBin->GetGradientPairs(); - const auto* const pGradientPairEnd = pGradientPair + cScores; - FloatScore* pMissingUpdateScore = aUpdateScore; - do { - if(bUpdateWithHessian) { - hess = static_cast(pGradientPair->GetHess()); - } - FloatCalc updateScore = -CalcNegUpdate(static_cast(pGradientPair->m_sumGradients), - hess, - regAlpha, - regLambda, - deltaStepMax); - - *pMissingUpdateScore = updateScore; - ++pMissingUpdateScore; - - ++pGradientPair; - } while(pGradientPairEnd != pGradientPair); - } - - EBM_ASSERT(bNominal || pSplit == cSlices - 1 + pInnerTermUpdate->GetSplitPointer(iDimension)); - - LOG_0(Trace_Verbose, "Exited Flatten"); - return Error_None; - } - if(!pTreeNode->DECONSTRUCT_IsRightChildTraversal()) { - // we checked earlier that countBins could be converted to a UIntSplit - if(nullptr == ppBinCur) { - EBM_ASSERT(!bNominal); - - while(true) { // not a real loop - if(bMissing) { - if(TermBoostFlags_MissingLow & flags) { - if(1 == iEdge) { - // this cut would isolate the missing bin, but missing already has a cut - break; - } - } else if(TermBoostFlags_MissingHigh & flags) { - EBM_ASSERT(iEdge <= cBins); - if(cBins == iEdge) { - // this cut would isolate the missing bin, but missing already has a cut - break; - } - } - } - - EBM_ASSERT(!IsConvertError(iEdge)); - prev = static_cast(iEdge); - EBM_ASSERT(pSplit < cSlices - 1 + pInnerTermUpdate->GetSplitPointer(iDimension)); - *pSplit = prev; - ++pSplit; - - break; - } - } - pParent = pTreeNode; - pTreeNode = pTreeNode->DECONSTRUCT_TraverseRightAndMark(cBytesPerTreeNode); - break; - } else { - pTreeNode = pTreeNode->DECONSTRUCT_GetParent(); - } - } - } - } + pInnerTermUpdate->SetCountSlices(iDimension, 1); + pInnerTermUpdate->EnsureTensorScoreCapacity(cScores); + return Error_None; } WARNING_POP @@ -1104,8 +853,8 @@ template class PartitionOneDimensionalBoo cSlices, cBins); - EBM_ASSERT(!bMissing || 2 <= pBoosterShell->GetInnerTermUpdate()->GetCountSlices(iDimension)); - EBM_ASSERT(!bMissing || *pBoosterShell->GetInnerTermUpdate()->GetSplitPointer(iDimension) == 1); + //EBM_ASSERT(!bMissing || 2 <= pBoosterShell->GetInnerTermUpdate()->GetCountSlices(iDimension)); + //EBM_ASSERT(!bMissing || *pBoosterShell->GetInnerTermUpdate()->GetSplitPointer(iDimension) == 1); return error; }