插入元素后,若叶子超过了容量 M,则发生分裂;分裂是向上传递的,如果非叶节点的元素超过了 M - 1,也会发生分裂
// T(M, N) = O((M / log(M)) * log(N)) Btree Insert( ElementType X, Btree T ) { Search from root to leaf for X and find the proper leaf node; Insert X; while ( this node has M+1 keys ) { // T = O(M) split it into 2 nodes with(M+1)/2 and(M+1)/2 keys, respectively; if (this node is the root) create a new root with two children; check its parent; } }
M 最优取值为 3 或 4
删除
删除元素后,若叶子的实际元素数量少于了 M / 2,则发生合并,合并也是向上传递的
复杂度
Depth(M,N)=O(logM/2+1N)
TFind(M,N)=O(logN)
Inverted File Index
从搜索引擎开始,当我们在搜索引擎中输入一个字符串 S 的时候,到底发生了什么?
引入
Sol. 1
一篇一篇找,太烂了
Sol. 2 Term-Document Incidence Matrix
以页面序号为 col,单词为 row,构建矩阵
矩阵过于稀疏,空间浪费了!
Sol. 3 Compact Version - Inverted File Index
倒排索引的思路,某个单词 T 对应一个列表 posting list,储存包含 T 的所有页面的指针
索引生成
while ( read a document D ) { while ( read a term T in D ) { if ( Find( Dictionary, T ) == false ) Insert( Dictionary, T ); Get T’s posting list; Insert a node to T’s posting list; } } Write the inverted index to disk;
对斜堆进行均摊分析,定义势能函数 Φ(Di) 为 heavy 节点的数量;定义 heavy 节点 X 为:X 的右子树的后代数量不小于 X 的后代的一半(X 的后代包括 X 自己)
Tamortized=O(logN)
Binomial Queue
二项队列是二项树的集合
二项树
记高度为 k 的二项树为 Bk
B0 是只有一个节点的树
Bk 是两个 Bk−1 的组合,其中一个是另一个的子树
Bk 的根有 k 个孩子,分别是 B0,B1,...,Bk−1,Bk Bk 有 2k 个节点,深度为 d 的节点数量是 Ckd
性质
二项队列中不会出现 2+ 个相同高度的二项树
具有 n 个元素的二项队列的结构是一定的,就是 n 的二进制表示
操作
FindMin
由于最小值一定出现在根上,对根遍历即可
T(N)=O(logN)
Merge
从小到大依次合并,Bk 和 Bk 合并成 Bk+1
T(N)=O(logN)
Insertion
特殊的合并,向初始为空的二项队列进行 N 次插入,总共需要 O(N) 的消耗
实现
typedefstructBinNode *Position; typedefstructCollection *BinQueue; typedefstructBinNode *BinTree;/* missing from p.176 */
structBinNode { ElementType Element; Position LeftChild; Position NextSibling; } ;
structCollection { int CurrentSize; /* total number of nodes */ BinTree TheTrees[ MaxTrees ]; } ;
BinTree CombineTrees( BinTree T1, BinTree T2 ) { /* merge equal-sized T1 and T2 */ if ( T1->Element > T2->Element ) /* attach the larger one to the smaller one */ return CombineTrees( T2, T1 ); /* insert T2 to the front of the children list of T1 */ T2->NextSibling = T1->LeftChild; T1->LeftChild = T2; return T1; }
ElementType DeleteMin( BinQueue H ) { BinQueue DeletedQueue; Position DeletedTree, OldRoot; ElementType MinItem = Infinity; /* the minimum item to be returned */ int i, j, MinTree; /* MinTree is the index of the tree with the minimum item */
if ( IsEmpty( H ) ) { PrintErrorMessage(); return –Infinity; }
for ( i = 0; i < MaxTrees; i++) { /* Step 1: find the minimum item */ if( H->TheTrees[i] && H->TheTrees[i]->Element < MinItem ) { MinItem = H->TheTrees[i]->Element; MinTree = i; } /* end if */ } /* end for-i-loop */ DeletedTree = H->TheTrees[ MinTree ]; H->TheTrees[ MinTree ] = NULL; /* Step 2: remove the MinTree from H => H’ */ OldRoot = DeletedTree; /* Step 3.1: remove the root */ DeletedTree = DeletedTree->LeftChild; free(OldRoot); DeletedQueue = Initialize(); /* Step 3.2: create H” */ DeletedQueue->CurrentSize = ( 1<<MinTree ) – 1; /* 2^MinTree – 1 */ for ( j = MinTree – 1; j >= 0; j – – ) { DeletedQueue->TheTrees[j] = DeletedTree; DeletedTree = DeletedTree->NextSibling; DeletedQueue->TheTrees[j]->NextSibling = NULL; } /* end for-j-loop */ H->CurrentSize – = DeletedQueue->CurrentSize + 1; H = Merge( H, DeletedQueue ); /* Step 4: merge H’ and H” */ return MinItem; }
给定 N 个点和 N(N - 1) / 2 个距离组成的距离集 S,要求在 x 轴上部署这些点,使得点与点的距离集和给定的距离相同
解决方法
x1=0;xN=max{S}
找到 S 中目前最大的距离,放置点(2 种方法,靠近 x1 或 xN)
进行检查,检查成功则从 S 中删去检查出来的距离并回到 2;否则回溯
代码
boolReconstruct( DistType X[ ], DistSet D, int N, int left, int right ) { /* X[1]...X[left-1] and X[right+1]...X[N] are solved */ bool Found = false; if ( Is_Empty( D ) ) returntrue; /* solved */ D_max = Find_Max( D ); /* option 1:X[right] = D_max */ /* check if |D_max-X[i]|D is true for all X[i]’s that have been solved */ OK = Check( D_max, N, left, right ); /* pruning */ if ( OK ) { /* add X[right] and update D */ X[right] = D_max; for ( i=1; i<left; i++ ) Delete( |X[right]-X[i]|, D); for ( i=right+1; i<=N; i++ ) Delete( |X[right]-X[i]|, D); Found = Reconstruct ( X, D, N, left, right-1 ); if ( !Found ) { /* if does not work, undo */ for ( i=1; i<left; i++ ) Insert( |X[right]-X[i]|, D); for ( i=right+1; i<=N; i++ ) Insert( |X[right]-X[i]|, D); } } /* finish checking option 1 */ if ( !Found ) { /* if option 1 does not work */ /* option 2: X[left] = X[N]-D_max */ OK = Check( X[N]-D_max, N, left, right ); if ( OK ) { X[left] = X[N] – D_max; for ( i=1; i<left; i++ ) Delete( |X[left]-X[i]|, D); for ( i=right+1; i<=N; i++ ) Delete( |X[left]-X[i]|, D); Found = Reconstruct (X, D, N, left+1, right ); if ( !Found ) { for ( i=1; i<left; i++ ) Insert( |X[left]-X[i]|, D); for ( i=right+1; i<=N; i++ ) Insert( |X[left]-X[i]|, D); } } /* finish checking option 2 */ } /* finish checking all the options */ return Found; }
模板
boolBacktracking( int i ) { Found = false; if ( i > N ) returntrue; /* solved with (x1, …, xN) */ for ( each xi Si ) { /* check if satisfies the restriction R */ OK = Check((x1, …, xi) , R ); /* pruning */ if ( OK ) { Count xi in; Found = Backtracking( i+1 ); if ( !Found ) Undo( i ); /* recover to (x1, …, xi-1) */ } if ( Found ) break; } return Found; }
回溯的效率跟 S 的规模、约束函数的复杂性、满足约束条件的结点数相关。
约束函数决定了剪枝的效率,但是如果函数本身太复杂也未必合算。
满足约束条件的结点数最难估计,使得复杂度分析很难完成。
如果 S 有不同的大小,更小的 S 优先
井字棋 Tic-tac-toe: Minimax Strategy
代入电脑,尝试理解电脑是如何从 30 多万种可能性中找到胜利的下法的
定义对于某个位置 P 的估计性质的胜率函数 f
f(P)=WC−WH
W:可能取得胜利的数量(统计所有未下的棋子,下下去找就行了)
C:computer
H:human
computer 希望 f 增大;human 希望 f 减小
α−β Prunninng
以 max 为例,假如已知选择路线 1 的 min 分数为 30,在搜索后继的路线 2 时,发现存在分数为 20 的情况,那么路线 2 无需继续搜索(进行剪枝),因为下一次会走 min 选择,路线 2 的实际分数一定不大于 20
intFibonacci( int N ) { int i, Last, NextToLast, Answer; if ( N <= 1 ) return1; Last = NextToLast = 1; /* F(0) = F(1) = 1 */ for ( i = 2; i <= N; i++ ) { Answer = Last + NextToLast; /* F(i) = F(i-1) + F(i-2) */ NextToLast = Last; Last = Answer; /* update F(i-1) and F(i-2) */ } /* end-for */ return Answer; }
Ordering Matrix Multiplications
对于连续的矩阵相乘,寻找一个计算量最小的顺序
令 bn 为连续 n 个矩阵相乘的顺序数量
bn=i=1∑n−1bibn−1
⇒bn=O(nn4n)
/* r contains number of columns for each of the N matrices */ /* r[ 0 ] is the number of rows in matrix 1 */ /* Minimum number of multiplications is left in M[ 1 ][ N ] */ voidOptMatrix( constlong r[ ], int N, TwoDimArray M ) { int i, j, k, L; long ThisM; for( i = 1; i <= N; i++ ) M[ i ][ i ] = 0; for( k = 1; k < N; k++ ) /* k = j - i */ for( i = 1; i <= N - k; i++ ) { /* For each position */ j = i + k; M[ i ][ j ] = Infinity; for( L = i; L < j; L++ ) { ThisM = M[ i ][ L ] + M[ L + 1 ][ j ] + r[ i - 1 ] * r[ L ] * r[ j ]; if ( ThisM < M[ i ][ j ] ) /* Update min */ M[ i ][ j ] = ThisM; } /* end for-L */ } /* end for-Left */ }
/* A[ ] contains the adjacency matrix with A[ i ][ i ] = 0 */ /* D[ ] contains the values of the shortest path */ /* N is the number of vertices */ /* A negative cycle exists iff D[ i ][ i ] < 0 */ voidAllPairs( TwoDimArray A, TwoDimArray D, int N ) { int i, j, k; for ( i = 0; i < N; i++ ) /* Initialize D */ for( j = 0; j < N; j++ ) D[ i ][ j ] = A[ i ][ j ]; for( k = 0; k < N; k++ ) /* add one vertex k into the path */ for( i = 0; i < N; i++ ) for( j = 0; j < N; j++ ) if( D[ i ][ k ] + D[ k ][ j ] < D[ i ][ j ] ) /* Update shortest path */ D[ i ][ j ] = D[ i ][ k ] + D[ k ][ j ]; }