Лекция 5: B-деревья (B-trees, k-way merge sort)

of 51/51
Лекция 5: B-деревья (B-trees) Курносов Михаил Георгиевич к.т.н. доцент Кафедры вычислительных систем Сибирский государственный университет телекоммуникаций и информатики http://www.mkurnosov.net
  • date post

    02-Dec-2014
  • Category

    Technology

  • view

    2.324
  • download

    6

Embed Size (px)

description

 

Transcript of Лекция 5: B-деревья (B-trees, k-way merge sort)

  • 1. 5: B- (B-trees) ... http://www.mkurnosov.net
  • 2. 2 (hard disk drive, HDD) (platters), (tracks) / (heads) (Sector) ( ) (Cluster) (Cylinder) ,
  • 3. 3 (seek time) (Revolutions per minute, RPM) (Rotational latency) 5400 rpm 0.011 . 7200 rpm 0.008 . 15 000 rpm 0.004 . ( 10 ) 105
  • 4. (SSD) 4 (Solid state drive, flash drive, SSD) SSD (seek time) 0
  • 5. 5 /
  • 6. 6 (binary search tree), : (key) 32 (char[32], 32 ) (value) (int, 4 ) left right ( 8 ) 16 GiB ?
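  • 7. Поиск в оперативной памяти 7 Имеется бинарное дерево поиска (binary search tree), в каждом узле: ключ (key) — строка из 32 символов (char[32], 32 байта); значение (value) — целое число (int, 4 байта); указатели left и right (по 8 байт). Сколько узлов поместится в 16 GiB оперативной памяти? Один узел занимает 32 + 4 + 8 + 8 = 52 байта. Количество узлов в 16 GiB (без учёта выравнивания): 16 * 2^30 / 52 ≈ 330 382 099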
  • 8. 8 1 000 000 000 ? Facebook ( 2013): 1 200 000 000 Google Gmail ( 2012): 425 000 000 ( 2013): 43 000 000 : HDD/SSD-,
  • 9. B- (B-tree) 9 B- (B-tree) , (, HDD/SSD) B- ( ) B- () Rudolf Bayer Edward M. McCreight Boeing Research Labs, USA, 1971 B-: Boeing, Bayer, Bayer R., McCreight E. Organization and Maintenance of Large Ordered Indices // Math. and Information Sciences Report (Boeing Research Labs). 1970, No. 20. Bayer R. Binary B-Trees for Virtual Memory // Workshop on Data Description, Access and Control, 1971.
  • 10. B- (B-tree) 10 B- O(logn), n B- ( ) B- 1 k , k + 1
  • 11. B- (B-tree) 11 key1, key2, , key1000 key1, key2, , key1000 key1, key2, , key1000 key1, key2, , key1000 1 2 1001 key1, key2, , key1000 key1, key2, , key1000 key1, key2, , key1000 1 2 1001 root 3 1 + 1001 + 1001 * 1001 = 1 003 003 1000 + 1001 * 1000 + 1001 * 1001 * 1000 = 1 003 003 000
  • 12. B- (B-tree) 12 B- (B-tree) , : 1) : n , key1, key2, , keyn, key1 key2 keyn leaf, true, n + 1 c1, c2, , cn+1 ( )
  • 13. B- (B-tree) 13 B- (B-tree) , : 2) , : ki ci, k1 key1 k2 key2 keyn kn+1 3) , h
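  • 14. B-дерево (B-tree) 14 B-дерево (B-tree) — это корневое дерево, обладающее следующими свойствами: 4) Количество ключей в узле ограничено снизу и сверху и выражается через число $t \geq 2$ — минимальную степень (minimum degree) B-дерева. Корень содержит от 1 до $2t - 1$ ключей. Каждый внутренний узел содержит не менее $t - 1$ ключей и не менее $t$ дочерних узлов. Каждый узел содержит не более $2t - 1$ ключей и не более $2t$ дочерних узлов (для внутренних узлов). Узел называется заполненным (full), если он содержит ровно $2t - 1$ ключей.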
  • 15. B- (B-tree) 15 root t 1 t 1 t 1 t 1 t 1 t 1 t 1 t 1 t 1 t 1 t 1 t 1 t 1 t 1 1: # 0: 1 1: 2 2: 2t 3: 2t2
  • 16. 2-3-4- (2-3-4 tree) 16 2-3-4- (2-3-4 tree) t = 2, 2, 3 4
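  • 17. Высота B-дерева 17 Утверждение. Высота B-дерева с $n \geq 1$ ключами и минимальной степенью $t \geq 2$ в худшем случае не превышает $\log_t((n + 1) / 2)$. Доказательство. Обозначим высоту B-дерева через $h$. Рассмотрим максимально высокое B-дерево: в корне хранится 1 ключ, в остальных узлах — по $t - 1$ ключу (минимально возможное количество). Уровень 0 (корень): 1 узел. Уровень 1: 2 узла (в каждом по $t - 1$ ключу). Уровень 2: $2t$ узлов. Уровень 3: $2t^2$ узлов. … Уровень $h$: $2t^{h-1}$ узлов.
  • 18. Высота B-дерева 18 Утверждение. Высота B-дерева с $n \geq 1$ ключами и минимальной степенью $t \geq 2$ в худшем случае не превышает $\log_t((n + 1) / 2)$. Доказательство. Следовательно, количество ключей в таком дереве $n = 1 + (t - 1)(2 + 2t + 2t^2 + 2t^3 + \dots + 2t^{h-1}) = 1 + 2(t - 1)(1 + t + t^2 + \dots + t^{h-1})$. Сумма первых $h$ членов геометрической прогрессии: $S_h = \dfrac{b_1 (q^h - 1)}{q - 1}$.
  • 19. Высота B-дерева 19 Утверждение. Высота B-дерева с $n \geq 1$ ключами и минимальной степенью $t \geq 2$ в худшем случае не превышает $\log_t((n + 1) / 2)$. Доказательство. Следовательно, $n = 1 + 2(t - 1)\dfrac{t^h - 1}{t - 1} = 2t^h - 1$, откуда $\dfrac{n + 1}{2} = t^h$ и $h = \log_t \dfrac{n + 1}{2}$. Утверждение доказано.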
  • 20. B- 20 : o B- o DiskRead DiskWrite - ( DiskRead, DiskWrite)
  • 21. (Lookup) 21 1) . k key1 key2 keyn keyi, k keyi 2) k = keyi, ( ) 3) , (DiskRead) ci 4) (leaf = true), ( )
  • 22. (Lookup) 22 function BTreeLookup(node, key) i = 1 while i < n AND key > node.key[i] do i = i + 1 end while if i node.key[i] do i = i + 1 end while if i node.key[i] do i = i + 1 end while if i node.key[i] do i = i + 1 end while if i 1 AND key < node.key[i] do node.key[i + 1] = node.key[i] i = i - 1 end while node.key[i + 1] = key node.n = node.n + 1 DiskWrite(node) else while i > 1 AND key < node.key[i] do i = i - 1 end while i = i + 1 DiskRead(node.c[i]) 34
  • 35. (Insert) 35 if node.c[i].n = 2t 1 then BTreeSplitNode(node.c[i], node, i) if key > node.key[i] then i = i + 1 end if end if node = BTreeInsertNonfull(node.c[i], key) end if return node end function B- ( ): = = ( log ) : = = (log )
  • 36. (Delete) 36 , key key , t 1, B- [CLRS, . 530]
  • 37. B- 37 B+- (B+-tree) B-, , : B*- (B*-tree) B-, ( ) 2/3 , 1/2 B-
  • 38. B- 38 B+-: o NTFS, ReiserFS, NSS, XFS, JFS o IBM DB2, Informix, Microsoft SQL Server, Oracle 8, Sybase ASE, SQLite
  /* Slide 39: 2-3-4 B-tree */

  /* Minimum degree of B-tree (T = 2 gives a 2-3-4 B-tree). */
  #define T 2

  /*
   * B-tree node.
   *
   * key, value and child point to separately allocated arrays; the functions
   * on the following slides size them for 2 * T - 1 keys/values and 2 * T
   * child pointers.
   */
  struct btree {
      int leaf;             /* non-zero when the node has no children */
      int nkeys;            /* number of keys currently stored in the node */
      int *key;             /* keys, kept in ascending order */
      int *value;           /* value[i] is the value stored with key[i] */
      struct btree **child; /* child[i] holds the keys that sort before key[i] */
  };
  • 40. B-tree 40 struct btree *btree_create() { struct btree *node; node = malloc(sizeof(*node)); node->leaf = 1; node->nkeys = 0; node->key = malloc(sizeof(*node->key) * 2 * T - 1); node->value = malloc(sizeof(*node->value) * 2 * T - 1); node->child = malloc(sizeof(*node->child) * 2 * T); return node; }
  • 41. B-tree void btree_lookup(struct btree *tree, int key, struct btree **node, int *index) { int i; for (i = 0; i < tree->nkeys && key > tree->key[i]; ) { i++; } if (i < tree->nkeys && key == tree->key[i]) { *node = tree; *index = i; return; } if (!tree->leaf) { /* Disk read tree->child[i] */ btree_lookup(tree, key, node, index); } else { *node = NULL; } } 41
  • 42. B-tree struct btree *btree_insert(struct btree *tree, int key, int value) { struct btree *newroot; if (tree == NULL) { tree = btree_create(); tree->nkeys = 1; tree->key[0] = key; tree->value[0] = value; return tree; } if (tree->nkeys == 2 * T - 1) { newroot = btree_create(); /* Create empty root */ newroot->leaf = 0; newroot->child[0] = tree; btree_split_node(tree, newroot, 0); return btree_insert_nonfull(newroot, key, value); } return btree_insert_nonfull(tree, key, value); } 42
  • 43. B-tree void btree_split_node(struct btree *node, struct btree *parent, int index) { struct btree *z; int i; z = btree_create(); z->leaf = node->leaf; z->nkeys = T - 1; for (i = 0; i < T - 1; i++) { z->key[i] = node->key[T + i]; z->value[i] = node->value[T + i]; } if (!node->leaf) { for (i = 0; i < T; i++) z->child[i] = node->child[i + T]; } node->nkeys = T - 1; 43
  • 44. B-tree /* Insert median key into parent node */ for (i = parent->nkeys; i >= 0 && i child[i + 1] = parent->child[i]; parent->child[index + 1] = z; for (i = parent->nkeys - 1; i >= 0 && i key[i + 1] = parent->key[i]; parent->value[i + 1] = parent->value[i]; } parent->key[index] = node->key[T - 1]; parent->value[index] = node->value[T - 1]; parent->nkeys++; /* Write to disk: node, z, parent */ } 44
  • 45. B-tree struct btree *btree_insert_nonfull( struct btree *node, int key, int value) { int i; i = node->nkeys; if (node->leaf) { for (i = node->nkeys - 1; i > 0 && key < node->key[i]; i--) { node->key[i + 1] = node->key[i]; } node->key[i + 1] = key; node->nkeys++; } else { 45
  • 46. B-tree for (i = node->nkeys - 1; i > 0 && key < node->key[i]; ) { i--; } i++; if (node->child[i]->nkeys == 2 * T - 1) { btree_split_node(node->child[i], node, i); if (key > node->key[i]) i++; } node = btree_insert_nonfull(node->child[i], key, value); } return node; } 46
  • 47. B-tree int main() { struct btree *tree; tree = btree_insert(NULL, 3, 0); tree = btree_insert(tree, 12, 0); tree = btree_insert(tree, 9, 0); tree = btree_insert(tree, 18, 0); return 0; } 47
  • 48. 48 (External sorting) , 300 GiB 2 GiB ?
  • 49. 49 (External sorting) , 300 GiB 2 GiB ? 1. 2 GiB 2. (MergeSort, QuickSort, CountingSort, ) () 3. 1 2, 300 / 2 = 150 4. 151 13 KiB (2 GiB / 151 13 KiB) 5. 150 13 KiB , 151- 6. (merge) 150 7. 5 6 150
  • 50. 50 (External sorting) , 300 GiB 2 GiB ? 1. 2 GiB 2. (MergeSort, QuickSort, CountingSort, ) () 3. 1 2, 300 / 2 = 150 4. 151 13 KiB (2 GiB / 151 13 KiB) 5. 150 13 KiB , 151- 6. (merge) 150 7. 5 6 150 k-way merge sort k = 300 GiB / 2 GiB = 150 150-way merge sort
  • 51. 51 B- (key) (value) t, Btrfs B- (MySQL, PostgreSQL, Microsoft SQL Server, IBM DB2 .): , (key) (value)