1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.kahadb.index; 18 19 import java.io.DataInput; 20 import java.io.DataOutput; 21 import java.io.IOException; 22 import java.io.PrintWriter; 23 import java.util.Arrays; 24 import java.util.Iterator; 25 import java.util.Map; 26 import java.util.NoSuchElementException; 27 import java.util.Map.Entry; 28 29 import org.apache.kahadb.index.BTreeIndex.Prefixer; 30 import org.apache.kahadb.page.Page; 31 import org.apache.kahadb.page.Transaction; 32 import org.apache.kahadb.util.VariableMarshaller; 33 34 35 /** 36 * The BTreeNode class represents a node in the BTree object graph. It is stored in 37 * one Page of a PageFile. 38 */ 39 public final class BTreeNode<Key,Value> { 40 41 // The index that this node is part of. 42 private final BTreeIndex<Key,Value> index; 43 // The parent node or null if this is the root node of the BTree 44 private BTreeNode<Key,Value> parent; 45 // The page associated with this node 46 private Page<BTreeNode<Key,Value>> page; 47 48 // Order list of keys in the node 49 private Key[] keys; 50 // Values associated with the Keys. Null if this is a branch node. 51 private Value[] values; 52 // nodeId pointers to children BTreeNodes. Null if this is a leaf node. 53 private long[] children; 54 // The next leaf node after this one. Used for fast iteration of the entries. 55 private long next = -1; 56 57 private final class KeyValueEntry implements Map.Entry<Key, Value> { 58 private final Key key; 59 private final Value value; 60 61 public KeyValueEntry(Key key, Value value) { 62 this.key = key; 63 this.value = value; 64 } 65 66 public Key getKey() { 67 return key; 68 } 69 70 public Value getValue() { 71 return value; 72 } 73 74 public Value setValue(Value value) { 75 throw new UnsupportedOperationException(); 76 } 77 78 } 79 80 private final class BTreeIterator implements Iterator<Map.Entry<Key, Value>> { 81 82 private final Transaction tx; 83 BTreeNode<Key,Value> current; 84 int nextIndex; 85 Map.Entry<Key,Value> nextEntry; 86 87 private BTreeIterator(Transaction tx, BTreeNode<Key,Value> current, int nextIndex) { 88 this.tx = tx; 89 this.current = current; 90 this.nextIndex=nextIndex; 91 } 92 93 synchronized private void findNextPage() { 94 if( nextEntry!=null ) { 95 return; 96 } 97 98 try { 99 while( current!=null ) { 100 if( nextIndex >= current.keys.length ) { 101 // we need to roll to the next leaf.. 102 if( current.next >= 0 ) { 103 current = index.loadNode(tx, current.next, null); 104 nextIndex=0; 105 } else { 106 break; 107 } 108 } else { 109 nextEntry = new KeyValueEntry(current.keys[nextIndex], current.values[nextIndex]); 110 nextIndex++; 111 break; 112 } 113 114 } 115 } catch (IOException e) { 116 } 117 } 118 119 public boolean hasNext() { 120 findNextPage(); 121 return nextEntry !=null; 122 } 123 124 public Entry<Key, Value> next() { 125 findNextPage(); 126 if( nextEntry !=null ) { 127 Entry<Key, Value> lastEntry = nextEntry; 128 nextEntry=null; 129 return lastEntry; 130 } else { 131 throw new NoSuchElementException(); 132 } 133 } 134 135 public void remove() { 136 throw new UnsupportedOperationException(); 137 } 138 } 139 140 /** 141 * The Marshaller is used to store and load the data in the BTreeNode into a Page. 142 * 143 * @param <Key> 144 * @param <Value> 145 */ 146 static public class Marshaller<Key,Value> extends VariableMarshaller<BTreeNode<Key,Value>> { 147 private final BTreeIndex<Key,Value> index; 148 149 public Marshaller(BTreeIndex<Key,Value> index) { 150 this.index = index; 151 } 152 153 public void writePayload(BTreeNode<Key,Value> node, DataOutput os) throws IOException { 154 // Write the keys 155 short count = (short)node.keys.length; // cast may truncate value... 156 if( count != node.keys.length ) { 157 throw new IOException("Too many keys"); 158 } 159 160 os.writeShort(count); 161 for (int i = 0; i < node.keys.length; i++) { 162 index.getKeyMarshaller().writePayload(node.keys[i], os); 163 } 164 165 if( node.isBranch() ) { 166 // If this is a branch... 167 os.writeBoolean(true); 168 for (int i = 0; i < count+1; i++) { 169 os.writeLong(node.children[i]); 170 } 171 172 } else { 173 // If this is a leaf 174 os.writeBoolean(false); 175 for (int i = 0; i < count; i++) { 176 index.getValueMarshaller().writePayload(node.values[i], os); 177 } 178 os.writeLong(node.next); 179 } 180 } 181 182 @SuppressWarnings("unchecked") 183 public BTreeNode<Key,Value> readPayload(DataInput is) throws IOException { 184 BTreeNode<Key,Value> node = new BTreeNode<Key,Value>(index); 185 int count = is.readShort(); 186 187 node.keys = (Key[])new Object[count]; 188 for (int i = 0; i < count; i++) { 189 node.keys[i] = index.getKeyMarshaller().readPayload(is); 190 } 191 192 if( is.readBoolean() ) { 193 node.children = new long[count+1]; 194 for (int i = 0; i < count+1; i++) { 195 node.children[i] = is.readLong(); 196 } 197 } else { 198 node.values = (Value[])new Object[count]; 199 for (int i = 0; i < count; i++) { 200 node.values[i] = index.getValueMarshaller().readPayload(is); 201 } 202 node.next = is.readLong(); 203 } 204 return node; 205 } 206 } 207 208 public BTreeNode(BTreeIndex<Key,Value> index) { 209 this.index = index; 210 } 211 212 public void setEmpty() { 213 setLeafData(createKeyArray(0), createValueArray(0)); 214 } 215 216 217 /** 218 * Internal (to the BTreeNode) method. Because this method is called only by 219 * BTreeNode itself, no synchronization done inside of this method. 220 * @throws IOException 221 */ 222 private BTreeNode<Key,Value> getChild(Transaction tx, int idx) throws IOException { 223 if (isBranch() && idx >= 0 && idx < children.length) { 224 BTreeNode<Key, Value> result = this.index.loadNode(tx, children[idx], this); 225 return result; 226 } else { 227 return null; 228 } 229 } 230 231 public Value remove(Transaction tx, Key key) throws IOException { 232 233 if(isBranch()) { 234 int idx = Arrays.binarySearch(keys, key); 235 idx = idx < 0 ? -(idx + 1) : idx + 1; 236 BTreeNode<Key, Value> child = getChild(tx, idx); 237 if( child.getPageId() == index.getPageId() ) { 238 throw new IOException("BTree corrupted: Cylce detected."); 239 } 240 Value rc = child.remove(tx, key); 241 242 // child node is now empty.. remove it from the branch node. 243 if( child.keys.length == 0 ) { 244 245 // If the child node is a branch, promote 246 if( child.isBranch() ) { 247 // This is cause branches are never really empty.. they just go down to 1 child.. 248 children[idx] = child.children[0]; 249 } else { 250 251 // The child was a leaf. Then we need to actually remove it from this branch node.. 252 253 // We need to update the previous child's next pointer to skip over the child being removed.... 254 if( idx > 0 && children.length > 1) { 255 BTreeNode<Key, Value> previousChild = getChild(tx, idx-1); 256 previousChild.next = child.next; 257 index.storeNode(tx, previousChild, true); 258 } 259 260 if( idx < children.length-1 ) { 261 // Delete it and key to the right. 262 setBranchData(arrayDelete(keys, idx), arrayDelete(children, idx)); 263 } else { 264 // It was the last child.. Then delete it and key to the left 265 setBranchData(arrayDelete(keys, idx-1), arrayDelete(children, idx)); 266 } 267 268 // If we are the root node, and only have 1 child left. Then 269 // make the root be the leaf node. 270 if( children.length == 1 && parent==null ) { 271 child = getChild(tx, 0); 272 keys = child.keys; 273 children = child.children; 274 values = child.values; 275 // free up the page.. 276 tx.free(child.getPage()); 277 } 278 279 } 280 index.storeNode(tx, this, true); 281 } 282 283 return rc; 284 } else { 285 int idx = Arrays.binarySearch(keys, key); 286 if (idx < 0) { 287 return null; 288 } else { 289 Value oldValue = values[idx]; 290 setLeafData(arrayDelete(keys, idx), arrayDelete(values, idx)); 291 292 if( keys.length==0 && parent!=null) { 293 tx.free(getPage()); 294 } else { 295 index.storeNode(tx, this, true); 296 } 297 298 return oldValue; 299 } 300 } 301 } 302 303 public Value put(Transaction tx, Key key, Value value) throws IOException { 304 if (key == null) { 305 throw new IllegalArgumentException("Key cannot be null"); 306 } 307 308 if( isBranch() ) { 309 return getLeafNode(tx, this, key).put(tx, key, value); 310 } else { 311 int idx = Arrays.binarySearch(keys, key); 312 313 Value oldValue=null; 314 if (idx >= 0) { 315 // Key was found... Overwrite 316 oldValue = values[idx]; 317 values[idx] = value; 318 setLeafData(keys, values); 319 } else { 320 // Key was not found, Insert it 321 idx = -(idx + 1); 322 setLeafData(arrayInsert(keys, key, idx), arrayInsert(values, value, idx)); 323 } 324 325 try { 326 index.storeNode(tx, this, allowOverflow()); 327 } catch ( Transaction.PageOverflowIOException e ) { 328 // If we get an overflow 329 split(tx); 330 } 331 332 return oldValue; 333 } 334 } 335 336 private void promoteValue(Transaction tx, Key key, long nodeId) throws IOException { 337 338 int idx = Arrays.binarySearch(keys, key); 339 idx = idx < 0 ? -(idx + 1) : idx + 1; 340 setBranchData(arrayInsert(keys, key, idx), arrayInsert(children, nodeId, idx + 1)); 341 342 try { 343 index.storeNode(tx, this, allowOverflow()); 344 } catch ( Transaction.PageOverflowIOException e ) { 345 split(tx); 346 } 347 348 } 349 350 /** 351 * Internal to the BTreeNode method 352 */ 353 private void split(Transaction tx) throws IOException { 354 Key[] leftKeys; 355 Key[] rightKeys; 356 Value[] leftValues=null; 357 Value[] rightValues=null; 358 long[] leftChildren=null; 359 long[] rightChildren=null; 360 Key separator; 361 362 int vc = keys.length; 363 int pivot = vc / 2; 364 365 // Split the node into two nodes 366 if( isBranch() ) { 367 368 leftKeys = createKeyArray(pivot); 369 leftChildren = new long[leftKeys.length + 1]; 370 rightKeys = createKeyArray(vc - (pivot + 1)); 371 rightChildren = new long[rightKeys.length + 1]; 372 373 System.arraycopy(keys, 0, leftKeys, 0, leftKeys.length); 374 System.arraycopy(children, 0, leftChildren, 0, leftChildren.length); 375 System.arraycopy(keys, leftKeys.length + 1, rightKeys, 0, rightKeys.length); 376 System.arraycopy(children, leftChildren.length, rightChildren, 0, rightChildren.length); 377 378 // Is it a Simple Prefix BTree?? 379 Prefixer<Key> prefixer = index.getPrefixer(); 380 if(prefixer!=null) { 381 separator = prefixer.getSimplePrefix(leftKeys[leftKeys.length - 1], rightKeys[0]); 382 } else { 383 separator = keys[leftKeys.length]; 384 } 385 386 387 } else { 388 389 leftKeys = createKeyArray(pivot); 390 leftValues = createValueArray(leftKeys.length); 391 rightKeys = createKeyArray(vc - pivot); 392 rightValues = createValueArray(rightKeys.length); 393 394 System.arraycopy(keys, 0, leftKeys, 0, leftKeys.length); 395 System.arraycopy(values, 0, leftValues, 0, leftValues.length); 396 System.arraycopy(keys, leftKeys.length, rightKeys, 0, rightKeys.length); 397 System.arraycopy(values, leftValues.length, rightValues, 0, rightValues.length); 398 399 // separator = getSeparator(leftVals[leftVals.length - 1], 400 // rightVals[0]); 401 separator = rightKeys[0]; 402 403 } 404 405 // Promote the pivot to the parent branch 406 if (parent == null) { 407 408 // This can only happen if this is the root 409 BTreeNode<Key,Value> rNode = this.index.createNode(tx, this); 410 BTreeNode<Key,Value> lNode = this.index.createNode(tx, this); 411 412 if( isBranch() ) { 413 rNode.setBranchData(rightKeys, rightChildren); 414 lNode.setBranchData(leftKeys, leftChildren); 415 } else { 416 rNode.setLeafData(rightKeys, rightValues); 417 lNode.setLeafData(leftKeys, leftValues); 418 lNode.setNext(rNode.getPageId()); 419 } 420 421 Key[] v = createKeyArray(1); 422 v[0]=separator; 423 setBranchData(v, new long[] { lNode.getPageId(), rNode.getPageId() }); 424 425 index.storeNode(tx, this, true); 426 index.storeNode(tx, rNode, true); 427 index.storeNode(tx, lNode, true); 428 429 } else { 430 BTreeNode<Key,Value> rNode = this.index.createNode(tx, parent); 431 432 if( isBranch() ) { 433 setBranchData(leftKeys, leftChildren); 434 rNode.setBranchData(rightKeys, rightChildren); 435 } else { 436 rNode.setNext(next); 437 next = rNode.getPageId(); 438 setLeafData(leftKeys, leftValues); 439 rNode.setLeafData(rightKeys, rightValues); 440 } 441 442 index.storeNode(tx, this, true); 443 index.storeNode(tx, rNode, true); 444 parent.promoteValue(tx, separator, rNode.getPageId()); 445 } 446 } 447 448 public void printStructure(Transaction tx, PrintWriter out, String prefix) throws IOException { 449 if( prefix.length()>0 && parent == null ) { 450 throw new IllegalStateException("Cycle back to root node detected."); 451 } 452 453 if( isBranch() ) { 454 for(int i=0 ; i < children.length; i++) { 455 BTreeNode<Key, Value> child = getChild(tx, i); 456 if( i == children.length-1) { 457 out.println(prefix+"\\- "+child.getPageId()+(child.isBranch()?" ("+child.children.length+")":"")); 458 child.printStructure(tx, out, prefix+" "); 459 } else { 460 out.println(prefix+"|- "+child.getPageId()+(child.isBranch()?" ("+child.children.length+")":"")+" : "+keys[i]); 461 child.printStructure(tx, out, prefix+" "); 462 } 463 } 464 } 465 } 466 467 468 public int getMinLeafDepth(Transaction tx, int depth) throws IOException { 469 depth++; 470 if( isBranch() ) { 471 int min = Integer.MAX_VALUE; 472 for(int i=0 ; i < children.length; i++) { 473 min = Math.min(min, getChild(tx, i).getMinLeafDepth(tx, depth)); 474 } 475 return min; 476 } else { 477 // print(depth*2, "- "+page.getPageId()); 478 return depth; 479 } 480 } 481 482 public int getMaxLeafDepth(Transaction tx, int depth) throws IOException { 483 depth++; 484 if( isBranch() ) { 485 int v = 0; 486 for(int i=0 ; i < children.length; i++) { 487 v = Math.max(v, getChild(tx, i).getMaxLeafDepth(tx, depth)); 488 } 489 depth = v; 490 } 491 return depth; 492 } 493 494 public Value get(Transaction tx, Key key) throws IOException { 495 if (key == null) { 496 throw new IllegalArgumentException("Key cannot be null"); 497 } 498 if( isBranch() ) { 499 return getLeafNode(tx, this, key).get(tx, key); 500 } else { 501 int idx = Arrays.binarySearch(keys, key); 502 if (idx < 0) { 503 return null; 504 } else { 505 return values[idx]; 506 } 507 } 508 } 509 510 public boolean isEmpty(final Transaction tx) throws IOException { 511 return keys.length==0; 512 } 513 514 public void visit(Transaction tx, BTreeVisitor<Key, Value> visitor) throws IOException { 515 if (visitor == null) { 516 throw new IllegalArgumentException("Visitor cannot be null"); 517 } 518 if( isBranch() ) { 519 for(int i=0; i < this.children.length; i++) { 520 Key key1 = null; 521 if( i!=0 ) { 522 key1 = keys[i-1]; 523 } 524 Key key2 = null; 525 if( i!=this.children.length-1 ) { 526 key2 = keys[i]; 527 } 528 if( visitor.isInterestedInKeysBetween(key1, key2) ) { 529 BTreeNode<Key, Value> child = getChild(tx, i); 530 child.visit(tx, visitor); 531 } 532 } 533 } else { 534 visitor.visit(Arrays.asList(keys), Arrays.asList(values)); 535 } 536 } 537 538 public Map.Entry<Key,Value> getFirst(Transaction tx) throws IOException { 539 BTreeNode<Key, Value> node = this; 540 while( node .isBranch() ) { 541 node = node.getChild(tx, 0); 542 } 543 if( node.values.length>0 ) { 544 return new KeyValueEntry(node.keys[0], node.values[0]); 545 } else { 546 return null; 547 } 548 } 549 550 public Map.Entry<Key,Value> getLast(Transaction tx) throws IOException { 551 BTreeNode<Key, Value> node = this; 552 while( node.isBranch() ) { 553 node = node.getChild(tx, node.children.length-1); 554 } 555 if( node.values.length>0 ) { 556 int idx = node.values.length-1; 557 return new KeyValueEntry(node.keys[idx], node.values[idx]); 558 } else { 559 return null; 560 } 561 } 562 563 public BTreeNode<Key,Value> getFirstLeafNode(Transaction tx) throws IOException { 564 BTreeNode<Key, Value> node = this; 565 while( node .isBranch() ) { 566 node = node.getChild(tx, 0); 567 } 568 return node; 569 } 570 571 public Iterator<Map.Entry<Key,Value>> iterator(final Transaction tx, Key startKey) throws IOException { 572 if (startKey == null) { 573 return iterator(tx); 574 } 575 if( isBranch() ) { 576 return getLeafNode(tx, this, startKey).iterator(tx, startKey); 577 } else { 578 int idx = Arrays.binarySearch(keys, startKey); 579 if (idx < 0) { 580 idx = -(idx + 1); 581 } 582 return new BTreeIterator(tx, this, idx); 583 } 584 } 585 586 public Iterator<Map.Entry<Key,Value>> iterator(final Transaction tx) throws IOException { 587 return new BTreeIterator(tx, getFirstLeafNode(tx), 0); 588 } 589 590 public void clear(Transaction tx) throws IOException { 591 if( isBranch() ) { 592 for (int i = 0; i < children.length; i++) { 593 BTreeNode<Key, Value> node = index.loadNode(tx, children[i], this); 594 node.clear(tx); 595 tx.free(node.getPage()); 596 } 597 } 598 // Reset the root node to be a leaf. 599 if( parent == null ) { 600 setLeafData(createKeyArray(0), createValueArray(0)); 601 next=-1; 602 index.storeNode(tx, this, true); 603 } 604 } 605 606 607 private static <Key,Value> BTreeNode<Key, Value> getLeafNode(Transaction tx, final BTreeNode<Key, Value> node, Key key) throws IOException { 608 BTreeNode<Key, Value> current = node; 609 while( true ) { 610 if( current.isBranch() ) { 611 int idx = Arrays.binarySearch(current.keys, key); 612 idx = idx < 0 ? -(idx + 1) : idx + 1; 613 BTreeNode<Key, Value> child = current.getChild(tx, idx); 614 615 // A little cycle detection for sanity's sake 616 if( child == node ) { 617 throw new IOException("BTree corrupted: Cylce detected."); 618 } 619 620 current = child; 621 } else { 622 break; 623 } 624 } 625 return current; 626 } 627 628 public boolean contains(Transaction tx, Key key) throws IOException { 629 if (key == null) { 630 throw new IllegalArgumentException("Key cannot be null"); 631 } 632 633 if( isBranch() ) { 634 return getLeafNode(tx, this, key).contains(tx, key); 635 } else { 636 int idx = Arrays.binarySearch(keys, key); 637 if (idx < 0) { 638 return false; 639 } else { 640 return true; 641 } 642 } 643 } 644 645 /////////////////////////////////////////////////////////////////// 646 // Implementation methods 647 /////////////////////////////////////////////////////////////////// 648 649 650 private boolean allowOverflow() { 651 // Only allow page overflow if there are <= 3 keys in the node. Otherwise a split will occur on overflow 652 return this.keys.length<=3; 653 } 654 655 656 private void setLeafData(Key[] keys, Value[] values) { 657 this.keys = keys; 658 this.values = values; 659 this.children = null; 660 } 661 662 private void setBranchData(Key[] keys, long[] nodeIds) { 663 this.keys = keys; 664 this.children = nodeIds; 665 this.values = null; 666 } 667 668 @SuppressWarnings("unchecked") 669 private Key[] createKeyArray(int size) { 670 return (Key[])new Object[size]; 671 } 672 673 @SuppressWarnings("unchecked") 674 private Value[] createValueArray(int size) { 675 return (Value[])new Object[size]; 676 } 677 678 @SuppressWarnings("unchecked") 679 static private <T> T[] arrayDelete(T[] vals, int idx) { 680 T[] newVals = (T[])new Object[vals.length - 1]; 681 if (idx > 0) { 682 System.arraycopy(vals, 0, newVals, 0, idx); 683 } 684 if (idx < newVals.length) { 685 System.arraycopy(vals, idx + 1, newVals, idx, newVals.length - idx); 686 } 687 return newVals; 688 } 689 690 static private long[] arrayDelete(long[] vals, int idx) { 691 long[] newVals = new long[vals.length - 1]; 692 if (idx > 0) { 693 System.arraycopy(vals, 0, newVals, 0, idx); 694 } 695 if (idx < newVals.length) { 696 System.arraycopy(vals, idx + 1, newVals, idx, newVals.length - idx); 697 } 698 return newVals; 699 } 700 701 @SuppressWarnings("unchecked") 702 static private <T> T[] arrayInsert(T[] vals, T val, int idx) { 703 T[] newVals = (T[])new Object[vals.length + 1]; 704 if (idx > 0) { 705 System.arraycopy(vals, 0, newVals, 0, idx); 706 } 707 newVals[idx] = val; 708 if (idx < vals.length) { 709 System.arraycopy(vals, idx, newVals, idx + 1, vals.length - idx); 710 } 711 return newVals; 712 } 713 714 715 static private long[] arrayInsert(long[] vals, long val, int idx) { 716 717 long[] newVals = new long[vals.length + 1]; 718 if (idx > 0) { 719 System.arraycopy(vals, 0, newVals, 0, idx); 720 } 721 newVals[idx] = val; 722 if (idx < vals.length) { 723 System.arraycopy(vals, idx, newVals, idx + 1, vals.length - idx); 724 } 725 return newVals; 726 } 727 728 /////////////////////////////////////////////////////////////////// 729 // Property Accessors 730 /////////////////////////////////////////////////////////////////// 731 private boolean isBranch() { 732 return children!=null; 733 } 734 735 public long getPageId() { 736 return page.getPageId(); 737 } 738 739 public BTreeNode<Key, Value> getParent() { 740 return parent; 741 } 742 743 public void setParent(BTreeNode<Key, Value> parent) { 744 this.parent = parent; 745 } 746 747 public Page<BTreeNode<Key, Value>> getPage() { 748 return page; 749 } 750 751 public void setPage(Page<BTreeNode<Key, Value>> page) { 752 this.page = page; 753 } 754 755 public long getNext() { 756 return next; 757 } 758 759 public void setNext(long next) { 760 this.next = next; 761 } 762 763 @Override 764 public String toString() { 765 return "[BTreeNode "+(isBranch()?"branch":"leaf")+": "+Arrays.asList(keys)+"]"; 766 } 767 768 } 769 770