Chris Okasaki’s “Purely functional data structures”.

February 19, 2010

Jon Harrop, of Flying Frog Consultancy have posted a comment that I should read Chris Okasaki’s “Purely functional data structures”.

I googled it and found this pdf : Purely functional data structures

And there is a book of course. I don’t know if the pdf and the book are the same. Nevertheless, so far I’ve been reading the pdf, and it’s awesome !

Thank you Jon !

PS: I’ve been follower of FFC blog and FFC website for about couple of years now. So I feel pretty excited to get an advice from the person who kicks butt at OCaml and F#.


Skip Lists and functional programming

February 18, 2010

I spent some time trying to figure out how to write a Skip Lists data-structure in F# using pure functional approach. It seems that functional implementation of Skip Lists will make them perform in O(n^2) instead of O(log n). This happens since in a functional implementation lists in Skip List data-structure are immutable. Therefore any change to a list – whether element is inserted or removed – will force the whole Skip List to be rebuild. That includes n lists vertically and then each list has double number of items compared to the list above.


Red and Black Tree re-write

February 16, 2010

I rewrote Red And Black tree from scratch. Making new implementation about 100 lines less than what I posted before. I also believe that new implementation is much simpler, and it follows the Red And Black tree definition pretty closely.

I’ve been using Mercurial to revision control my projects. So I’ve pushed Avl Tree and Red and Black tree to BitBucket.
F# data structures.
And I also plan to implement many more other fancy data structures.


Red and Black tree in F#: post mortem

January 20, 2010

I implemented the Red and Black tree so I could understand continuations better. I really don’t like the implementation, since it is not as clean as I would like it to be.

But it works nevertheless. I did some random testing of 10000 elements. Insertion and deletion worked. The cool thing is that the Black height of the tree with 10000 elements does not exceed 10.


Red and Black tree in F# – extras

January 20, 2010

This includes some simple functions that are might be useful for playing around with the Red and Black tree.

This is the height of tree measured in black nodes


let getBlackHeight tree =

    let rec doGetBlackHeight leftoverTree height =

      match leftoverTree with
      | Empty -> height
      | Node(node, leftNode, _) ->

        match node with
        | Black _ -> doGetBlackHeight leftNode (height + 1)
        | Red _ -> doGetBlackHeight leftNode height

    doGetBlackHeight tree 0


let getNumberOfNodes tree =

    let rec doGetNumberOfNodes node sum cont =

      match node with
      | Empty -> cont sum
      | Node(n, lc, rc) -> doGetNumberOfNodes lc (sum + 1) (fun theSum -> doGetNumberOfNodes rc theSum cont)

    doGetNumberOfNodes tree 0 (fun x -> x)


//returns the node, with the value
let rec findNode node value =

    match node with
    | Empty -> Empty
    | Node(Red(v) as t, lc, rc) | Node(Black(v) as t, lc, rc) ->

      match compare v value with
      | 0 -> node
      | -1 -> findNode lc value
      | 1 -> findNode rc value
      | _ -> Empty


//returns true if value is in the tree. false otherwise
let find tree value =

    let node = findNode tree value
    match node with
    | Empty -> false
    | _ -> true


//this is the check that the tree has the same black height in every branch
let validate tree =

    let theHeight = getBlackHeight tree

    let checkHeight h =

      if h theHeight then

        failwith (sprintf "Height does not match! Should be %d but it is %d. " theHeight h)

    let rec doCheckBlackHeight rest height cont =

      match rest with
      | Empty -> checkHeight height; cont height
      | Node(Black(_), lc, rc) -> doCheckBlackHeight lc (height + 1) (fun h -> doCheckBlackHeight rc (height + 1) cont)
      | Node(Red(_), lc, rc) -> doCheckBlackHeight lc height (fun h -> doCheckBlackHeight rc height cont)

    doCheckBlackHeight tree 0 (fun x -> x)


let testInsert length =

    let rand = new System.Random()
    let numbers = [for i in 1 .. length -> rand.Next()]

    printfn "Number of insertions : %d" length

    let rec ins tree l =

      match l with
      | [] -> tree
      | hd :: tl ->

        let r, t = insert tree hd
        if false = r then

          printfn "Failed to insert %d" hd

        let _ = validate t
        ins t tl

    ins Empty numbers, numbers


let testRemove tree numbers =

    let rec rem tree l =

      match l with
      | [] -> tree
      | hd :: tl ->

        let r, t = remove tree hd
        if false = r then

          printfn "Failed to delete %d, remaining tree is %A" hd t

        try

          validate t |> (fun x -> ())

        with

          | ex -> printfn "Failed to delete %d from\n %A" hd tree; failwith ex.Message

        rem t tl

    rem tree numbers


let test length =

    let tree, numbers = testInsert length
    if length < 100 then

      printfn "going to use numbers : %A" numbers

    printfn "Done inserting"
    printfn "Total number of nodes : %d" (getNumberOfNodes tree)
    printfn "Validating the tree"
    printfn "Black height of the tree is %d" (validate tree)
    if length < 100 then

      printfn "Going to perform deletions on tree : %A" tree

    printfn "Going to perform deletions"
    let finalTree = testRemove tree numbers
    printfn "Done deleting. The tree is %A, and it should be Empty" finalTree


let testRand () =

    let rand = new System.Random()
    let length = rand.Next(10, 10000)
    test length


//the console loop:
let mutable quit = false
let mutable tree = Empty


let printHelp () =

    printfn "In conosle loop we only support integer values"
    printfn "q -> quit"
    printfn "i 10 -> insert 10 into the tree"
    printfn "d -20 -> delete -20 from the tree"
    printfn "f 5 -> find 5 in the tree"
    printfn "h -> get black height"
    printfn "r -> reset the tree"
    printfn "n -> get number of nodes"
    printfn "t -> run a test that inserts random nodes and then deletes them"
    printfn "? -> print this help message"

printHelp()


let print tree =

    printfn "%A" tree


while not quit do

    let input = System.Console.ReadLine()
    let strings = input.Split([|' '|])

    try

      let s = strings.[0]
      match s with
      | "q" -> quit

        let mutable value = 0
        if false = System.Int32.TryParse(strings.[1], &value) then

          printfn "Value %s failed to get converted to an integer" strings.[1]

        else

          printfn "Insert value %d" value
          match insert tree value with
          | true, t -> tree printfn "Element %d is already in the tree" value
          print tree

      | "d" ->

        let mutable value = 0
        if false = System.Int32.TryParse(strings.[1], &value) then

          printfn "Value %s failed to get converted to an integer" strings.[1]

        else

          match remove tree value with
          | true, t -> tree printfn "Element %d does not exist in the tree" value
          print tree

      | "f" ->

        let mutable value = 0
        if false = System.Int32.TryParse(strings.[1], &value) then

          printfn "Value %s failed to get converted to an integer" strings.[1]

        else

          printfn "Searching for value %d returned %A" value (find tree value)

      | "h" -> printfn "Tree height in Black nodes : %d" (getBlackHeight tree)
      | "r" -> printfn "Going to reset tree"; tree printfn "Number of nodes is %d" (getNumberOfNodes tree)
      | "t" ->

        let mutable value = 0
        if false = System.Int32.TryParse(strings.[1], &value) then

          printfn "Value %s failed to get converted to an integer" strings.[1]

        else

          test value

        | "?" -> printHelp()
        | "" -> () //if enter was pressed - just ignore it
        | _ -> printfn "I don't think %s is supported" strings.[0]

    with
    | ex -> printfn "Uhhh, there is an error : %s" ex.Message


Red and Black tree in F# : Deletion

January 20, 2010

Deleting a node even from regular binary tree is a little bit more complicated than insertion. Deleting a node from Red and Black tree is also a little bit more complicated than insertion.

I updated the legend to include Double black node:
Updated legend

Double Black nodes appear when a Black node with Empty children becomes Empty. Thus it acquires extra black property from it’s children making the node Double Black.

As an example here is a tree that is the result of the sample insertions from the previous post:
final tree

Deleting node 7 would result in following steps:
Step 1

Value 7 is deleted, and value 12 is bubbled up to the node that used to hold 7.

Step 2

In order to preserve black height property it is not possible to throw away Empty nodes. Since Empty nodes are considered to be Black nodes, they get merged into double black node with the node that held value 12.
Step 3
To get rid of the double black node, and preserve black height property – perform node rotation, and recoloring. This produces the final tree.
Step 4

The book that I am using for implementation of the Red and Black tree covers only 3 node restructuring cases for deletion. After implementing those, I found that there is another case that was not mentioned by the book.

Case 1 happens when parent node is Red, it has one child that has Double Black property, and another child that is a Black node with at least one Red child.
In this case the black height of the tree to left of node with value 3 is the same as to the right of it. Even though the node to the right is a Double Black node.
The mirror case needs to be handled as well, when Double Black node is a left child.
Case 1
Case1

Case 1

Case 2 happens when parent node is Red, and it has one child with Double Black property and another child is Black node child. Then parent node becomes black, and Black node child becomes Red node, and Double Black node child becomes a Black node.
Case 2

On the other hand if parent node is a Black node, then Double Black property bubbles up to the parent node.
Case 2

Case 3 needs node restructuring, which after restructuring could be handled as Case 1 or Case 2.
Case 3

When I was doing some testing I found out that there is one case that was not handled. And the case is not mentioned in the book.
Not mentioned case :
Not Mentioned

Naming of functions that handle Double Black node cases might be not the best. For example fixLeftChildRedParent – means that Right Child has a Double Black property and it’s parent is a Red node, and we need to do some fixes to the Left Child. Ether recolor some nodes, or rearrange them. Thus removing Double Black property from the Right Child.


let fixLeftChildRedParent pV lc rc =

    match lc with
    //case 1 begins
    | Empty -> failwith "leftChild is empty when fixing double black"
    | Node(Black(lcV), (Node(Red(lclcV), lclclc, lclcrc) as lclc), lcrc) ->

      let newRc, newLc = Node(Black(pV), lcrc, rc), Node(Black(lclcV), lclclc, lclcrc)
      false, Node(Red(lcV), newLc, newRc)

    | Node(Black(lcV), lclc, (Node(Red(lcrcV), lcrclc, lcrcrc) as lcrc)) ->

      let newRc, newLc = Node(Black(pV), lcrcrc, rc), Node(Black(lcV), lclc, lcrclc)
      false, Node(Red(lcrcV), newLc, newRc)

    //case 1 ends
    //case 2 begins
    | Node(Black(lcV), lclc, lcrc) when isConsideredBlack lclc && isConsideredBlack lcrc ->

      let newRc, newLc = rc, Node(Red(lcV), lclc, lcrc)
      false, Node(Black(pV), newLc, newRc)

    //case 2 ends
    | _ -> failwith (sprintf "wrong node combination in fixLeftChildRedParent. LeftChild is %A" lc)


let fixRightChildRedParent pV lc rc =

    match rc with
    //case 1 begins
    | Empty -> failwith "rightChild is empty when fixing double black"
    | Node(Black(rcV), (Node(Red(rclcV), rclclc, rclcrc) as rclc), rcrc) ->

      let newLc, newRc = Node(Black(pV), lc, rclclc), Node(Black(rcV), rclcrc, rcrc)
      false, Node(Red(rclcV), newLc, newRc)

    | Node(Black(rcV), rclc, (Node(Red(rcrcV), rcrclc, rcrcrc) as rcrc)) ->

      let newLc, newRc = Node(Black(pV), lc, rclc), Node(Black(rcrcV), rcrclc, rcrcrc)
      false, Node(Red(rcV), newLc, newRc)

    //case 1 ends
    //case 2 begins
    | Node(Black(rcV), rclc, rcrc) when isConsideredBlack rclc && isConsideredBlack rcrc ->

      let newLc, newRc = lc, Node(Red(rcV), rclc, rcrc)
      false, Node(Black(pV), newLc, newRc)

    //case 2 ends
    | _ -> failwith (sprintf "wrong node combination in fixRightChildRedParent. RightChild is %A" rc)


//lc -> leftChild
//rc -> rightChild
//p -> parent
//pV -> parentValue
let rec fixDoubleBlack parent child =

    match parent with
    | Node(Red(pV), lc, rc) -> if rc = child

        then fixLeftChildRedParent pV lc rc
        else fixRightChildRedParent pV lc rc

    | Node(Black(pV), lc, rc) -> if rc = child

        then fixLeftChildBlackParent pV lc rc
        else fixRightChildBlackParent pV lc rc

    | Empty -> failwith "parent node cannot be empty when fixing double black"

and fixLeftChildBlackParent pV lc rc =

    match lc with
    | Empty -> failwith "leftChild is empty when fixing double black with black parent"

    //This cases were not mentioned in the book, but the do occur.
    | Node(Black(lcV), Node(Red(lclcV), lclclc, lclcrc), lcrc) ->

      let newLc = Node(Black(lclcV), lclclc, lclcrc)
      let newRc = Node(Black(pV), lcrc, rc)
      false, Node(Black(lcV), newLc, newRc)

    | Node(Black(lcV), lclc, Node(Red(lcrcV), lcrclc, lcrcrc)) ->

      let newLc = Node(Black(lcV), lclc, lcrclc)
      let newRc = Node(Black(pV), lcrcrc, rc)
      false, Node(Black(lcrcV), newLc, newRc)

    //end of not mentioned cases

    //case 2 begins
    | Node(Black(lcV), lclc, lcrc) when isConsideredBlack lclc && isConsideredBlack lcrc->

      let newRc, newLc = rc, Node(Red(lcV), lclc, lcrc)
      true, Node(Black(pV), newLc, newRc)

    //case 2 ends
    //case 3 begins
    | Node(Red(lcV), lclc, lcrc) ->

      let newLc, newDoubleBlackRc = lclc, Node(Red(pV), lcrc, rc)
      let isStillDoubleBlack, newRc = fixDoubleBlack newDoubleBlackRc rc
      assert(false = isStillDoubleBlack)
      false, Node(Black(lcV), newLc, newRc)

    //case 3 ends
    | _ -> failwith (sprintf "wrong node combination in fixLeftChildBlackParent. LeftChild is %A" lc)

and fixRightChildBlackParent pV lc rc =

    match rc with
    | Empty -> failwith "rightChild is empty when fixing double black with black parent"

    //not mentioned cases begin
    | Node(Black(rcV), Node(Red(rclcV), rclclc, rclcrc), rcrc) ->

      let newLc = Node(Black(pV), lc, rclclc)
      let newRc = Node(Black(rcV), rclcrc, rcrc)
      false, Node(Black(rclcV), newLc, newRc)

    | Node(Black(rcV), rclc, Node(Red(rcrcV), rcrclc, rcrcrc)) ->

      let newLc = Node(Black(pV), lc, rclc)
      let newRc = Node(Black(rcrcV), rcrclc, rcrcrc)
      false, Node(Black(rcV), newLc, newRc)

    //not mentioned cases end
    //case 2 begins
    | Node(Black(rcV), rclc, rcrc) when isConsideredBlack rclc && isConsideredBlack rcrc ->

      let newLc, newRc = lc, Node(Red(rcV), rclc, rcrc)
      true, Node(Black(pV), newLc, newRc)

    //case 2 ends
    //case 3 begins
    | Node(Red(rcV), rclc, rcrc) ->

      let newRc, newDoubleBlackLc = rcrc, Node(Red(pV), lc, rclc)
      let isStillDoubleBlack, newLc = fixDoubleBlack newDoubleBlackLc lc
      assert(false = isStillDoubleBlack)
      false, Node(Black(rcV), newLc, newRc)

    //case 3 ends
    | _ -> failwith (sprintf "wrong node combination in fixRightChildBlackParent. RightChild is :%A" rc)


(*Deletion is a little bit more complicated than insertion. When inserting, the node is added as a leaf, and then
there are maybe some node rotations have to performed in order to keep the tree balanced. But the node that we
inserting is always added as a leaf. Now when we do deletion we can delete a node from anywhwere in the tree.
So that creates an extra complication, since now when we find the node that we want to delete, we need to go
down the tree from that node, and bubble all the values up, when we start bubbling values up we might need
to fix double black node. Fix could ether get rid of double black node completely, or propogate it to the parent
node. In that case we have to perform fix in the parent node. That in turn could also propogate double black node
to it's parent ... *)


//returns tuple (bool, Node).
//bool value in the tuple is true if the node has double black property
//false otherwise
//bb -> double black

let fixDoubleBlackIfNeeded bb parent child =

    if bb
    then fixDoubleBlack parent child
    else false, parent

This is kind of similar to deletion in regular binary tree. When deleting a value from the node, it could happen that the node is not a leaf node. Therefore that whole needs to be patched up. So use a value from a leaf node to patch the whole in the tree. Then rearrange leaf nodes to keep the tree balanced.
The way leaf node is picked is -> from the node that is being deleted, go once down the right child, and then go down left from the right child until the Empty node is reached. The parent of the Empty node is the leaf node that is used in fixing the hole.


let bubbleUp node =

    //go right one node, then go down left as far as possible -> that's the child that should replace the deleted node
    //if that child had a child on the right, then move that child one up.
    //then start going up the tree fixing double black until we get to the start.
    //Replace the node at the start with the left most child node

    //special cases -> node does not have right child, but it has one left child, then use the left child to
    //use it instead of the node

    let rec bbUp node cont =

      match node with
      | Empty -> failwith "Node cannot be empty when bbUp"
      | Node(n, lc, rc) ->

        match lc with
        | Empty -> failwith (sprintf "Left child is Empty when bbUp. Node %A" n)
        | Node(Red(_), Empty, Node(Red(_), _, _)) -> failwith "Error, parent and child are both Red nodes"
        | Node(Black(_), Empty, Node(Black(_), _, _)) -> failwith "Erorr black height property does not hold"
        | Node(Red(lcV), Empty, Empty) -> cont (lcV, (false, Node(n, Empty, rc)))
        | Node(Black(lcV), Empty, Empty) -> cont (lcV, fixDoubleBlackIfNeeded true (Node(n, Empty, rc)) Empty)
        | Node(Black(lcV), Empty, Node(Red(lcrcV), Empty, Empty)) ->
        cont (lcV, (false, Node(n, Node(Black(lcrcV), Empty, Empty), rc)))
        | lc -> cont (bbUp lc (fun (v, (bb, newLc)) -> v, fixDoubleBlackIfNeeded bb (Node(n, newLc, rc)) newLc))

      //handle special cases first
      match node with
      | Empty -> failwith "node cannot be empty when bubbling up"
      //if right child is empty, and there is non empty left child. Then left child has to be Red with Empty children,
      //otherwise Black height property is violated
      | Node(Black(_), Empty, Empty) -> (true, Empty)
      | Node(Red(_), Empty, Empty) -> (false, Empty)

      //if right child is empty and left child is not empty - it has to be red node with no children,
      //otherwise Black height property will not hold
      | Node(Black(_), Node(Red(lcV), Empty, Empty), Empty) -> false, Node(Black(lcV), Empty, Empty)
      | Node(Black(_), Node(Black(_), Empty, Empty), Empty) -> failwith (sprintf "black height propery is violated %A" node)

      //another special case, when the node that we are deleting has right non empty child, but the child
      //does not have any childred. So it is not possible to go down the left branch.
      | Node(n, lc, Node(rcn, Empty, Empty)) ->

        match n, rcn with
        | Black(_), Black(rcV) -> fixDoubleBlackIfNeeded true (Node(Black(rcV), lc, Empty)) Empty
        | Black(_), Red(rcV) -> false, Node(Black(rcV), lc, Empty)
        | Red(_), Black(rcV) -> fixDoubleBlackIfNeeded true (Node(Red(rcV), lc, Empty)) Empty
        | Red(_), Red(rcV) -> false, Node(Red(rcV), lc, Empty)

      //another case is when right child does not have a left child, but it has a right child.
      //In That case right child can only be Red otherwise Black height property would not hold
      | Node(n, lc, Node(Black(rcV), Empty, Node(Red(rcrcV), Empty, Empty))) ->

        match n with
        | Black(_) -> false, Node(Black(rcV), lc, Node(Black(rcrcV), Empty, Empty))
        | Red(_) -> false, Node(Red(rcV), lc, Node(Black(rcrcV), Empty, Empty))

      | Node(_, _, Node(Black(_), Empty, Node(Black(_), Empty, Empty))) -> failwith(sprintf "black height property is violated %A" node)

      | Node(n, lc, rc) ->

        let v, (nbb, newRc) = bbUp rc (fun (v, (bb, n)) -> v, (bb, n))

        let newNode =

          match n with
          | Black(_) -> Node(Black(v), lc, newRc)
          | Red(_) -> Node(Red(v), lc, newRc)

        fixDoubleBlackIfNeeded nbb newNode newRc


    //bb -> double black
    let remove tree value =

      //when we get here, we will never bubbleUp when parent node is empty. Since the only case when parent node is
      //empty, is when the root node has the value that we are deleting and it has no children.
      //This case was already handled before we get here.
      let rec removeValue parent remValue cont =

        match parent with
        | Empty -> Empty
        | Node(Red(v) as t, lc, rc) | Node(Black(v) as t, lc, rc)->

          match compare remValue v with
          | 0 -> cont(bubbleUp parent)
          | 1 -> removeValue rc remValue (fun (bb, nrc) -> cont(fixDoubleBlackIfNeeded bb (Node(t, lc, nrc)) nrc))
          | -1 -> removeValue lc remValue (fun (bb, nlc) -> cont(fixDoubleBlackIfNeeded bb (Node(t, nlc, rc)) nlc))
          | weirdValue -> failwith (sprintf "Node comparison returned wrong result %d" weirdValue)

        match tree with
        | Node(Black(v), Empty, Empty) when (compare v value = 0) -> (true, Empty)
        | Node(Red(_), _, _) -> failwith "Root node is not black !!"
        | _ -> match removeValue tree value (fun (b, n) -> n) with

          | Empty -> (false, tree)
          | newTree -> (true, newTree)

    (*

    let b0, r0 = insert Empty 4
    let b1, r1 = insert r0 7
    let b2, r2 = insert r1 12
    let b3, r3 = insert r2 15
    let b4, r4 = insert r3 3
    let b5, r5 = insert r4 5
    let b6, r6 = insert r5 14
    let b7, r7 = insert r6 18
    let b8, r8 = insert r7 16
    let b9, r9 = insert r8 17

    let b10, r10 = remove r9 3
    (*Expected tree:
    Node
    (Black 14,
    Node
    (Red 7,Node (Black 4,Empty,Node (Red 5,Empty,Empty)),
    Node (Black 12,Empty,Empty)),
    Node
    (Red 16,Node (Black 15,Empty,Empty),
    Node (Black 18,Node (Red 17,Empty,Empty),Empty)))
    *)

    let b11, r11 = remove r10 12
    (*Expected tree:
    Node
    (Black 14,
    Node (Red 5,Node (Black 4,Empty,Empty),Node (Black 7,Empty,Empty)),
    Node
    (Red 16,Node (Black 15,Empty,Empty),
    Node (Black 18,Node (Red 17,Empty,Empty),Empty)))
    *)

    let b12, r12 = remove r11 17
    (*Expected tree:
    Node
    (Black 14,
    Node (Red 5,Node (Black 4,Empty,Empty),Node (Black 7,Empty,Empty)),
    Node (Red 16,Node (Black 15,Empty,Empty),Node (Black 18,Empty,Empty)))
    *)

    let b13, r13 = remove r12 18
    (*Expected tree:
    Node
    (Black 14,
    Node (Red 5,Node (Black 4,Empty,Empty),Node (Black 7,Empty,Empty)),
    Node (Black 16,Node (Red 15,Empty,Empty),Empty))
    *)

    let b14, r14 = remove r13 15
    (*Expected tree:
    Node
    (Black 14,
    Node (Red 5,Node (Black 4,Empty,Empty),Node (Black 7,Empty,Empty)),
    Node (Black 16,Empty,Empty))
    *)

    let b15, r15 = remove r14 16
    (*Expected tree:
    Node
    (Black 5,Node (Black 4,Empty,Empty),
    Node (Black 14,Node (Red 7,Empty,Empty),Empty))
    *)
    *)


There was a bug in insertion implementation

January 8, 2010

I missed the case when recoloring of the black parent into red may cause double red problem in the parent, therefore we need to propagate node restructuring up the tree.
I fixed the bug, and updated Red and Black tree post