An Introduction to Hashing. By: Sara Kennedy Presented: November 1, 2002.

An Introduction to Hashing.

By: Sara Kennedy

Presented: November 1, 2002

What is hashing?

Hashing is a method of storing data in a table, and retrieving it again, by computing each element's position in the table from its key.

Tables can be implemented in many ways.

Examples include a fixed-size array (which limits the number of elements) and an array of linked lists (which allows a potentially unlimited number of elements).

Why use hashing?

There is the potential to retrieve data faster.

A well-chosen hash function distributes the elements evenly throughout the table. To retrieve an element, apply the hash function to its key and examine the resulting table positions until the element is found or it is clear that it is not in the table.

Hash Functions (1)

Let U be the universe of possible keys for a set of elements. It is generally assumed that all elements have unique integer keys.

Let m be the size of our array that will hold the elements.

A hash function h(key) is a function $h : U \to \mathbb{Z}_m$, where $\mathbb{Z}_m = \{0, 1, \dots, m-1\}$ is the set of valid array indices.

Hash Functions (2)

If we can define a one-to-one mapping from $U$ to $\mathbb{Z}_m$, h(k) is called a perfect hashing function.

If we cannot define a perfect hashing function, we must deal with collisions.

Hash Functions (3)

A collision occurs when two or more distinct keys map to the same table index.

There are many ways to handle collisions. These include “chaining”, “double hashing”, “linear probing”, “quadratic probing”, “random probing”, etc.

The method that I will describe is double hashing.

MIS for Hash Table

INTERFACE:

TYPE Element;

PROCEDURE Put(key: INTEGER);

PROCEDURE Find(key: INTEGER): BOOLEAN;

PROCEDURE Delete(key: INTEGER);

(*
  Title:        Hash.m
  Author:       Sara Kennedy
  Last Revised: November 1, 2002
  Description:  An implementation of a hash table using double hashing.

  Interface:
    TYPE Element;
    PROCEDURE Put(key: INTEGER);
    PROCEDURE Find(key: INTEGER): BOOLEAN;
    PROCEDURE Delete(key: INTEGER);

  Notes:
    - The table is a fixed array of Size slots handled with open addressing.
      The i-th probe for a key is (h1 + i*h2) MOD Size, where
      h1 = key MOD Size and h2 = 1 + (key MOD (Size - 1)).
    - Size is prime, so every step h2 in 1..Size-1 is coprime to Size and the
      probe sequence visits each slot exactly once within Size probes.
    - Put ignores a key that is already stored, and does nothing when no free
      slot exists; callers can confirm an insertion with Find.
*)

MODULE Hash;

CONST
  Size = 101;      (* number of slots; must be prime (see Notes above) *)

  (* Values of Element.status *)
  Empty    = -1;   (* slot has never held an element; ends a search *)
  Deleted  = 0;    (* slot held an element that was removed; search continues past it *)
  Occupied = 1;    (* slot currently holds an element *)

TYPE
  Element* = RECORD
    key: INTEGER;
    data: REAL;
    status: INTEGER
  END;

VAR
  Table: ARRAY Size OF Element;
  init: INTEGER;   (* loop counter used only by the module body *)

(* First hash function: the slot at which probing for key starts. *)
PROCEDURE Home(key: INTEGER): INTEGER;
BEGIN
  RETURN key MOD Size
END Home;

(* Second hash function: the distance between successive probes for key.
   The result is always in 1..Size-1, never 0, so probing always advances.
   The parentheses around (Size - 1) are required: MOD binds tighter than "-". *)
PROCEDURE Step(key: INTEGER): INTEGER;
BEGIN
  RETURN 1 + (key MOD (Size - 1))
END Step;

(* Find location of key if it exists in the hash table.
   Returns TRUE and sets loc to the slot holding key when it is stored;
   otherwise returns FALSE and loc is not meaningful. *)
PROCEDURE FindLoc(key: INTEGER; VAR loc: INTEGER): BOOLEAN;
  VAR probes, step: INTEGER;
BEGIN
  loc := Home(key);
  step := Step(key);
  probes := 0;
  (* Stop at the first never-used slot, or after every slot has been probed. *)
  WHILE (probes < Size) & (Table[loc].status # Empty) DO
    IF (Table[loc].status = Occupied) & (Table[loc].key = key) THEN
      RETURN TRUE
    END;
    (* Stepping incrementally equals (h1 + i*h2) MOD Size without forming i*h2. *)
    loc := (loc + step) MOD Size;
    INC(probes)
  END;
  RETURN FALSE
END FindLoc;

(* Insert a given key into the hash table.
   Does nothing if key is already stored or if no free slot is available. *)
PROCEDURE Put*(key: INTEGER);
  VAR loc, step, probes, free: INTEGER;
BEGIN
  loc := Home(key);
  step := Step(key);
  probes := 0;
  free := -1;   (* first reusable slot seen so far; -1 = none yet *)
  WHILE (probes < Size) & (Table[loc].status # Empty) DO
    IF Table[loc].status = Occupied THEN
      IF Table[loc].key = key THEN
        RETURN   (* already present: do not store a second copy *)
      END
    ELSIF free < 0 THEN
      free := loc   (* remember the first deleted slot, but keep looking for key *)
    END;
    loc := (loc + step) MOD Size;
    INC(probes)
  END;
  (* If the loop ended on a never-used slot and no deleted slot was seen, use it. *)
  IF (free < 0) & (probes < Size) THEN
    free := loc
  END;
  IF free >= 0 THEN
    Table[free].key := key;
    Table[free].data := 0.0;
    Table[free].status := Occupied
  END
END Put;

(* Find if the key exists in the table. *)
PROCEDURE Find*(key: INTEGER): BOOLEAN;
  VAR loc: INTEGER;
BEGIN
  RETURN FindLoc(key, loc)
END Find;

(* Remove a key from the table. Does nothing if key is not stored.
   The slot is marked Deleted rather than Empty so that searches for other
   keys that probed past this slot still reach them. *)
PROCEDURE Delete*(key: INTEGER);
  VAR loc: INTEGER;
BEGIN
  IF FindLoc(key, loc) THEN
    Table[loc].status := Deleted
  END
END Delete;

BEGIN
  (* Initialize the table as an empty table *)
  init := 0;
  WHILE init < Size DO
    Table[init].key := 0;
    Table[init].status := Empty;
    init := init + 1
  END
END Hash.

(* Test driver for module Hash (integer keys).
   Each helper performs one table operation and writes a one-line report;
   the module body runs a fixed sequence of operations. *)
MODULE TestHash;

IMPORT Out, Hash;

(* Insert key, then report whether Find can see it afterwards. *)
PROCEDURE Insert(key: INTEGER);
BEGIN
  Hash.Put(key);
  Out.Int(key, 0);
  IF Hash.Find(key) THEN
    Out.String(" inserted.")
  ELSE
    Out.String(" not inserted.")
  END;
  Out.Ln
END Insert;

(* Delete key if it is present, and report whether it is gone afterwards.
   A key that was not present to begin with is reported as "not deleted". *)
PROCEDURE Delete(key: INTEGER);
BEGIN
  Out.Int(key, 0);
  IF Hash.Find(key) THEN
    Hash.Delete(key);
    IF Hash.Find(key) THEN
      Out.String(" not deleted.")
    ELSE
      Out.String(" deleted.")
    END
  ELSE
    Out.String(" not deleted.")
  END;
  Out.Ln
END Delete;

(* Report whether key is currently stored in the table. *)
PROCEDURE Find(key: INTEGER);
BEGIN
  (* key is an INTEGER, so it is written with Out.Int like the other reports. *)
  Out.Int(key, 0);
  IF Hash.Find(key) THEN
    Out.String(" found.")
  ELSE
    Out.String(" not found.")
  END;
  Out.Ln
END Find;

BEGIN
  Insert(3);
  Insert(200);
  Insert(2056);
  Insert(-97);
  Find(3);
  Delete(200);
  Delete(5);
  Insert(5);
  Insert(103);
  Find(200);
  Find(103);
  Find(100)
END TestHash.

Output:
3 inserted.
200 inserted.
2056 inserted.
-97 inserted.
3 found.
200 deleted.
5 not deleted.
5 inserted.
103 inserted.
200 not found.
103 found.
100 not found.

(*
  Title:        Hash.m
  Author:       Sara Kennedy
  Last Revised: November 1, 2002
  Description:  An implementation of a hash table using double hashing on
                2D vectors with integer coordinates.

  Interface:
    TYPE Element;
    PROCEDURE Put(x, y: INTEGER);
    PROCEDURE Find(x, y: INTEGER): BOOLEAN;
    PROCEDURE Delete(x, y: INTEGER);

  Notes:
    - The table is a fixed array of Size slots handled with open addressing.
      The i-th probe for a key is (h1 + i*h2) MOD Size, where
      h1 = key MOD Size and h2 = 1 + (key MOD (Size - 1)).
    - Size is prime, so every step h2 in 1..Size-1 is coprime to Size and the
      probe sequence visits each slot exactly once within Size probes.
    - Put ignores a vector that is already stored, and does nothing when no
      free slot exists; callers can confirm an insertion with Find.
*)

MODULE Hash;

CONST
  Size = 1009;     (* number of slots; must be prime (see Notes above) *)
  Mult = 31;       (* weight of x in GetKey; see the range note there *)

  (* Values of Element.status *)
  Empty    = -1;   (* slot has never held an element; ends a search *)
  Deleted  = 0;    (* slot held an element that was removed; search continues past it *)
  Occupied = 1;    (* slot currently holds an element *)

TYPE
  Element* = RECORD
    x, y: INTEGER;
    status: INTEGER
  END;

VAR
  Table: ARRAY Size OF Element;
  init: INTEGER;   (* loop counter used only by the module body *)

(* Calculates the key used for hashing the vector (x, y).
   Both coordinates are reduced modulo Size first, so the result lies in
   0 .. (Mult + 1) * (Size - 1) = 32256 and the arithmetic cannot overflow
   even where INTEGER is only 16 bits wide (the width is implementation-
   dependent). Weighting x by Mult makes (a, b) and (b, a) hash differently.
   Distinct vectors may still share a key; FindLoc and Put compare the stored
   coordinates, not the key. *)
PROCEDURE GetKey(x, y: INTEGER): INTEGER;
BEGIN
  RETURN Mult * (x MOD Size) + (y MOD Size)
END GetKey;

(* First hash function: the slot at which probing for key starts. *)
PROCEDURE Home(key: INTEGER): INTEGER;
BEGIN
  RETURN key MOD Size
END Home;

(* Second hash function: the distance between successive probes for key.
   The result is always in 1..Size-1, never 0, so probing always advances.
   The parentheses around (Size - 1) are required: MOD binds tighter than "-". *)
PROCEDURE Step(key: INTEGER): INTEGER;
BEGIN
  RETURN 1 + (key MOD (Size - 1))
END Step;

(* Find location of the vector (x, y) if it exists in the hash table.
   Returns TRUE and sets loc to the slot holding it when it is stored;
   otherwise returns FALSE and loc is not meaningful. *)
PROCEDURE FindLoc(x, y: INTEGER; VAR loc: INTEGER): BOOLEAN;
  VAR key, step, probes: INTEGER;
BEGIN
  key := GetKey(x, y);
  loc := Home(key);
  step := Step(key);
  probes := 0;
  (* Stop at the first never-used slot, or after every slot has been probed. *)
  WHILE (probes < Size) & (Table[loc].status # Empty) DO
    IF (Table[loc].status = Occupied) & (Table[loc].x = x) & (Table[loc].y = y) THEN
      RETURN TRUE
    END;
    (* Stepping incrementally equals (h1 + i*h2) MOD Size without forming
       i*h2, which could approach Size*Size. *)
    loc := (loc + step) MOD Size;
    INC(probes)
  END;
  RETURN FALSE
END FindLoc;

(* Insert the vector (x, y) into the hash table.
   Does nothing if it is already stored or if no free slot is available. *)
PROCEDURE Put*(x, y: INTEGER);
  VAR key, loc, step, probes, free: INTEGER;
BEGIN
  key := GetKey(x, y);
  loc := Home(key);
  step := Step(key);
  probes := 0;
  free := -1;   (* first reusable slot seen so far; -1 = none yet *)
  WHILE (probes < Size) & (Table[loc].status # Empty) DO
    IF Table[loc].status = Occupied THEN
      IF (Table[loc].x = x) & (Table[loc].y = y) THEN
        RETURN   (* already present: do not store a second copy *)
      END
    ELSIF free < 0 THEN
      free := loc   (* remember the first deleted slot, but keep looking for (x, y) *)
    END;
    loc := (loc + step) MOD Size;
    INC(probes)
  END;
  (* If the loop ended on a never-used slot and no deleted slot was seen, use it. *)
  IF (free < 0) & (probes < Size) THEN
    free := loc
  END;
  IF free >= 0 THEN
    Table[free].x := x;
    Table[free].y := y;
    Table[free].status := Occupied
  END
END Put;

(* Find if the vector (x, y) exists in the table. *)
PROCEDURE Find*(x, y: INTEGER): BOOLEAN;
  VAR loc: INTEGER;
BEGIN
  RETURN FindLoc(x, y, loc)
END Find;

(* Remove the vector (x, y) from the table. Does nothing if it is not stored.
   The slot is marked Deleted rather than Empty so that searches for other
   vectors that probed past this slot still reach them. *)
PROCEDURE Delete*(x, y: INTEGER);
  VAR loc: INTEGER;
BEGIN
  IF FindLoc(x, y, loc) THEN
    Table[loc].status := Deleted
  END
END Delete;

BEGIN
  (* Initialize the table as an empty table *)
  init := 0;
  WHILE init < Size DO
    Table[init].status := Empty;
    init := init + 1
  END
END Hash.

(* Test driver for module Hash (2D vectors with integer coordinates).
   Each helper performs one table operation and writes a one-line report;
   the module body runs a fixed sequence of operations. *)
MODULE TestHash;

IMPORT Out, Hash;

(* Write the vector as "(x, y)" without a trailing newline. *)
PROCEDURE Print(x, y: INTEGER);
BEGIN
  Out.String("(");
  Out.Int(x, 0);
  Out.String(", ");
  Out.Int(y, 0);
  Out.String(")")
END Print;

(* Insert (x, y), then report whether Find can see it afterwards. *)
PROCEDURE Insert(x, y: INTEGER);
BEGIN
  Hash.Put(x, y);
  Print(x, y);
  IF Hash.Find(x, y) THEN
    Out.String(" inserted.")
  ELSE
    Out.String(" not inserted.")
  END;
  Out.Ln
END Insert;

(* Delete (x, y) if it is present, and report whether it is gone afterwards.
   A vector that was not present to begin with is reported as "not deleted". *)
PROCEDURE Delete(x, y: INTEGER);
BEGIN
  Print(x, y);
  IF Hash.Find(x, y) THEN
    Hash.Delete(x, y);
    IF Hash.Find(x, y) THEN
      Out.String(" not deleted.")
    ELSE
      Out.String(" deleted.")
    END
  ELSE
    Out.String(" not deleted.")
  END;
  Out.Ln
END Delete;

(* Report whether (x, y) is currently stored in the table. *)
PROCEDURE Find(x, y: INTEGER);
BEGIN
  Print(x, y);
  IF Hash.Find(x, y) THEN
    Out.String(" found.")
  ELSE
    Out.String(" not found.")
  END;
  Out.Ln
END Find;

BEGIN
  Insert(3, 4);
  Insert(200, 1);
  Insert(2056, 1028);
  Insert(-97, 0);
  Find(3, 4);
  Delete(200, 1);
  Delete(5, 4);
  Insert(5, 5);
  Insert(103, 301);
  Find(200, 1);
  Find(103, 103);
  Find(100, 200);
  Find(3, 6)
END TestHash.

Output:
(3, 4) inserted.
(200, 1) inserted.
(2056, 1028) inserted.
(-97, 0) inserted.
(3, 4) found.
(200, 1) deleted.
(5, 4) not deleted.
(5, 5) inserted.
(103, 301) inserted.
(200, 1) not found.
(103, 103) not found.
(100, 200) not found.
(3, 6) not found.

An Introduction to Hashing. By: Sara Kennedy Presented: November 1, 2002.

Documents

Transcript of An Introduction to Hashing. By: Sara Kennedy Presented: November 1, 2002.