Skip to main content

Rabin-karp algorithm for pattern searching

The Rabin-Karp algorithm is a pattern searching algorithm that uses hashing to find the occurrence of a pattern in a text. It is a linear time algorithm, which means it can search for the pattern in O(n) time complexity, where n is the length of the text.

The algorithm works by first calculating the hash value of the pattern and the first window of the text, i.e., the substring of the text of the same length as the pattern. It then slides the window one character at a time and recalculates the hash value of the new window using the previously calculated hash value. If the hash value of the new window matches the hash value of the pattern, then it compares the characters of the pattern and the window. If they match, it reports the occurrence of the pattern in the text. If not, it continues sliding the window and recalculating the hash value until the end of the text is reached.

To improve the performance of the algorithm, the hash function should be selected such that it can be efficiently computed and has a low collision rate. One commonly used hash function is the polynomial rolling hash function, which is defined as:

hash(s) = s[0] * a^(m-1) + s[1] * a^(m-2) + ... + s[m-1] * a^0

where s is the substring of length m, s[0] is the first character of the substring, and a is a constant prime number.

The algorithm has a worst-case time complexity of O(mn), where m is the length of the pattern and n is the length of the text. This can happen when the hash function has a high collision rate, which means that multiple substrings of the text have the same hash value as the pattern. In practice, the algorithm has a very good average-case performance and is widely used in many applications, such as plagiarism detection, DNA sequencing, and file comparison.

Here given code implementation process.

// C Program 
// Rabin-karp algorithm for pattern searching
#include <stdio.h>

//  Handles the request to find pattern in given text
void search(char text[], char pattern[], int n, int m)
{
	// Number of characters in alphabet
	int d = 256;
	//  prime modulus is 101
	int q = 101;
	// text hash 
	int t = 0;
	// pattern hash
	int p = 0;
	// hash function h
	int h = 1;
	// Loop controlling variable i and j
	int i = 0;
	int j = 0;
	// Calaculate pow(d, M-1)%q
	for (i = 0; i < m - 1; i++)
	{
		h = (h *d) % q;
	}
	// Calculate hash value
	for (i = 0; i < m; i++)
	{
		// hash of pattern
		p = (d *p + pattern[i]) % q;
		// hash of text
		t = (d *t + text[i]) % q;
	}
	// Display Given Data
	printf("\n Given text : %s", text);
	printf("\n Given pattern : %s\n", pattern);
	// Find pattern in text
	for (i = 0; i <= n - m; i++)
	{
		if (p == t)
		{
			while (j < m && text[i + j] == pattern[j])
			{
				j++;
			}
			if (j == m)
			{
				// When pattern exists print its starting index
				printf(" Pattern exists at index %d \n", i);
			}
			j = 0;
		}
		if (i < n - m)
		{
			t = (d *(t - text[i] *h) + text[i + m]) % q;
			if (t < 0)
			{
				// When t is less than zero
				// Then change t
				t = (t + q);
			}
		}
	}
}
int main()
{
	char text[] = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	char pattern[] = "XYZ";
	int n = sizeof(text) / sizeof(text[0]) - 1;
	int m = sizeof(pattern) / sizeof(pattern[0]) - 1;
	search(text, pattern, n, m);
	return 0;
}

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1
 Pattern exists at index 7
 Pattern exists at index 10
 Pattern exists at index 20
/*
  Java Program 
  Rabin-karp algorithm for pattern searching
*/
public class RabinKarp
{
	//  Handles the request to find pattern in given text
	public void search(String text, String pattern, int n, int m)
	{
		// Number of characters in alphabet
		int d = 256;
		//  prime modulus is 101
		int q = 101;
		// text hash 
		int t = 0;
		// pattern hash
		int p = 0;
		// hash function h
		int h = 1;
		// Loop controlling variable i and j
		int i = 0;
		int j = 0;
		// Calaculate pow(d, M-1)%q
		for (i = 0; i < m - 1; i++)
		{
			h = (h * d) % q;
		}
		// Calculate hash value
		for (i = 0; i < m; i++)
		{
			// hash of pattern
			p = (d * p + pattern.charAt(i)) % q;
			// hash of text
			t = (d * t + text.charAt(i)) % q;
		}
		// Display Given Data
		System.out.print("\n Given text : " + text);
		System.out.print("\n Given pattern : " + pattern + "\n");
		// Find pattern in text
		for (i = 0; i <= n - m; i++)
		{
			if (p == t)
			{
				while (j < m && text.charAt(i + j) == pattern.charAt(j))
				{
					j++;
				}
				if (j == m)
				{
					// When pattern exists print its starting index
					System.out.print(" Pattern exists at index " + i + " \n");
				}
				j = 0;
			}
			if (i < n - m)
			{
				t = (d * (t - text.charAt(i) * h) + text.charAt(i + m)) % q;
				if (t < 0)
				{
					// When t is less than zero
					// Then change t
					t = (t + q);
				}
			}
		}
	}
	public static void main(String[] args)
	{
		RabinKarp task = new RabinKarp();
		String text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
		String pattern = "XYZ";
		int n = text.length();
		int m = pattern.length();
		// Test
		task.search(text, pattern, n, m);
	}
}

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1
 Pattern exists at index 7
 Pattern exists at index 10
 Pattern exists at index 20
// Include namespace system
using System;
/*
  C# Program 
  Rabin-karp algorithm for pattern searching
*/
public class RabinKarp
{
	//  Handles the request to find pattern in given text
	public void search(String text, String pattern, int n, int m)
	{
		// Number of characters in alphabet
		int d = 256;
		//  prime modulus is 101
		int q = 101;
		// text hash
		int t = 0;
		// pattern hash
		int p = 0;
		// hash function h
		int h = 1;
		// Loop controlling variable i and j
		int i = 0;
		int j = 0;
		// Calaculate pow(d, M-1)%q
		while (i < m - 1)
		{
			h = (h * d) % q;
			i++;
		}
		i = 0;
		// Calculate hash value
		while (i < m)
		{
			// hash of pattern
			p = (d * p + pattern[i]) % q;
			// hash of text
			t = (d * t + text[i]) % q;
			i++;
		}
		// Display Given Data
		Console.Write("\n Given text : " + text);
		Console.Write("\n Given pattern : " + pattern + "\n");
		i = 0;
		// Find pattern in text
		while (i <= n - m)
		{
			if (p == t)
			{
				while (j < m && text[i + j] == pattern[j])
				{
					j++;
				}
				if (j == m)
				{
					// When pattern exists print its starting index
					Console.Write(" Pattern exists at index " + i + " \n");
				}
				j = 0;
			}
			if (i < n - m)
			{
				t = (d * (t - text[i] * h) + text[i + m]) % q;
				if (t < 0)
				{
					// When t is less than zero
					// Then change t
					t = (t + q);
				}
			}
			i++;
		}
	}
	public static void Main(String[] args)
	{
		RabinKarp task = new RabinKarp();
		String text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
		String pattern = "XYZ";
		int n = text.Length;
		int m = pattern.Length;
		// Test
		task.search(text, pattern, n, m);
	}
}

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1
 Pattern exists at index 7
 Pattern exists at index 10
 Pattern exists at index 20
// Include header file
#include <iostream>
#include <string>
using namespace std;
/*
  C++ Program 
  Rabin-karp algorithm for pattern searching
*/
class RabinKarp
{
	public:
		//  Handles the request to find pattern in given text
		void search(string text, string pattern, int n, int m)
		{
			// Number of characters in alphabet
			int d = 256;
			//  prime modulus is 101
			int q = 101;
			// text hash
			int t = 0;
			// pattern hash
			int p = 0;
			// hash function h
			int h = 1;
			// Loop controlling variable i and j
			int i = 0;
			int j = 0;
			// Calaculate pow(d, M-1)%q
			for (i = 0; i < m - 1; i++)
			{
				h = (h *d) % q;
			}
			// Calculate hash value
			for (i = 0; i < m; i++)
			{
				// hash of pattern
				p = (d *p + pattern[i]) % q;
				// hash of text
				t = (d *t + text[i]) % q;
			}
			// Display Given Data
			cout << "\n Given text : " << text;
			cout << "\n Given pattern : " << pattern << "\n";
			// Find pattern in text
			for (i = 0; i <= n - m; i++)
			{
				if (p == t)
				{
					while (j < m && text[i + j] == pattern[j])
					{
						j++;
					}
					if (j == m)
					{
						// When pattern exists print its starting index
						cout << " Pattern exists at index " << i << " \n";
					}
					j = 0;
				}
				if (i < n - m)
				{
					t = (d *(t - text[i] *h) + text[i + m]) % q;
					if (t < 0)
					{
						// When t is less than zero
						// Then change t
						t = (t + q);
					}
				}
			}
		}
};
int main()
{
	RabinKarp task = RabinKarp();
	string text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	string pattern = "XYZ";
	int n = text.length();
	int m = pattern.length();
	// Test
	task.search(text, pattern, n, m);
	return 0;
}

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1
 Pattern exists at index 7
 Pattern exists at index 10
 Pattern exists at index 20
<?php
/*
  Php Program 
  Rabin-karp algorithm for pattern searching
*/
class RabinKarp
{
	//  Handles the request to find pattern in given text
	public	function search($text, $pattern, $n, $m)
	{
		// Number of characters in alphabet
		$d = 256;
		//  prime modulus is 101
		$q = 101;
		// text hash
		$t = 0;
		// pattern hash
		$p = 0;
		// hash function h
		$h = 1;
		// Loop controlling variable i and j
		$i = 0;
		$j = 0;
		// Calaculate pow(d, M-1)%q
		for ($i = 0; $i < $m - 1; $i++)
		{
			$h = ($h * $d) % $q;
		}
		// Calculate hash value
		for ($i = 0; $i < $m; $i++)
		{
			// hash of pattern
			$p = ($d * $p + $pattern[$i]) % $q;
			// hash of text
			$t = ($d * $t + $text[$i]) % $q;
		}
		// Display Given Data
		echo "\n Given text : ". $text;
		echo "\n Given pattern : ". $pattern ."\n";
		// Find pattern in text
		for ($i = 0; $i <= $n - $m; $i++)
		{
			if ($p == $t)
			{
				while ($j < $m && $text[$i + $j] == $pattern[$j])
				{
					$j++;
				}
				if ($j == $m)
				{
					// When pattern exists print its starting index
					echo " Pattern exists at index ". $i ." \n";
				}
				$j = 0;
			}
			if ($i < $n - $m)
			{
				$t = ($d * ($t - $text[$i] * $h) + $text[$i + $m]) % $q;
				if ($t < 0)
				{
					// When t is less than zero
					// Then change t
					$t = ($t + $q);
				}
			}
		}
	}
}

function main()
{
	$task = new RabinKarp();
	$text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	$pattern = "XYZ";
	$n = strlen($text);
	$m = strlen($pattern);
	$task->search($text, $pattern, $n, $m);
}
main();

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1
 Pattern exists at index 7
 Pattern exists at index 10
 Pattern exists at index 20
/*
  Node Js Program 
  Rabin-karp algorithm for pattern searching
*/
class RabinKarp
{
	//  Handles the request to find pattern in given text
	search(text, pattern, n, m)
	{
		// Number of characters in alphabet
		var d = 256;
		//  prime modulus is 101
		var q = 101;
		// text hash
		var t = 0;
		// pattern hash
		var p = 0;
		// hash function h
		var h = 1;
		// Loop controlling variable i and j
		var i = 0;
		var j = 0;
		// Calaculate pow(d, M-1)%q
		for (i = 0; i < m - 1; i++)
		{
			h = (h * d) % q;
		}
		// Calculate hash value
		for (i = 0; i < m; i++)
		{
			// hash of pattern
			p = (d * p + pattern.charCodeAt(i)) % q;
			// hash of text
			t = (d * t + text.charCodeAt(i)) % q;
		}
		// Display Given Data
		process.stdout.write("\n Given text : " + text);
		process.stdout.write("\n Given pattern : " + pattern + "\n");
		// Find pattern in text
		for (i = 0; i <= n - m; i++)
		{
			if (p == t)
			{
				while (j < m && text.charCodeAt(i + j) == pattern.charCodeAt(j))
				{
					j++;
				}
				if (j == m)
				{
					// When pattern exists print its starting index
					process.stdout.write(" Pattern exists at index " + i + " \n");
				}
				j = 0;
			}
			if (i < n - m)
			{
				t = (d * (t - text.charCodeAt(i) * h) + text.charCodeAt(i + m)) % q;
				if (t < 0)
				{
					// When t is less than zero
					// Then change t
					t = (t + q);
				}
			}
		}
	}
}

function main()
{
	var task = new RabinKarp();
	var text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	var pattern = "XYZ";
	var n = text.length;
	var m = pattern.length;
	// Test
	task.search(text, pattern, n, m);
}
main();

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1
 Pattern exists at index 7
 Pattern exists at index 10
 Pattern exists at index 20
#   Python 3 Program 
#   Rabin-karp algorithm for pattern searching

class RabinKarp :
	#   Handles the request to find pattern in given text
	def search(self, text, pattern, n, m) :
		#  Number of characters in alphabet
		d = 256
		#   prime modulus is 101
		q = 101
		#  text hash
		t = 0
		#  pattern hash
		p = 0
		#  hash function h
		h = 1
		#  Loop controlling variable i and j
		i = 0
		j = 0
		#  Calaculate pow(d, M-1)%q
		while (i < m - 1) :
			h = (h * d) % q
			i += 1
		
		i = 0
		#  Calculate hash value
		while (i < m) :
			#  hash of pattern
			p = (d * p + ord(pattern[i])) % q
			#  hash of text
			t = (d * t + ord(text[i])) % q
			i += 1
		
		#  Display Given Data
		print("\n Given text : ", text, end = "")
		print("\n Given pattern : ", pattern )
		i = 0
		#  Find pattern in text
		while (i <= n - m) :
			if (p == t) :
				while (j < m and ord(text[i + j]) == ord(pattern[j])) :
					j += 1
				
				if (j == m) :
					#  When pattern exists print its starting index
					print(" Pattern exists at index ", i ," ")
				
				j = 0
			
			if (i < n - m) :
				t = (d * (t - ord(text[i]) * h) + ord(text[i + m])) % q
				if (t < 0) :
					#  When t is less than zero
					#  Then change t
					t = (t + q)
			i += 1

def main() :
	task = RabinKarp()
	text = "LXYZHEQXYZXYZQQHE11HXYZ1E"
	pattern = "XYZ"
	n = len(text)
	m = len(pattern)
	#  Test
	task.search(text, pattern, n, m)

if __name__ == "__main__": main()

Output

 Given text :  LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern :  XYZ
 Pattern exists at index  1
 Pattern exists at index  7
 Pattern exists at index  10
 Pattern exists at index  20
#   Ruby Program 
#   Rabin-karp algorithm for pattern searching

class RabinKarp 
	#   Handles the request to find pattern in given text
	def search(text, pattern, n, m) 
		#  Number of characters in alphabet
		d = 256
		#   prime modulus is 101
		q = 101
		#  text hash
		t = 0
		#  pattern hash
		p = 0
		#  hash function h
		h = 1
		#  Loop controlling variable i and j
		i = 0
		j = 0
		#  Calaculate pow(d, M-1)%q
		while (i < m - 1) 
			h = (h * d) % q
			i += 1
		end

		i = 0
		#  Calculate hash value
		while (i < m) 
			#  hash of pattern
			p = (d * p + pattern[i].ord) % q
			#  hash of text
			t = (d * t + text[i].ord) % q
			i += 1
		end

		#  Display Given Data
		print("\n Given text : ", text)
		print("\n Given pattern : ", pattern ,"\n")
		i = 0
		#  Find pattern in text
		while (i <= n - m) 
			if (p == t) 
				while (j < m && text[i + j] == pattern[j]) 
					j += 1
				end

				if (j == m) 
					#  When pattern exists print its starting index
					print(" Pattern exists at index ", i ," \n")
				end

				j = 0
			end

			if (i < n - m) 
				t = (d * (t - text[i].ord * h) + text[i + m].ord) % q
				if (t < 0) 
					#  When t is less than zero
					#  Then change t
					t = (t + q)
				end

			end

			i += 1
		end

	end

end

def main() 
	task = RabinKarp.new()
	text = "LXYZHEQXYZXYZQQHE11HXYZ1E"
	pattern = "XYZ"
	n = text.length
	m = pattern.length
	#  Test
	task.search(text, pattern, n, m)
end

main()

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1 
 Pattern exists at index 7 
 Pattern exists at index 10 
 Pattern exists at index 20 
import scala.collection.mutable._;
/*
  Scala Program 
  Rabin-karp algorithm for pattern searching
*/
class RabinKarp
{
	//  Handles the request to find pattern in given text
	def search(text: String, pattern: String, n: Int, m: Int): Unit = {
		// Number of characters in alphabet
		var d: Int = 256;
		//  prime modulus is 101
		var q: Int = 101;
		// text hash
		var t: Int = 0;
		// pattern hash
		var p: Int = 0;
		// hash function h
		var h: Int = 1;
		// Loop controlling variable i and j
		var i: Int = 0;
		var j: Int = 0;
		// Calaculate pow(d, M-1)%q
		while (i < m - 1)
		{
			h = (h * d) % q;
			i += 1;
		}
		i = 0;
		// Calculate hash value
		while (i < m)
		{
			// hash of pattern
			p = (d * p + pattern.charAt(i)) % q;
			// hash of text
			t = (d * t + text.charAt(i)) % q;
			i += 1;
		}
		// Display Given Data
		print("\n Given text : " + text);
		print("\n Given pattern : " + pattern + "\n");
		i = 0;
		// Find pattern in text
		while (i <= n - m)
		{
			if (p == t)
			{
				while (j < m && text.charAt(i + j) == pattern.charAt(j))
				{
					j += 1;
				}
				if (j == m)
				{
					// When pattern exists print its starting index
					print(" Pattern exists at index " + i + " \n");
				}
				j = 0;
			}
			if (i < n - m)
			{
				t = (d * (t - text.charAt(i) * h) + text.charAt(i + m)) % q;
				if (t < 0)
				{
					// When t is less than zero
					// Then change t
					t = (t + q);
				}
			}
			i += 1;
		}
	}
}
object Main
{
	def main(args: Array[String]): Unit = {
		var task: RabinKarp = new RabinKarp();
		var text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
		var pattern: String = "XYZ";
		var n: Int = text.length();
		var m: Int = pattern.length();
		// Test
		task.search(text, pattern, n, m);
	}
}

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1
 Pattern exists at index 7
 Pattern exists at index 10
 Pattern exists at index 20
import Foundation
/*
  Swift 4 Program 
  Rabin-karp algorithm for pattern searching
*/

class RabinKarp
{
  	func  ascii(_ ch : Character) -> Int
    {
      
      	return Int(UnicodeScalar(String(ch))!.value);
    }
	//  Handles the request to find pattern in given text
	func search(_ txt: String, _ pt: String, _ n: Int, _ m: Int)
	{
      
  		let text = Array(txt);
      	let pattern = Array(pt);
		// Number of characters in alphabet
		let d: Int = 256;
		//  prime modulus is 101
		let q: Int = 101;
		// text hash
		var t: Int = 0;
		// pattern hash
		var p: Int = 0;
		// hash function h
		var h: Int = 1;
		// Loop controlling variable i and j
		var i: Int = 0;
		var j: Int = 0;
		// Calaculate pow(d, M-1)%q
		while (i < m - 1)
		{
			h = (h * d) % q;
			i += 1;
		}
		i = 0;
		// Calculate hash value
		while (i < m)
		{
			// hash of pattern
			p = (d * p + ascii(pattern[i])) % q;
			// hash of text
			t = (d * t + ascii(text[i])) % q;
			i += 1;
		}
		// Display Given Data
		print("\n Given text : ", txt, terminator: "");
		print("\n Given pattern : ", pt );
		i = 0;
		// Find pattern in text
		while (i <= n - m)
		{
			if (p == t)
			{
				while (j < m && text[i + j] == pattern[j])
				{
					j += 1;
				}
				if (j == m)
				{
					// When pattern exists print its starting index
					print(" Pattern exists at index ", i ," ");
				}
				j = 0;
			}
			if (i < n - m)
			{
				t = (d * (t - ascii(text[i]) * h) + ascii(text[i + m])) % q;
				if (t < 0)
				{
					// When t is less than zero
					// Then change t
					t = (t + q);
				}
			}
			i += 1;
		}
	}
}
func main()
{
	let task: RabinKarp = RabinKarp();
	let text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	let pattern: String = "XYZ";
	let n: Int = text.count;
	let m: Int = pattern.count;
	// Test
	task.search(text, pattern, n, m);
}
main();

Output

 Given text :  LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern :  XYZ
 Pattern exists at index  1
 Pattern exists at index  7
 Pattern exists at index  10
 Pattern exists at index  20
/*
  Kotlin Program 
  Rabin-karp algorithm for pattern searching
*/
class RabinKarp
{
	//  Handles the request to find pattern in given text
	fun search(text: String, pattern: String, n: Int, m: Int): Unit
	{
		// Number of characters in alphabet
		var d: Int = 256;
		//  prime modulus is 101
		var q: Int = 101;
		// text hash
		var t: Int = 0;
		// pattern hash
		var p: Int = 0;
		// hash function h
		var h: Int = 1;
		// Loop controlling variable i and j
		var i: Int = 0;
		var j: Int = 0;
		// Calaculate pow(d, M-1)%q
		while (i < m - 1)
		{
			h = (h * d) % q;
			i += 1;
		}
		i = 0;
		// Calculate hash value
		while (i < m)
		{
			// hash of pattern
			p = (d * p + pattern.get(i).toInt()) % q;
			// hash of text
			t = (d * t + text.get(i).toInt()) % q;
			i += 1;
		}
		// Display Given Data
		print("\n Given text : " + text);
		print("\n Given pattern : " + pattern + "\n");
		i = 0;
		// Find pattern in text
		while (i <= n - m)
		{
			if (p == t)
			{
				while (j < m && text.get(i + j) == pattern.get(j))
				{
					j += 1;
				}
				if (j == m)
				{
					// When pattern exists print its starting index
					print(" Pattern exists at index " + i + " \n");
				}
				j = 0;
			}
			if (i < n - m)
			{
				t = (d * (t - text.get(i).toInt() * h) + text.get(i + m).toInt()) % q;
				if (t < 0)
				{
					// When t is less than zero
					// Then change t
					t = (t + q);
				}
			}
			i += 1;
		}
	}
}
fun main(args: Array < String > ): Unit
{
	var task: RabinKarp = RabinKarp();
	var text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	var pattern: String = "XYZ";
	var n: Int = text.length;
	var m: Int = pattern.length;
	// Test
	task.search(text, pattern, n, m);
}

Output

 Given text : LXYZHEQXYZXYZQQHE11HXYZ1E
 Given pattern : XYZ
 Pattern exists at index 1
 Pattern exists at index 7
 Pattern exists at index 10
 Pattern exists at index 20




Comment

Please share your knowledge to improve code and content standard. Also submit your doubts, and test case. We improve by your feedback. We will try to resolve your query as soon as possible.

New Comment