Posted on by Kalkicode
Code Algorithm

Boyer moore algorithm for pattern searching

The Boyer-Moore algorithm is a popular algorithm for string searching, particularly for finding occurrences of a pattern within a larger string. It was developed by Robert S. Boyer and J Strother Moore in 1977.

The algorithm works by comparing the pattern to the text from right to left. It starts by aligning the rightmost character of the pattern with the corresponding character in the text. If these characters match, the algorithm proceeds leftward, comparing the next character of the pattern with the corresponding character in the text, until either a mismatch is found or the entire pattern is matched.

If a mismatch is found, the algorithm uses two precomputed tables, the "bad character" table and the "good suffix" table, to determine how far to shift the pattern to the right. The bad character table allows the algorithm to shift the pattern so that the mismatched character in the text aligns with the last occurrence of that character in the pattern, while the good suffix table allows the algorithm to shift the pattern to align with the longest suffix of the pattern that matches a prefix of the text.

The algorithm continues this process of shifting the pattern to the right until either the pattern is found in the text or the end of the text is reached.

Overall, the Boyer-Moore algorithm is a very efficient algorithm for string searching, especially when the pattern being searched for is relatively long. It has an average case time complexity of O(n/m), where n is the length of the text and m is the length of the pattern.

Here given code implementation process.

// Java program for
// Boyer moore algorithm for pattern searching
public class Matching
{
	public void findPattern(String text, String pattern)
	{
		// Get the length of given text
		int n = text.length();
		// Get the length of given pattern
		int m = pattern.length();
		// Possible character
		int badchar[] = new int[256];
		// Set initial frequency
		for (int i = 0; i < 256; ++i)
		{
			badchar[i] = -1;
		}
		// Set pattern character position
		for (int i = 0; i < m; ++i)
		{
			badchar[pattern.charAt(i)] = i;
		}
		int j = m - 1;
		int s = 0;
		while (s <= (n - m))
		{
			while (j >= 0 && 
                   pattern.charAt(j) == text.charAt(s + j))
			{
				j--;
			}
			if (j < 0)
			{
				System.out.println("Pattern found at index " + (s));
				if (s + m < n)
				{
					s += m - badchar[text.charAt(s + m)];
				}
				else
				{
					s += 1;
				}
			}
			else
			{
				s += Math.max(1, j - badchar[text.charAt(s + j)]);
			}
			// Rest the value of j
			j = m - 1;
		}
	}
	public static void main(String[] args)
	{
		Matching task = new Matching();
		// Given text
		String text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
		// Search pattern
		String pattern = "XYZ";
		// Find pattern which occurs in given text
		/*
		    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
		    Pattern = XYZ
		    --------------------------------
		    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
		        ---
		    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
		              ---
		    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
		                 ---
		    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
		                            ---
		*/
		task.findPattern(text, pattern);
	}
}

Output

Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
// Include header file
#include <iostream>
#include <string>
#include <math.h>

using namespace std;
// C++ program for
// Boyer moore algorithm for pattern searching
class Matching
{
	public: void findPattern(string text, string pattern)
	{
		// Get the length of given text
		int n = text.length();
		// Get the length of given pattern
		int m = pattern.length();
		// Possible character
		int badchar[256];
		// Set initial frequency
		for (int i = 0; i < 256; ++i)
		{
			badchar[i] = -1;
		}
		// Set pattern character position
		for (int i = 0; i < m; ++i)
		{
			badchar[pattern[i]] = i;
		}
		int j = m - 1;
		int s = 0;
		while (s <= (n - m))
		{
			while (j >= 0 && pattern[j] == text[s + j])
			{
				j--;
			}
			if (j < 0)
			{
				cout << "Pattern found at index " 
              		 << s << endl;
				if (s + m < n)
				{
					s += m - badchar[text[s + m]];
				}
				else
				{
					s += 1;
				}
			}
			else
			{
				s += max(1, j - badchar[text[s + j]]);
			}
			// Rest the value of j
			j = m - 1;
		}
	}
};
int main()
{
	Matching *task = new Matching();
	// Given text
	string text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	// Search pattern
	string pattern = "XYZ";
	// Find pattern which occurs in given text
	/*
	    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
	    Pattern = XYZ
	    --------------------------------
	    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
	        ---
	    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
	              ---
	    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                 ---
	    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                            ---
	*/
	task->findPattern(text, pattern);
	return 0;
}

Output

Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
// Include namespace system
using System;
// Csharp program for
// Boyer moore algorithm for pattern searching
public class Matching
{
	public void findPattern(String text, String pattern)
	{
		// Get the length of given text
		int n = text.Length;
		// Get the length of given pattern
		int m = pattern.Length;
		// Possible character
		int[] badchar = new int[256];
		// Set initial frequency
		for (int i = 0; i < 256; ++i)
		{
			badchar[i] = -1;
		}
		// Set pattern character position
		for (int i = 0; i < m; ++i)
		{
			badchar[pattern[i]] = i;
		}
		int j = m - 1;
		int s = 0;
		while (s <= (n - m))
		{
			while (j >= 0 && pattern[j] == text[s + j])
			{
				j--;
			}
			if (j < 0)
			{
				Console.WriteLine("Pattern found at index " + s);
				if (s + m < n)
				{
					s += m - badchar[text[s + m]];
				}
				else
				{
					s += 1;
				}
			}
			else
			{
				s += Math.Max(1, j - badchar[text[s + j]]);
			}
			// Rest the value of j
			j = m - 1;
		}
	}
	public static void Main(String[] args)
	{
		Matching task = new Matching();
		// Given text
		String text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
		// Search pattern
		String pattern = "XYZ";
		// Find pattern which occurs in given text
		/*
		    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
		    Pattern = XYZ
		    --------------------------------
		    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
		        ---
		    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
		              ---
		    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
		                 ---
		    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
		                            ---
		*/
		task.findPattern(text, pattern);
	}
}

Output

Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
package main
import "fmt"
// Go program for
// Boyer moore algorithm for pattern searching
type Matching struct {}
func getMatching() * Matching {
	var me *Matching = &Matching {}
	return me
}
func(this Matching) max(a,b int) int {
	
	if a > b {
		
		return a
	}
	return b
}
func(this Matching) findPattern(text, pattern string) {
	// Get the length of given text
	var n int = len(text)
	// Get the length of given pattern
	var m int = len(pattern)
	// Possible character
	var badchar = make([] int, 256)
	// Set initial frequency
	for i := 0 ; i < 256 ; i++ {
		badchar[i] = -1
	}
	// Set pattern character position
	for i := 0 ; i < m ; i++ {
		badchar[pattern[i]] = i
	}
	var j int = m - 1
	var s int = 0
	for (s <= (n - m)) {
		for (j >= 0 && pattern[j] == text[s + j]) {
			j--
		}
		if j < 0 {
			fmt.Println("Pattern found at index ", s)
			if s + m < n {
				s += m - badchar[text[s + m]]
			} else {
				s += 1
			}
		} else {
			s += this.max(1, j - badchar[text[s + j]])
		}
		// Rest the value of j
		j = m - 1
	}
}
func main() {
	var task * Matching = getMatching()
	// Given text
	var text string = "LXYZHEQXYZXYZQQHE11HXYZ1E"
	// Search pattern
	var pattern string = "XYZ"
	// Find pattern which occurs in given text
	/*
	    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
	    Pattern = XYZ
	    --------------------------------
	    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
	        ---
	    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
	              ---
	    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                 ---
	    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                            ---
	*/
	task.findPattern(text, pattern)
}

Output

Pattern found at index 2
Pattern found at index 5
Pattern found at index 6
<?php
// Php program for
// Boyer moore algorithm for pattern searching
class Matching
{
	public	function findPattern($text, $pattern)
	{
		// Get the length of given text
		$n = strlen($text);
		// Get the length of given pattern
		$m = strlen($pattern);
		// 256 Possible character
		// Set initial frequency -1
		$badchar = array_fill(0, 256, -1);
		// Set pattern character position
		for ($i = 0; $i < $m; ++$i)
		{
			$badchar[ord($pattern[$i])] = $i;
		}
		$j = $m - 1;
		$s = 0;
		while ($s <= ($n - $m))
		{
			while ($j >= 0 && $pattern[$j] == $text[$s + $j])
			{
				$j--;
			}
			if ($j < 0)
			{
				echo("Pattern found at index ".$s."\n");
				if ($s + $m < $n)
				{
					$s += $m - $badchar[ord($text[$s + $m])];
				}
				else
				{
					$s += 1;
				}
			}
			else
			{
				$s += max(1, $j - $badchar[ord($text[$s + $j])]);
			}
			// Rest the value of j
			$j = $m - 1;
		}
	}
}

function main()
{
	$task = new Matching();
	// Given text
	$text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	// Search pattern
	$pattern = "XYZ";
	// Find pattern which occurs in given text
	/*
	    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
	    Pattern = XYZ
	    --------------------------------
	    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
	        ---
	    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
	              ---
	    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                 ---
	    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                            ---
	*/
	$task->findPattern($text, $pattern);
}
main();

Output

Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
// Node JS program for
// Boyer moore algorithm for pattern searching
class Matching
{
	findPattern(text, pattern)
	{
		// Get the length of given text
		var n = text.length;
		// Get the length of given pattern
		var m = pattern.length;
		// 256 Possible character
		// Set initial frequency -1
		var badchar = Array(256).fill(-1);
		// Set pattern character position
		for (var i = 0; i < m; ++i)
		{
			badchar[pattern.charAt(i).charCodeAt(0)] = i;
		}
		var j = m - 1;
		var s = 0;
		while (s <= (n - m))
		{
			while (j >= 0 && pattern.charAt(j) == text.charAt(s + j))
			{
				j--;
			}
			if (j < 0)
			{
				console.log("Pattern found at index " + s);
				if (s + m < n)
				{
					s += m - badchar[text.charCodeAt(s + m)];
				}
				else
				{
					s += 1;
				}
			}
			else
			{
				s += Math.max(1, j - 
                              badchar[text.charCodeAt(s + j)]);
			}
			// Rest the value of j
			j = m - 1;
		}
	}
}

function main()
{
	var task = new Matching();
	// Given text
	var text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	// Search pattern
	var pattern = "XYZ";
	// Find pattern which occurs in given text
	/*
	    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
	    Pattern = XYZ
	    --------------------------------
	    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
	        ---
	    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
	              ---
	    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                 ---
	    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                            ---
	*/
	task.findPattern(text, pattern);
}
main();

Output

Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
#  Python 3 program for
#  Boyer moore algorithm for pattern searching
class Matching :
	def max(self, a, b) :
		if (a > b) :
			return a
		
		return b
	
	def findPattern(self, text, pattern) :
		#  Get the length of given text
		n = len(text)
		#  Get the length of given pattern
		m = len(pattern)
		#  256 Possible character
		#  Set initial frequency -1
		badchar = [-1] * (256)
		i = 0
		#  Set pattern character position
		while (i < m) :
			badchar[ord(pattern[i])] = i
			i += 1
		
		j = m - 1
		s = 0
		while (s <= (n - m)) :
			while (j >= 0 and pattern[j] == text[s + j]) :
				j -= 1
			
			if (j < 0) :
				print("Pattern found at index ", s)
				if (s + m < n) :
					s += m - badchar[ord(text[s + m])]
				else :
					s += 1
				
			else :
				s += self.max(1, j - badchar[ord(text[s + j])])
			
			#  Rest the value of j
			j = m - 1
		
	

def main() :
	task = Matching()
	#  Given text
	text = "LXYZHEQXYZXYZQQHE11HXYZ1E"
	#  Search pattern
	pattern = "XYZ"
	#  Find pattern which occurs in given text
	#    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
	#    Pattern = XYZ
	#    --------------------------------
	#    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
	#        ---
	#    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
	#              ---
	#    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
	#                 ---
	#    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
	#                           ---
	task.findPattern(text, pattern)

if __name__ == "__main__": main()

Output

Pattern found at index  1
Pattern found at index  7
Pattern found at index  10
Pattern found at index  20
#  Ruby program for
#  Boyer moore algorithm for pattern searching
class Matching 
	def max(a, b) 
		if (a > b) 
			return a
		end

		return b
	end

	def findPattern(text, pattern) 
		#  Get the length of given text
		n = text.length
		#  Get the length of given pattern
		m = pattern.length
		#  256 Possible character
		#  Set initial frequency -1
		badchar = Array.new(256) {-1}
		i = 0
		#  Set pattern character position
		while (i < m) 
			badchar[pattern[i].ord] = i
			i += 1
		end

		j = m - 1
		s = 0
		while (s <= (n - m)) 
			while (j >= 0 && pattern[j] == text[s + j]) 
				j -= 1
			end

			if (j < 0) 
				print("Pattern found at index ", s, "\n")
				if (s + m < n) 
					s += m - badchar[text[s + m].ord]
				else
 
					s += 1
				end

			else
 
				s += self.max(1, j - badchar[text[s + j].ord])
			end

			#  Rest the value of j
			j = m - 1
		end

	end

end

def main() 
	task = Matching.new()
	#  Given text
	text = "LXYZHEQXYZXYZQQHE11HXYZ1E"
	#  Search pattern
	pattern = "XYZ"
	#  Find pattern which occurs in given text
	#    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
	#    Pattern = XYZ
	#    --------------------------------
	#    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
	#        ---
	#    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
	#              ---
	#    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
	#                 ---
	#    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
	#                            ---
	task.findPattern(text, pattern)
end

main()

Output

Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
import scala.collection.mutable._;
// Scala program for
// Boyer moore algorithm for pattern searching
class Matching()
{
	def max(a: Int, b: Int): Int = {
		if (a > b)
		{
			return a;
		}
		return b;
	}
	def findPattern(text: String, pattern: String): Unit = {
		// Get the length of given text
		var n: Int = text.length();
		// Get the length of given pattern
		var m: Int = pattern.length();
		// 256 Possible character
		// Set initial frequency -1
		var badchar: Array[Int] = Array.fill[Int](256)(-1);
		var i: Int = 0;
		// Set pattern character position
		while (i < m)
		{
			badchar(pattern.charAt(i)) = i;
			i += 1;
		}
		var j: Int = m - 1;
		var s: Int = 0;
		while (s <= (n - m))
		{
			while (j >= 0 && 
                   pattern.charAt(j) == text.charAt(s + j))
			{
				j -= 1;
			}
			if (j < 0)
			{
				println("Pattern found at index " + s);
				if (s + m < n)
				{
					s += m - badchar(text.charAt(s + m));
				}
				else
				{
					s += 1;
				}
			}
			else
			{
				s += max(1, j - badchar(text.charAt(s + j)));
			}
			// Rest the value of j
			j = m - 1;
		}
	}
}
object Main
{
	def main(args: Array[String]): Unit = {
		var task: Matching = new Matching();
		// Given text
		var text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
		// Search pattern
		var pattern: String = "XYZ";
		// Find pattern which occurs in given text
		/*
		    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
		    Pattern = XYZ
		    --------------------------------
		    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
		        ---
		    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
		              ---
		    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
		                 ---
		    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
		                            ---
		*/
		task.findPattern(text, pattern);
	}
}

Output

Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
import Foundation;
// Swift 4 program for
// Boyer moore algorithm for pattern searching
class Matching
{
	func max(_ a: Int, _ b: Int) -> Int
	{
		if (a > b)
		{
			return a;
		}
		return b;
	}
	func findPattern(_ t: String, _ p: String)
	{
      	let text = Array(t);
      	let pattern = Array(p);
		// Get the length of given text
		let n: Int = text.count;
		// Get the length of given pattern
		let m: Int = pattern.count;
		// 256 Possible character
		// Set initial frequency -1
		var badchar: [Int] = Array(repeating: -1, count: 256);
		var i: Int = 0;
		// Set pattern character position
		while (i < m)
		{
			badchar[Int(UnicodeScalar(
              String(pattern[i]))!.value)] = i;
			i += 1;
		}
		var j: Int = m - 1;
		var s: Int = 0;
		while (s <= (n - m))
		{
			while (j >= 0 && pattern[j] == text[s + j])
			{
				j -= 1;
			}
			if (j < 0)
			{
				print("Pattern found at index ", s);
				if (s + m < n)
				{
					s += m - badchar[
                      Int(UnicodeScalar(String(text[s + m]))!.value)];
				}
				else
				{
					s += 1;
				}
			}
			else
			{
				s += self.max(1, j - 
                              badchar[Int(UnicodeScalar(
                                String(text[s + j]))!.value)]);
			}
			// Rest the value of j
			j = m - 1;
		}
	}
}
func main()
{
	let task: Matching = Matching();
	// Given text
	let text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	// Search pattern
	let pattern: String = "XYZ";
	// Find pattern which occurs in given text
	/*
	    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
	    Pattern = XYZ
	    --------------------------------
	    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
	        ---
	    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
	              ---
	    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                 ---
	    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                            ---
	*/
	task.findPattern(text, pattern);
}
main();

Output

Pattern found at index  1
Pattern found at index  7
Pattern found at index  10
Pattern found at index  20
// Kotlin program for
// Boyer moore algorithm for pattern searching
class Matching
{
	fun max(a: Int, b: Int): Int
	{
		if (a > b)
		{
			return a;
		}
		return b;
	}
	fun findPattern(text: String, pattern: String): Unit
	{
		// Get the length of given text
		val n: Int = text.length;
		// Get the length of given pattern
		val m: Int = pattern.length;
		// 256 Possible character
		// Set initial frequency -1
		val badchar: Array < Int > = Array(256)
		{
			0
		};
		var i: Int = 0;
		// Set pattern character position
		while (i < m)
		{
			badchar[pattern.get(i).toInt()] = i;
			i += 1;
		}
		var j: Int = m - 1;
		var s: Int = 0;
		while (s <= (n - m))
		{
			while (j >= 0 && pattern.get(j) == text.get(s + j))
			{
				j -= 1;
			}
			if (j < 0)
			{
				println("Pattern found at index " + s);
				if (s + m < n)
				{
					s += m - badchar[text.get(s + m).toInt()];
				}
				else
				{
					s += 1;
				}
			}
			else
			{
				s += this.max(1, j - badchar[text.get(s + j).toInt()]);
			}
			// Rest the value of j
			j = m - 1;
		}
	}
}
fun main(args: Array < String > ): Unit
{
	val task: Matching = Matching();
	// Given text
	val text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
	// Search pattern
	val pattern: String = "XYZ";
	// Find pattern which occurs in given text
	/*
	    Text = LXYZHEQXYZXYZQQHE11HXYZ1E
	    Pattern = XYZ
	    --------------------------------
	    ➀  LXYZHEQXYZXYZQQHE11HXYZ1E
	        ---
	    ➁  LXYZHEQXYZXYZQQHE11HXYZ1E 
	              ---
	    ➂  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                 ---
	    ➃  LXYZHEQXYZXYZQQHE11HXYZ1E 
	                           ---
	*/
	task.findPattern(text, pattern);
}

Output

Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20

Comment

Please share your knowledge to improve code and content standard. Also submit your doubts, and test case. We improve by your feedback. We will try to resolve your query as soon as possible.

New Comment