String matching with finite automata
String matching with finite automata is a pattern matching algorithm that uses a finite automaton (also called a finite state machine) to search for a given pattern within a text. The algorithm works by constructing a finite automaton that recognizes the pattern and then using it to scan the text, character by character, to determine if the pattern occurs in the text.
To build the finite automaton, the algorithm first constructs a set of states that represent the different possible matches of the pattern within the text. Each state corresponds to a particular position in the pattern, and the transitions between the states are determined by the characters in the pattern. Specifically, each transition represents the next possible state that the automaton can reach based on the next character in the text.
Once the finite automaton is constructed, the algorithm scans the text one character at a time, updating the state of the automaton based on the current character. If the automaton reaches an accepting state (which represents a complete match of the pattern), the algorithm reports a match.
The advantage of using a finite automaton for string matching is that the algorithm can recognize patterns in linear time, regardless of the length of the text. This makes it an efficient algorithm for searching large amounts of text for a given pattern.
Here given code implementation process.
// Java program for
// String matching with finite automata
public class Matching
{
public int findNextState(String text, int m, int state, int x)
{
if (state < m && x == text.charAt(state))
{
return state + 1;
}
int i = 0;
for (int p = state; p > 0; --p)
{
if (text.charAt(p - 1) == x)
{
while (i < p - 1 &&
text.charAt(i) == text.charAt(state - p + 1 + i))
{
i++;
}
if (i == p - 1)
{
return p;
}
i = 0;
}
}
return 0;
}
public void construction(String text, int m, int[][] track)
{
for (int state = 0; state <= m; ++state)
{
// 256 indicate all possible characters
for (int x = 0; x < 256; ++x)
{
track[state][x] = findNextState(text, m, state, x);
}
}
}
public void findPattern(String text, String pattern)
{
// Get the length of given text
int m = text.length();
// Get the length of given pattern
int n = pattern.length();
// m is a length of given text
// and 256 are indicate possible characters
int[][] track = new int[m + 1][256];
construction(text, m, track);
int state = 0;
for (int i = 0; i < n; i++)
{
state = track[state][pattern.charAt(i)];
if (state == m)
{
System.out.println("Pattern found at index " + (i - m + 1));
}
}
}
public static void main(String[] args)
{
Matching task = new Matching();
// Given text
String text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
// Search pattern
String pattern = "XYZ";
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
task.findPattern(pattern, text);
}
}
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
// Include header file
#include <iostream>
#include <string>
using namespace std;
// C++ program for
// String matching with finite automata
class Matching
{
public: int findNextState(
string text,
int m,
int state,
int x)
{
if (state < m && x == text[state])
{
return state + 1;
}
int i = 0;
for (int p = state; p > 0; --p)
{
if (text[p - 1] == x)
{
while (i < p - 1 &&
text[i] == text[state - p + 1 + i])
{
i++;
}
if (i == p - 1)
{
return p;
}
i = 0;
}
}
return 0;
}
void construction(string text,
int m,
int track[][256])
{
for (int state = 0; state <= m; ++state)
{
// 256 indicate all possible characters
for (int x = 0; x < 256; ++x)
{
track[state][x] =
this->findNextState(text, m, state, x);
}
}
}
void findPattern(string text, string pattern)
{
// Get the length of given text
int m = text.length();
// Get the length of given pattern
int n = pattern.length();
// m is a length of given text
// and 256 are indicate possible characters
int track[m + 1][256];
this->construction(text, m, track);
int state = 0;
for (int i = 0; i < n; i++)
{
state = track[state][pattern[i]];
if (state == m)
{
cout << "Pattern found at index "
<< (i - m + 1) << endl;
}
}
}
};
int main()
{
Matching *task = new Matching();
// Given text
string text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
// Search pattern
string pattern = "XYZ";
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
task->findPattern(pattern, text);
return 0;
}
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
// Include namespace system
using System;
// Csharp program for
// String matching with finite automata
public class Matching
{
public int findNextState(String text,
int m,
int state,
int x)
{
if (state < m && x == text[state])
{
return state + 1;
}
int i = 0;
for (int p = state; p > 0; --p)
{
if (text[p - 1] == x)
{
while (i < p - 1 &&
text[i] == text[state - p + 1 + i])
{
i++;
}
if (i == p - 1)
{
return p;
}
i = 0;
}
}
return 0;
}
public void construction(String text,
int m,
int[,] track)
{
for (int state = 0; state <= m; ++state)
{
// 256 indicate all possible characters
for (int x = 0; x < 256; ++x)
{
track[state,x] =
this.findNextState(text, m, state, x);
}
}
}
public void findPattern(String text, String pattern)
{
// Get the length of given text
int m = text.Length;
// Get the length of given pattern
int n = pattern.Length;
// m is a length of given text
// and 256 are indicate possible characters
int[,] track = new int[m + 1,256];
this.construction(text, m, track);
int state = 0;
for (int i = 0; i < n; i++)
{
state = track[state,pattern[i]];
if (state == m)
{
Console.WriteLine("Pattern found at index " +
(i - m + 1));
}
}
}
public static void Main(String[] args)
{
Matching task = new Matching();
// Given text
String text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
// Search pattern
String pattern = "XYZ";
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
task.findPattern(pattern, text);
}
}
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
package main
import "fmt"
// Go program for
// String matching with finite automata
type Matching struct {}
func getMatching() * Matching {
var me *Matching = &Matching {}
return me
}
func(this Matching) findNextState(text string,
m int,
state int,
x int) int {
if state < m && x == int(text[state]) {
return state + 1
}
var i int = 0
for p := state ; p > 0 ; p-- {
if int(text[p - 1]) == x {
for (i < p - 1 && text[i] == text[state - p + 1 + i]) {
i++
}
if i == p - 1 {
return p
}
i = 0
}
}
return 0
}
func(this Matching) construction(text string,
m int, track[][] int) {
for state := 0 ; state <= m ; state++ {
// 256 indicate all possible characters
for x := 0 ; x < 256 ; x++ {
track[state][x] = this.findNextState(text, m, state, x)
}
}
}
func(this Matching) findPattern(text, pattern string) {
// Get the length of given text
var m int = len(text)
// Get the length of given pattern
var n int = len(pattern)
// m is a length of given text
// and 256 are indicate possible characters
var track = make([][] int, m + 1)
for i := 0;i < m + 1;i++{
track[i] = make([] int, 256)
}
this.construction(text, m, track)
var state int = 0
for i := 0 ; i < n ; i++ {
state = track[state][pattern[i]]
if state == m {
fmt.Println("Pattern found at index ", (i - m + 1))
}
}
}
func main() {
var task * Matching = getMatching()
// Given text
var text string = "LXYZHEQXYZXYZQQHE11HXYZ1E"
// Search pattern
var pattern string = "XYZ"
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
task.findPattern(pattern, text)
}
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
<?php
// Php program for
// String matching with finite automata
class Matching
{
public function findNextState($text, $m, $state, $x)
{
if ($state < $m && $x == ord($text[$state]))
{
return $state + 1;
}
$i = 0;
for ($p = $state; $p > 0; --$p)
{
if (ord($text[$p - 1]) == $x)
{
while ($i < $p - 1 &&
$text[$i] == $text[$state - $p + 1 + $i])
{
$i++;
}
if ($i == $p - 1)
{
return $p;
}
$i = 0;
}
}
return 0;
}
public function construction($text, $m, &$track)
{
for ($state = 0; $state <= $m; ++$state)
{
// 256 indicate all possible characters
for ($x = 0; $x < 256; ++$x)
{
$track[$state][$x] =
$this->findNextState($text, $m, $state, $x);
}
}
}
public function findPattern($text, $pattern)
{
// Get the length of given text
$m = strlen($text);
// Get the length of given pattern
$n = strlen($pattern);
// m is a length of given text
// and 256 are indicate possible characters
$track = array_fill(0, $m + 1, array_fill(0, 256, 0));
$this->construction($text, $m, $track);
$state = 0;
for ($i = 0; $i < $n; $i++)
{
$state = $track[$state][ord($pattern[$i])];
if ($state == $m)
{
echo("Pattern found at index ".($i - $m + 1)."\n");
}
}
}
}
function main()
{
$task = new Matching();
// Given text
$text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
// Search pattern
$pattern = "XYZ";
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
$task->findPattern($pattern, $text);
}
main();
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
// Node JS program for
// String matching with finite automata
class Matching
{
findNextState(text, m, state, x)
{
if (state < m && x == text.charAt(state).charCodeAt(0))
{
return state + 1;
}
var i = 0;
for (var p = state; p > 0; --p)
{
if (text.charAt(p - 1).charCodeAt(0) == x)
{
while (i < p - 1 &&
text.charAt(i) == text.charAt(state - p + 1 + i))
{
i++;
}
if (i == p - 1)
{
return p;
}
i = 0;
}
}
return 0;
}
construction(text, m, track)
{
for (var state = 0; state <= m; ++state)
{
// 256 indicate all possible characters
for (var x = 0; x < 256; ++x)
{
track[state][x] =
this.findNextState(text, m, state, x);
}
}
}
findPattern(text, pattern)
{
// Get the length of given text
var m = text.length;
// Get the length of given pattern
var n = pattern.length;
// m is a length of given text
// and 256 are indicate possible characters
var track = Array(m + 1).fill(0).map(() =>
new Array(256).fill(0));
this.construction(text, m, track);
var state = 0;
for (var i = 0; i < n; i++)
{
state = track[state][pattern.charAt(i).charCodeAt(0)];
if (state == m)
{
console.log("Pattern found at index " +
(i - m + 1));
}
}
}
}
function main()
{
var task = new Matching();
// Given text
var text = "LXYZHEQXYZXYZQQHE11HXYZ1E";
// Search pattern
var pattern = "XYZ";
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
task.findPattern(pattern, text);
}
main();
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
# Python 3 program for
# String matching with finite automata
class Matching :
def findNextState(self, text, m, state, x) :
if (state < m and x == ord(text[state])) :
return state + 1
i = 0
p = state
while (p > 0) :
if (ord(text[p - 1]) == x) :
while (i < p - 1 and
text[i] == text[state - p + 1 + i]) :
i += 1
if (i == p - 1) :
return p
i = 0
p -= 1
return 0
def construction(self, text, m, track) :
state = 0
while (state <= m) :
x = 0
# 256 indicate all possible characters
while (x < 256) :
track[state][x] = self.findNextState(text, m, state, x)
x += 1
state += 1
def findPattern(self, text, pattern) :
# Get the length of given text
m = len(text)
# Get the length of given pattern
n = len(pattern)
# m is a length of given text
# and 256 are indicate possible characters
track = [[0] * (256) for _ in range(m + 1) ]
self.construction(text, m, track)
state = 0
i = 0
while (i < n) :
state = track[state][ord(pattern[i])]
if (state == m) :
print("Pattern found at index ", (i - m + 1))
i += 1
def main() :
task = Matching()
# Given text
text = "LXYZHEQXYZXYZQQHE11HXYZ1E"
# Search pattern
pattern = "XYZ"
# Find pattern which occurs in given text
# Text = LXYZHEQXYZXYZQQHE11HXYZ1E
# Pattern = XYZ
# --------------------------------
# ➀ LXYZHEQXYZXYZQQHE11HXYZ1E
# ---
# ➁ LXYZHEQXYZXYZQQHE11HXYZ1E
# ---
# ➂ LXYZHEQXYZXYZQQHE11HXYZ1E
# ---
# ➃ LXYZHEQXYZXYZQQHE11HXYZ1E
# ---
task.findPattern(pattern, text)
if __name__ == "__main__": main()
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
# Ruby program for
# String matching with finite automata
class Matching
def findNextState(text, m, state, x)
if (state < m && x == text[state].ord)
return state + 1
end
i = 0
p = state
while (p > 0)
if (text[p - 1].ord == x)
while (i < p - 1 &&
text[i] == text[state - p + 1 + i])
i += 1
end
if (i == p - 1)
return p
end
i = 0
end
p -= 1
end
return 0
end
def construction(text, m, track)
state = 0
while (state <= m)
x = 0
# 256 indicate all possible characters
while (x < 256)
track[state][x] = self.findNextState(text, m, state, x)
x += 1
end
state += 1
end
end
def findPattern(text, pattern)
# Get the length of given text
m = text.length
# Get the length of given pattern
n = pattern.length
# m is a length of given text
# and 256 are indicate possible characters
track = Array.new(m + 1) {Array.new(256) {0}}
self.construction(text, m, track)
state = 0
i = 0
while (i < n)
state = track[state][pattern[i].ord]
if (state == m)
print("Pattern found at index ",
(i - m + 1), "\n")
end
i += 1
end
end
end
def main()
task = Matching.new()
# Given text
text = "LXYZHEQXYZXYZQQHE11HXYZ1E"
# Search pattern
pattern = "XYZ"
# Find pattern which occurs in given text
# Text = LXYZHEQXYZXYZQQHE11HXYZ1E
# Pattern = XYZ
# --------------------------------
# ➀ LXYZHEQXYZXYZQQHE11HXYZ1E
# ---
# ➁ LXYZHEQXYZXYZQQHE11HXYZ1E
# ---
# ➂ LXYZHEQXYZXYZQQHE11HXYZ1E
# ---
# ➃ LXYZHEQXYZXYZQQHE11HXYZ1E
# ---
task.findPattern(pattern, text)
end
main()
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
import scala.collection.mutable._;
// Scala program for
// String matching with finite automata
class Matching()
{
def findNextState(text: String,
m: Int,
state: Int,
x: Int): Int = {
if (state < m && x == text.charAt(state).toInt)
{
return state + 1;
}
var i: Int = 0;
var p: Int = state;
while (p > 0)
{
if (text.charAt(p - 1).toInt == x)
{
while (i < p - 1 &&
text.charAt(i) == text.charAt(state - p + 1 + i))
{
i += 1;
}
if (i == p - 1)
{
return p;
}
i = 0;
}
p -= 1;
}
return 0;
}
def construction(text: String,
m: Int,
track: Array[Array[Int]]): Unit = {
var state: Int = 0;
while (state <= m)
{
var x: Int = 0;
// 256 indicate all possible characters
while (x < 256)
{
track(state)(x) = findNextState(text, m, state, x);
x += 1;
}
state += 1;
}
}
def findPattern(text: String, pattern: String): Unit = {
// Get the length of given text
var m: Int = text.length();
// Get the length of given pattern
var n: Int = pattern.length();
// m is a length of given text
// and 256 are indicate possible characters
var track: Array[Array[Int]] = Array.fill[Int](m + 1, 256)(0);
construction(text, m, track);
var state: Int = 0;
var i: Int = 0;
while (i < n)
{
state = track(state)(pattern.charAt(i));
if (state == m)
{
println("Pattern found at index " + (i - m + 1));
}
i += 1;
}
}
}
object Main
{
def main(args: Array[String]): Unit = {
var task: Matching = new Matching();
// Given text
var text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
// Search pattern
var pattern: String = "XYZ";
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
task.findPattern(pattern, text);
}
}
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
import Foundation;
// Swift 4 program for
// String matching with finite automata
class Matching
{
func findNextState(_ text: [Character],
_ m: Int,
_ state: Int,
_ x: Int) -> Int
{
if (state < m &&
x == Int(UnicodeScalar(String(text[state]))!.value))
{
return state + 1;
}
var i: Int = 0;
var p: Int = state;
while (p > 0)
{
if (Int(UnicodeScalar(String(text[p - 1]))!.value) == x)
{
while (i < p - 1 && text[i] == text[state - p + 1 + i])
{
i += 1;
}
if (i == p - 1)
{
return p;
}
i = 0;
}
p -= 1;
}
return 0;
}
func construction(_ text: [Character],
_ m: Int,
_ track: inout[[Int]])
{
var state: Int = 0;
while (state <= m)
{
var x: Int = 0;
// 256 indicate all possible characters
while (x < 256)
{
track[state][x] = self.findNextState(text, m, state, x);
x += 1;
}
state += 1;
}
}
func findPattern(_ t: String, _ p: String)
{
let text = Array(t);
let pattern = Array(p);
// Get the length of given text
let m: Int = text.count;
// Get the length of given pattern
let n: Int = pattern.count;
// m is a length of given text
// and 256 are indicate possible characters
var track: [[Int]] =
Array(repeating: Array(repeating: 0, count: 256),
count: m + 1);
self.construction(text, m, &track);
var state: Int = 0;
var i: Int = 0;
while (i < n)
{
state = track[state][Int(
UnicodeScalar(String(pattern[i]))!.value)];
if (state == m)
{
print("Pattern found at index ", (i - m + 1));
}
i += 1;
}
}
}
func main()
{
let task: Matching = Matching();
// Given text
let text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
// Search pattern
let pattern: String = "XYZ";
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
task.findPattern(pattern, text);
}
main();
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
// Kotlin program for
// String matching with finite automata
class Matching
{
fun findNextState(text: String,
m: Int,
state: Int,
x: Int): Int
{
if (state < m && x == text.get(state).toInt())
{
return state + 1;
}
var i: Int = 0;
var p: Int = state;
while (p > 0)
{
if (text.get(p - 1).toInt() == x)
{
while (i < p - 1 &&
text.get(i) == text.get(state - p + 1 + i))
{
i += 1;
}
if (i == p - 1)
{
return p;
}
i = 0;
}
p -= 1;
}
return 0;
}
fun construction(text: String,
m: Int,
track: Array < Array < Int >> ): Unit
{
var state: Int = 0;
while (state <= m)
{
var x: Int = 0;
// 256 indicate all possible characters
while (x < 256)
{
track[state][x] =
this.findNextState(text, m, state, x);
x += 1;
}
state += 1;
}
}
fun findPattern(text: String, pattern: String): Unit
{
// Get the length of given text
val m: Int = text.length;
// Get the length of given pattern
val n: Int = pattern.length;
// m is a length of given text
// and 256 are indicate possible characters
val track: Array < Array < Int >> = Array(m + 1)
{
Array(256)
{
0
}
};
this.construction(text, m, track);
var state: Int = 0;
var i: Int = 0;
while (i < n)
{
state = track[state][pattern.get(i).toInt()];
if (state == m)
{
println("Pattern found at index " + (i - m + 1));
}
i += 1;
}
}
}
fun main(args: Array < String > ): Unit
{
val task: Matching = Matching();
// Given text
val text: String = "LXYZHEQXYZXYZQQHE11HXYZ1E";
// Search pattern
val pattern: String = "XYZ";
// Find pattern which occurs in given text
/*
Text = LXYZHEQXYZXYZQQHE11HXYZ1E
Pattern = XYZ
--------------------------------
➀ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➁ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➂ LXYZHEQXYZXYZQQHE11HXYZ1E
---
➃ LXYZHEQXYZXYZQQHE11HXYZ1E
---
*/
task.findPattern(pattern, text);
}
Output
Pattern found at index 1
Pattern found at index 7
Pattern found at index 10
Pattern found at index 20
Please share your knowledge to improve code and content standard. Also submit your doubts, and test case. We improve by your feedback. We will try to resolve your query as soon as possible.
New Comment