Vous êtes sur la page 1 sur 19

/******************************************************************************
 * Compilation: javac RabinKarp.java
 * Execution:   java RabinKarp pat txt
 * Dependencies: none beyond the JDK (output goes to System.out)
*
* Reads in two strings, the pattern and the input text, and
* searches for the pattern in the input text using the
* Las Vegas version of the Rabin-Karp algorithm.
*
* text:
* match:
*
* pattern: rab
* text:
* match:
rab
*
* pattern: bcara
* text:
*
* text:
* pattern:
*
* text:
*
******************************************************************************/
import java.math.BigInteger;
import java.util.Random;
/**
 * The {@code RabinKarp} class finds the first occurrence of a pattern string
 * in a text string.
 * <p>
 * This implementation uses the Rabin-Karp algorithm in its Las Vegas form:
 * every hash match is confirmed by a character-by-character comparison, so a
 * reported match is always a real match.
 * <p>
 * see <a href="http://algs4.cs.princeton.edu/53substring">Section 5.3</a> of
 * <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
 */
public class RabinKarp {
    private String pat;      // the pattern (needed only for the Las Vegas check)
    private long patHash;    // pattern hash value
    private int M;           // pattern length
    private long Q;          // a large prime, small enough to avoid long overflow
    private int R;           // radix used by the rolling hash
    private long RM;         // R^(M-1) % Q

    /**
     * Preprocesses the pattern string.
     *
     * @param pattern the pattern string
     * @param R the alphabet size
     * @throws UnsupportedOperationException always; this variant is not implemented
     */
    public RabinKarp(char[] pattern, int R) {
        throw new UnsupportedOperationException("Operation not supported yet");
    }

    /**
     * Preprocesses the pattern string.
     *
     * @param pat the pattern string
     */
    public RabinKarp(String pat) {
        this.pat = pat;      // save pattern (needed only for Las Vegas)
        R = 256;
        M = pat.length();
        Q = longRandomPrime();

        // precompute R^(M-1) % Q for use in removing the leading digit
        RM = 1;
        for (int i = 1; i <= M - 1; i++) {
            RM = (R * RM) % Q;
        }
        patHash = hash(pat, M);
    }

    // Compute hash for key[0..M-1] (Horner's method, modulo Q).
    private long hash(String key, int M) {
        long h = 0;
        for (int j = 0; j < M; j++) {
            h = (R * h + key.charAt(j)) % Q;
        }
        return h;
    }

    // Las Vegas version: does pat[] match txt[i..i+M-1] ?
    private boolean check(String txt, int i) {
        for (int j = 0; j < M; j++) {
            if (pat.charAt(j) != txt.charAt(i + j)) {
                return false;
            }
        }
        return true;
    }

    // Monte Carlo version: always return true
    private boolean check(int i) {
        return true;
    }

    /**
     * Returns the index of the first occurrence of the pattern string
     * in the text string.
     *
     * @param txt the text string
     * @return the index of the first occurrence of the pattern string
     *         in the text string; N (the text length) if no such match
     */
    public int search(String txt) {
        int N = txt.length();
        if (N < M) {
            return N;
        }
        long txtHash = hash(txt, M);

        // check for match at offset 0
        if ((patHash == txtHash) && check(txt, 0)) {
            return 0;
        }

        // slide the window: drop the leading digit, append the trailing digit,
        // then confirm any hash match with an exact comparison
        for (int i = M; i < N; i++) {
            // adding Q before subtracting keeps the intermediate value non-negative
            txtHash = (txtHash + Q - RM * txt.charAt(i - M) % Q) % Q;
            txtHash = (txtHash * R + txt.charAt(i)) % Q;

            int offset = i - M + 1;
            if ((patHash == txtHash) && check(txt, offset)) {
                return offset;
            }
        }

        // no match
        return N;
    }

    // a random 31-bit prime
    private static long longRandomPrime() {
        BigInteger prime = BigInteger.probablePrime(31, new Random());
        return prime.longValue();
    }

    /**
     * Takes a pattern string and an input string as command-line arguments;
     * searches for the pattern string in the text string; and prints
     * the first occurrence of the pattern string in the text string.
     *
     * @param args args[0] is the pattern, args[1] is the text
     */
    public static void main(String[] args) {
        String pat = args[0];
        String txt = args[1];
        RabinKarp searcher = new RabinKarp(pat);
        int offset = searcher.search(txt);

        // print results; both labels are 9 characters wide so the pattern
        // lines up under its match in the text
        System.out.println("text:    " + txt);
        System.out.print("pattern: ");
        for (int i = 0; i < offset; i++) {
            System.out.print(" ");
        }
        System.out.println(pat);
    }
}

/******************************************************************************
* Compilation: javac KMPplus.java
* Execution:
java KMPplus pattern text
* Dependencies: System.out.java
*
* Knuth-Morris-Pratt algorithm over UNICODE alphabet.

*
* % java KMPplus ABABAC BCBAABACAABABACAA
* text:
BCBAABACAABABACAA
* pattern:
ABABAC
*
* % java KMPplus aabaaaba ccaabaabaabaaabaab
* text:
ccaabaabaabaaabaab
* pattern:
aabaaaba
*
* % java KMPplus aabaaabb ccaabaabaabaaabaab
* text:
ccaabaabaabaaabaab
* pattern:
aabaaabb
*
******************************************************************************/
/**
 * Knuth-Morris-Pratt substring search that works for any {@code char}
 * alphabet: instead of a full DFA table it keeps one "next" (failure) entry
 * per pattern position.
 */
public class KMPplus {
    private String pattern;
    private int[] next;

    /**
     * Creates the Knuth-Morris-Pratt NFA from the pattern and prints the
     * resulting {@code next[]} table to standard output.
     *
     * @param pattern the pattern string
     */
    public KMPplus(String pattern) {
        this.pattern = pattern;
        int M = pattern.length();
        next = new int[M];
        int j = -1;
        for (int i = 0; i < M; i++) {
            if (i == 0) {
                next[i] = -1;
            } else if (pattern.charAt(i) != pattern.charAt(j)) {
                next[i] = j;
            } else {
                // same character would mismatch again, so reuse j's fallback
                next[i] = next[j];
            }
            while (j >= 0 && pattern.charAt(i) != pattern.charAt(j)) {
                j = next[j];
            }
            j++;
        }

        for (int i = 0; i < M; i++) {
            System.out.println("next[" + i + "] = " + next[i]);
        }
    }

    /**
     * Simulates the NFA on the text to find the first match.
     *
     * @param text the text string
     * @return offset of the first occurrence of the pattern in the text,
     *         or N (the text length) if there is no match
     */
    public int search(String text) {
        int M = pattern.length();
        int N = text.length();
        int i, j;
        for (i = 0, j = 0; i < N && j < M; i++) {
            // fall back until the current text character can be consumed
            while (j >= 0 && text.charAt(i) != pattern.charAt(j)) {
                j = next[j];
            }
            j++;
        }
        if (j == M) {
            return i - M;
        }
        return N;
    }

    /**
     * Test client.
     *
     * @param args args[0] is the pattern, args[1] is the text
     */
    public static void main(String[] args) {
        String pattern = args[0];
        String text = args[1];
        int M = pattern.length();
        int N = text.length();

        // substring search
        KMPplus kmp = new KMPplus(pattern);
        int offset = kmp.search(text);

        // print results; both labels are 9 characters wide so the pattern
        // lines up under its match in the text
        System.out.println("M = " + M + ", N = " + N);
        System.out.println("text:    " + text);
        System.out.print("pattern: ");
        for (int i = 0; i < offset; i++) {
            System.out.print(" ");
        }
        System.out.println(pattern);
    }
}

/******************************************************************************
* Compilation: javac SystemSearch.java
* Execution:
java SystemSearch n
* Dependencies: System.out.java
*
* Search for the string a^N b in the string a^2N
* where N = 2^n.
*
*
******************************************************************************/
/**
 * Stress test for {@link String#indexOf(String)}: builds a text made only of
 * 'a' characters and a query of 'a' characters followed by a single 'b'.
 */
public class SystemSearch {

    /**
     * Reads the number of doublings from the command line, builds the text and
     * the query, and prints the result of {@code text.indexOf(query)}.
     *
     * @param args args[0] is n, the number of times both strings are doubled
     */
    public static void main(String[] args) {
        final int doublings = Integer.parseInt(args[0]);

        String haystack = "a";
        String needle = "a";
        for (int round = doublings; round > 0; round--) {
            haystack = haystack.concat(haystack);
            needle = needle.concat(needle);
        }

        // the text gets one extra doubling; the query gets the terminating 'b'
        haystack = haystack.concat(haystack);
        needle = needle.concat("b");

        int position = haystack.indexOf(needle);
        System.out.println(position);
    }
}

/******************************************************************************
* Compilation: javac Brute.java
* Execution:
java Brute pattern text
* Dependencies: System.out.java
*
* Reads in two strings, the pattern and the input text, and
* searches for the pattern in the input text using brute force.
*

* text:
* pattern:
*
* text:
* pattern:
rab
*
* text:
* pattern:
*
* text:
* pattern:
bcara
*
* text:
*
******************************************************************************/
/**
 * Brute-force substring search, in two formulations (nested loops and a
 * single loop with explicit backup), each for {@code String} and for
 * {@code char[]} input.
 */
public class Brute {

   /***************************************************************************
    *  String versions.
    ***************************************************************************/

    /**
     * Nested-loop brute-force search.
     *
     * @param pat the pattern
     * @param txt the text
     * @return offset of the first match, or N (the text length) if no match
     */
    public static int search1(String pat, String txt) {
        int M = pat.length();
        int N = txt.length();
        for (int i = 0; i <= N - M; i++) {
            int j;
            for (j = 0; j < M; j++) {
                if (txt.charAt(i + j) != pat.charAt(j)) {
                    break;
                }
            }
            if (j == M) {
                return i;            // found at offset i
            }
        }
        return N;                    // not found
    }

    /**
     * Single-loop brute-force search with explicit backup of the text index.
     *
     * @param pat the pattern
     * @param txt the text
     * @return offset of the first match, or N (the text length) if no match
     */
    public static int search2(String pat, String txt) {
        int M = pat.length();
        int N = txt.length();
        int i, j;
        for (i = 0, j = 0; i < N && j < M; i++) {
            if (txt.charAt(i) == pat.charAt(j)) {
                j++;
            } else {
                // back up to just after where this attempt began
                i -= j;
                j = 0;
            }
        }
        if (j == M) {
            return i - M;            // found
        }
        return N;                    // not found
    }

   /***************************************************************************
    *  char[] array versions.
    ***************************************************************************/

    /**
     * Nested-loop brute-force search over character arrays.
     *
     * @param pattern the pattern
     * @param text the text
     * @return offset of the first match, or N (the text length) if no match
     */
    public static int search1(char[] pattern, char[] text) {
        int M = pattern.length;
        int N = text.length;
        for (int i = 0; i <= N - M; i++) {
            int j;
            for (j = 0; j < M; j++) {
                if (text[i + j] != pattern[j]) {
                    break;
                }
            }
            if (j == M) {
                return i;            // found at offset i
            }
        }
        return N;                    // not found
    }

    /**
     * Single-loop brute-force search over character arrays, with explicit
     * backup of the text index.
     *
     * @param pattern the pattern
     * @param text the text
     * @return offset of the first match, or N (the text length) if no match
     */
    public static int search2(char[] pattern, char[] text) {
        int M = pattern.length;
        int N = text.length;
        int i, j;
        for (i = 0, j = 0; i < N && j < M; i++) {
            if (text[i] == pattern[j]) {
                j++;
            } else {
                // back up to just after where this attempt began
                i -= j;
                j = 0;
            }
        }
        if (j == M) {
            return i - M;            // found
        }
        return N;                    // not found
    }

    // Prints one "pattern:" line with the pattern shifted right by offset.
    private static void printAligned(String pat, int offset) {
        System.out.print("pattern: ");
        for (int i = 0; i < offset; i++) {
            System.out.print(" ");
        }
        System.out.println(pat);
    }

    /**
     * Takes a pattern string and an input string as command-line arguments;
     * searches for the pattern string in the text string; and prints
     * the first occurrence of the pattern string in the text string,
     * once per search method.
     *
     * @param args args[0] is the pattern, args[1] is the text
     */
    public static void main(String[] args) {
        String pat = args[0];
        String txt = args[1];
        char[] pattern = pat.toCharArray();
        char[] text    = txt.toCharArray();

        int offset1a = search1(pat, txt);
        int offset2a = search2(pat, txt);
        int offset1b = search1(pattern, text);
        int offset2b = search2(pattern, text);

        // print results; both labels are 9 characters wide so each pattern
        // line sits under its match in the text
        System.out.println("text:    " + txt);

        printAligned(pat, offset1a);   // from brute force search method 1a
        printAligned(pat, offset2a);   // from brute force search method 2a
        printAligned(pat, offset1b);   // from brute force search method 1b
        printAligned(pat, offset2b);   // from brute force search method 2b
    }
}

/******************************************************************************
* Compilation: javac Manacher.java
* Execution:
java Manacher text
* Dependencies: System.out.java
*
* Computes the longest palindromic substring in linear time
* using Manacher's algorithm.
*
* Credits: The code is lifted from the following excellent reference
* http://www.leetcode.com/2011/11/longest-palindromic-substring-part-ii.html
*
******************************************************************************/
/**
 * Computes the longest palindromic substring of a string in linear time
 * using Manacher's algorithm.
 */
public class Manacher {
    private int[]  p;  // p[i] = length of longest palindromic substring of t, centered at i
    private String s;  // original string
    private char[] t;  // transformed string

    /**
     * Runs Manacher's algorithm on the given string.
     *
     * @param s the string to analyse
     */
    public Manacher(String s) {
        this.s = s;
        preprocess();
        p = new int[t.length];

        int center = 0, right = 0;
        for (int i = 1; i < t.length - 1; i++) {
            int mirror = 2 * center - i;

            // reuse the mirrored result while i is inside the current palindrome
            if (right > i) {
                p[i] = Math.min(right - i, p[mirror]);
            }

            // attempt to expand palindrome centered at i; the distinct
            // sentinels at both ends of t guarantee the loop stops in bounds
            while (t[i + (1 + p[i])] == t[i - (1 + p[i])]) {
                p[i]++;
            }

            // if palindrome centered at i expands past right,
            // adjust center based on expanded palindrome.
            if (i + p[i] > right) {
                center = i;
                right = i + p[i];
            }
        }
    }

    // Transform s into t.
    // For example, if s = "abba", then t = "$#a#b#b#a#@"
    // the # are interleaved to handle even/odd-length palindromes uniformly
    // $ and @ are prepended and appended to each end to avoid bounds checking
    private void preprocess() {
        t = new char[s.length() * 2 + 3];
        t[0] = '$';
        t[s.length() * 2 + 2] = '@';
        for (int i = 0; i < s.length(); i++) {
            t[2 * i + 1] = '#';
            t[2 * i + 2] = s.charAt(i);
        }
        t[s.length() * 2 + 1] = '#';
    }

    /**
     * Returns the longest palindromic substring; on ties, the leftmost one.
     *
     * @return the longest palindromic substring of the original string
     */
    public String longestPalindromicSubstring() {
        int length = 0;   // length of longest palindromic substring
        int center = 0;   // center of longest palindromic substring
        for (int i = 1; i < p.length - 1; i++) {
            if (p[i] > length) {
                length = p[i];
                center = i;
            }
        }
        return s.substring((center - 1 - length) / 2, (center - 1 + length) / 2);
    }

    /**
     * Returns the longest palindromic substring centered at index i/2 of the
     * original string (odd i selects the gap between two characters).
     *
     * @param i twice the center position, in the range 0 to 2*length - 1
     * @return the longest palindromic substring with that center
     */
    public String longestPalindromicSubstring(int i) {
        int length = p[i + 2];
        int center = i + 2;
        return s.substring((center - 1 - length) / 2, (center - 1 + length) / 2);
    }

    /**
     * Test client: prints the longest palindromic substring of args[0],
     * followed by the longest palindrome for every possible center.
     *
     * @param args args[0] is the text
     */
    public static void main(String[] args) {
        String s = args[0];
        Manacher manacher = new Manacher(s);
        System.out.println(manacher.longestPalindromicSubstring());
        for (int i = 0; i < 2 * s.length(); i++) {
            System.out.println(i + ": " + manacher.longestPalindromicSubstring(i));
        }
    }
}

/******************************************************************************
* Compilation: javac BoyerMoore.java
* Execution:
java BoyerMoore pattern text
* Dependencies: System.out.java
*
* Reads in two strings, the pattern and the input text, and
* searches for the pattern in the input text using the
* bad-character rule part of the Boyer-Moore algorithm.
* (does not implement the strong good suffix rule)
*
* text:
* pattern:
*
* text:
* pattern:
rab
*
* text:
* pattern:
bcara
*
* text:
* pattern:
*
* text:
*
******************************************************************************/
/**
 * The {@code BoyerMoore} class finds the first occurrence of a pattern string
 * in a text string.
 * <p>
 * This implementation uses the Boyer-Moore algorithm (with the bad-character
 * rule, but not the strong good suffix rule).
 * <p>
 * see <a href="http://algs4.cs.princeton.edu/53substring">Section 5.3</a> of
 * <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
 */
public class BoyerMoore {
    private final int R;           // the radix (alphabet size)
    private final int[] right;     // the bad-character skip array
    private final char[] pattern;  // the pattern as a character array
    private final String pat;      // the pattern as a string

    /**
     * Preprocesses the pattern string, using an alphabet of 256 characters.
     *
     * @param pat the pattern string
     */
    public BoyerMoore(String pat) {
        this.R = 256;
        this.pat = pat;
        // keep both representations so either search overload can be used
        this.pattern = pat.toCharArray();

        // position of rightmost occurrence of c in the pattern
        right = new int[R];
        for (int c = 0; c < R; c++) {
            right[c] = -1;
        }
        for (int j = 0; j < pat.length(); j++) {
            right[pat.charAt(j)] = j;
        }
    }

    /**
     * Preprocesses the pattern string.
     *
     * @param pattern the pattern string
     * @param R the alphabet size
     */
    public BoyerMoore(char[] pattern, int R) {
        this.R = R;
        this.pattern = pattern.clone();   // defensive copy
        // keep both representations so either search overload can be used
        this.pat = new String(this.pattern);

        // position of rightmost occurrence of c in the pattern
        right = new int[R];
        for (int c = 0; c < R; c++) {
            right[c] = -1;
        }
        for (int j = 0; j < pattern.length; j++) {
            right[pattern[j]] = j;
        }
    }

    // Rightmost position of c in the pattern, or -1 if c does not occur.
    // A character outside the alphabet cannot be in the pattern (the
    // constructor would have failed), so it is reported as absent instead of
    // indexing past the end of right[].
    private int rightmost(char c) {
        return (c < R) ? right[c] : -1;
    }

    /**
     * Returns the index of the first occurrence of the pattern string
     * in the text string.
     *
     * @param txt the text string
     * @return the index of the first occurrence of the pattern string
     *         in the text string; N (the text length) if no such match
     */
    public int search(String txt) {
        int M = pat.length();
        int N = txt.length();
        int skip;
        for (int i = 0; i <= N - M; i += skip) {
            skip = 0;
            // compare right to left; on mismatch, align the text character
            // with its rightmost occurrence in the pattern (at least 1 step)
            for (int j = M - 1; j >= 0; j--) {
                if (pat.charAt(j) != txt.charAt(i + j)) {
                    skip = Math.max(1, j - rightmost(txt.charAt(i + j)));
                    break;
                }
            }
            if (skip == 0) {
                return i;    // found
            }
        }
        return N;            // not found
    }

    /**
     * Returns the index of the first occurrence of the pattern string
     * in the text string.
     *
     * @param text the text string
     * @return the index of the first occurrence of the pattern string
     *         in the text string; N (the text length) if no such match
     */
    public int search(char[] text) {
        int M = pattern.length;
        int N = text.length;
        int skip;
        for (int i = 0; i <= N - M; i += skip) {
            skip = 0;
            // compare right to left; on mismatch, align the text character
            // with its rightmost occurrence in the pattern (at least 1 step)
            for (int j = M - 1; j >= 0; j--) {
                if (pattern[j] != text[i + j]) {
                    skip = Math.max(1, j - rightmost(text[i + j]));
                    break;
                }
            }
            if (skip == 0) {
                return i;    // found
            }
        }
        return N;            // not found
    }

    /**
     * Takes a pattern string and an input string as command-line arguments;
     * searches for the pattern string in the text string; and prints
     * the first occurrence of the pattern string in the text string.
     *
     * @param args args[0] is the pattern, args[1] is the text
     */
    public static void main(String[] args) {
        String pat = args[0];
        String txt = args[1];
        char[] pattern = pat.toCharArray();
        char[] text    = txt.toCharArray();

        BoyerMoore boyermoore1 = new BoyerMoore(pat);
        BoyerMoore boyermoore2 = new BoyerMoore(pattern, 256);
        int offset1 = boyermoore1.search(txt);
        int offset2 = boyermoore2.search(text);

        // print results; both labels are 9 characters wide so each pattern
        // line sits under its match in the text
        System.out.println("text:    " + txt);

        System.out.print("pattern: ");
        for (int i = 0; i < offset1; i++) {
            System.out.print(" ");
        }
        System.out.println(pat);

        System.out.print("pattern: ");
        for (int i = 0; i < offset2; i++) {
            System.out.print(" ");
        }
        System.out.println(pat);
    }
}

/*
Multi Pattern Matching Algorithm :
Preprocessing: O(M) where M: total len of all the keywords
Search : Sublinear to the size of the text.
*/
#include <algorithm>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <vector>

#include "trie.cpp"

#define NOTDEFINED -1
#define XSIZE 256 //How many types of characters
#define LONGESTSIZE 10000
using namespace std;
class SetOracleBackwardMatching
{
private:
Trie<string,char>* oracle;
void buildTrie();
int minKeyLen;
vector<string> patterns_;
vector<string> replacements_;
string text_;
string replacedText_;
bool checkIfKeywords(set<int> indexes,int pos);
public:
void constructFactorOracleMulti(vector<string> patterns);
void printOracle();
string search(string text, vector<string> replacements);
SetOracleBackwardMatching() { oracle = new Trie<string,char>(); }
~SetOracleBackwardMatching() { delete oracle; }
};

## /* This function preprocess list of Patterns. Result is a Oracle a automata for

representing
all the substrings possible from all combinations of patterns. This is constr
ucted in O(M) where M is the sum of lengths of all the patterns.
*/
void SetOracleBackwardMatching::constructFactorOracleMulti(vector<string> patter
ns)
{
//Need a Set of Reverse Patterns chopped off the min length.
//Find the Shortest Pattern Length
if(patterns.size() == 0)
return;
for(vector<string>::iterator it = patterns.begin(); it != patterns.end()
; it++)

{
patterns_.push_back(*it);
}
int min = 10000;
int len = 0;
int totalLenSum = 0;
for(int i=0;i<patterns.size();i++)
{ len = patterns.at(i).length();
totalLenSum += len;
if(len < min)
min = len;
}
//Record the min
minKeyLen = min;
//Cut the Patterns to min length; reverse them and form a set
set<string> reverseOracles;
map<string, set<int> > prefixesWords;
for(int i=0;i<patterns.size();i++)
{
patterns.at(i) = patterns.at(i).substr(0,min);
reverse(patterns.at(i).begin(),patterns.at(i).end());
pair<set<string>::iterator,bool> mypair = reverseOracles.insert(
patterns.at(i));
if(mypair.second == false) //Same Reverse Prefix
{
(prefixesWords[*mypair.first]).insert(i); //Push this In
dex
}
else
{
set<int> temp;
temp.insert(i);
prefixesWords[*mypair.first] = temp;
}
}
for(set<string>::const_iterator it = reverseOracles.begin(); it != rever
seOracles.end(); it++)
{
}
//Now Traverse the Trie in BFS
Node<char>* current = oracle->getRoot();
queue<Node<char>*> bfsQueue;
Node<char>* supplyNode;
current->setSupply(NULL); //Set the Supply of Root Node to Null
bfsQueue.push(current);
while(!bfsQueue.empty())
{

current = bfsQueue.front();
if(current->getParent()!=NULL) //Not to be done for the Root
{
Node<char>* k = current->getParent()->getSupply(); //Get
the Supply of the Parent
char c = current->getContent(); //Get the label from par
ent to current Node. This is stored in current node itself
while ( (k != NULL) && (supplyNode=k->findChild(c))==NUL
L)
{
k = k->getSupply();
}
if( k == NULL)
{
current->setSupply(oracle->getRoot());
}
else
{
current->setSupply(supplyNode);
}
}
//Continue remaining operation of BFS Algorithm
bfsQueue.pop();
vector<Node<char>*> childrens = current->getChildrens();
for(int i = 0; i < childrens.size(); i++)
{
Node<char>* child = childrens.at(i);
if(!child->getBFSMarker())
{
child->setBFSMarker();
bfsQueue.push(child);
}
}
}
}
//This Function prints the Oracle After it is constructed.
void SetOracleBackwardMatching::printOracle()
{
cout << "----------------------------------------" << endl;
//Now Traverse the Trie in BFS
Node<char>* current = oracle->getRoot();
queue<Node<char>*> bfsQueue;
bfsQueue.push(current);
while(!bfsQueue.empty())
{
current = bfsQueue.front();
/////
cout << "Node: " << current->getContent();
if(current->wordMarker() == true)
{
//Output the Indices

//

## cout << " Found it ";

set<int> indexes = current->getKeywordIndexes();
for(set<int>::iterator it1 = indexes.begin(); it1 != indexes.end
(); it1++){
cout << *it1 << " ";
}
}
cout << "Child: ";
////////////
bfsQueue.pop();

## vector<Node<char>*> childrens = current->getChildrens();

for(int i = 0; i < childrens.size(); i++)
{
Node<char>* child = childrens.at(i);
std::cout << child->getContent() << " -" << child->getBF
SMarker1() << " ";
if(!child->getBFSMarker1())
{
child->setBFSMarker1();
bfsQueue.push(child);
}
}
cout << endl;
}
}
/* Helper function: used when the search window on the text is accepted by the
   automaton. Every terminal node maintains the set of patterns it represents.
   First the window in the text is checked for being a prefix of any of the
   keywords stored at this terminal. Only when that succeeds are the full
   keywords compared with the text starting from that position; each keyword
   that matches is replaced in replacedText_.

   indexes: indexes into patterns_ / replacements_ held by the terminal node.
   pos:     start of the window in text_.
   Returns true when the window is a prefix of at least one listed keyword.
*/
bool SetOracleBackwardMatching::checkIfKeywords(set<int> indexes,int pos)
{
    const string prefixToMatch = text_.substr(pos, minKeyLen);
    bool prefixFound = false;

    // Check if prefixToMatch is a prefix of one of the keywords.
    for (set<int>::iterator it1 = indexes.begin(); it1 != indexes.end(); it1++)
    {
        const string& keyword = patterns_.at(*it1);
        string::size_type i;
        for (i = 0; i < prefixToMatch.length(); i++)
        {
            if (prefixToMatch.at(i) != keyword.at(i))
                break;      // check the next keyword
        }
        if (i == prefixToMatch.length())
        {
            // Prefix matched with one of the keywords
            prefixFound = true;
            break;          // check no more prefix matches
        }
    }

    if (prefixFound)
    {
        for (set<int>::iterator it1 = indexes.begin(); it1 != indexes.end(); it1++)
        {
            const string& keyword = patterns_.at(*it1);
            if (keyword.compare(text_.substr(pos, keyword.length())) == 0)
            {
                // NOTE(review): pos is an offset into text_, but it is applied
                // to replacedText_. Once a replacement of a different length
                // has been made, the two strings no longer line up -- confirm
                // whether replacements are required to preserve length.
                replacedText_.replace(pos, keyword.length(), replacements_.at(*it1));
            }
        }
    }
    return prefixFound;
}
/* Searches the text against the patterns and returns a copy of the text with
   the matched keywords replaced. The search window is the length of the
   shortest pattern; each window is read right to left through the oracle.

   text:         the text to scan.
   replacements: replacement strings, looked up by the index of the matching
                 pattern.
*/
string SetOracleBackwardMatching::search(string text, vector<string> replacements)
{
    if (text.empty())
        return text;
    text_ = text;
    replacedText_ = text;

    // Use exactly the replacements passed to this call. Appending instead
    // would grow the list on every call and keep resolving indexes to the
    // entries of the first call.
    replacements_ = replacements;

    // No window fits: there are no patterns, or the text is shorter than the
    // shortest pattern. Without this guard the unsigned subtraction
    // text.size() - minKeyLen wraps around and text.at() throws.
    if (minKeyLen <= 0 || text.size() < static_cast<string::size_type>(minKeyLen))
        return replacedText_;

    const int lastStart = static_cast<int>(text.size()) - minKeyLen;
    int pos = 0;
    while (pos <= lastStart)
    {
        // Read the window backwards, following the oracle from its root.
        Node<char>* current = oracle->getRoot();
        int j = minKeyLen - 1;
        while (j >= 0 && current != NULL)
        {
            current = current->findChild(text.at(pos + j));
            j--;
        }
        if (current != NULL && j == -1)
        {
            // Whole window accepted: verify and replace the keywords that
            // start at pos. j is already -1, so the shift below is 1 whether
            // or not a keyword matched.
            checkIfKeywords(current->getKeywordIndexes(), pos);
        }
        // j + 1 is the index where reading failed (or -1 + 1 = 0 after a full
        // read); move the window start just past it.
        pos = pos + j + 2;
    }
    return replacedText_;
}
int main()
{
SetOracleBackwardMatching* SOBM;
vector<string> patterns;
vector<string> replacements;

patterns.push_back("we");
patterns.push_back("SFO");
patterns.push_back("meet");
patterns.push_back("meete");
patterns.push_back("jeeter");
patterns.push_back("beer");
patterns.push_back("cokerre");
patterns.push_back("www.yahoo.com");
patterns.push_back("jaily");
patterns.push_back("MONEY");
replacements.push_back("be");
replacements.push_back("JFK");
replacements.push_back("talk");
replacements.push_back("keete");
replacements.push_back("deeter");
replacements.push_back("coca");
replacements.push_back("joke");
replacements.push_back("social");
replacements.push_back("WEBLOOSER");
replacements.push_back("daily");
replacements.push_back("HONEY");
vector<string> texts;
texts.push_back("Hi can we meet tomorrow at SFO. Get some beer");
texts.push_back("Today is jeeter day. There will be beer and coke");
texts.push_back("Can we this web page at www.yahoo.com");
texts.push_back("Today is jaily party.Event at abc.com");
texts.push_back("I am flying tomorrow from JFK");
texts.push_back("I need my MONEY back");
texts.push_back("You are the best jaily");
texts.push_back("Get the money. we will party");
texts.push_back("abc.com is website where we crack MONEY ");
for(int i= 0; i < texts.size(); i++)
{
SOBM = new SetOracleBackwardMatching();
SOBM->constructFactorOracleMulti(patterns);
cout << SOBM->search(texts.at(i),replacements) << endl;
delete SOBM;
}

return 0;
}