[+] BoyerMoore算法匹配

This commit is contained in:
huiyadanli 2019-12-23 00:47:27 +08:00
parent 3438a07596
commit 13abab93c3
2 changed files with 217 additions and 0 deletions

View File

@ -0,0 +1,216 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace RevokeMsgPatcher.Assistant.Matcher
{
/// <summary>
/// Exact byte-sequence search based on the Boyer-Moore algorithm, combining the
/// bad-character and good-suffix heuristics to skip over positions of the text
/// that cannot contain a match.
/// </summary>
public class BoyerMooreMatcher
{
    /// <summary>Number of distinct values a byte can take; size of the bad-character table.</summary>
    private const int AlphabetSize = 256;

    /// <summary>
    /// Builds the bad-character table for <paramref name="pattern"/>.
    /// </summary>
    /// <param name="pattern">The pattern to preprocess.</param>
    /// <returns>
    /// A table indexed by byte value. Each entry holds the distance from the last
    /// occurrence of that byte in the pattern to the end of the pattern
    /// (<c>m - 1 - lastIndex</c>), or <c>m</c> when the byte does not occur at all.
    /// </returns>
    static int[] PreprocessToBuildBadCharactorHeuristic(byte[] pattern)
    {
        int m = pattern.Length;
        int[] badCharactorShifts = new int[AlphabetSize];

        // Bytes that never occur in the pattern allow the largest possible skip.
        for (int i = 0; i < AlphabetSize; i++)
        {
            badCharactorShifts[i] = m;
        }

        // Later (right-most) occurrences overwrite earlier ones, so the table ends
        // up describing the last occurrence of every byte.
        for (int i = 0; i < m; i++)
        {
            badCharactorShifts[pattern[i]] = m - 1 - i;
        }

        return badCharactorShifts;
    }

    /// <summary>
    /// Builds the good-suffix table for <paramref name="pattern"/>.
    /// </summary>
    /// <param name="pattern">The pattern to preprocess; must contain at least one byte.</param>
    /// <returns>
    /// A table indexed by the pattern position at which a mismatch occurred; each
    /// entry is the shift to apply. Every entry is at least 1, and entry 0 is the
    /// shift applied after a complete match.
    /// </returns>
    static int[] PreprocessToBuildGoodSuffixHeuristic(byte[] pattern)
    {
        int m = pattern.Length;
        int[] goodSuffixShifts = new int[m];
        int[] suffixLengthArray = GetSuffixLengthArray(pattern);

        // Default: no part of the matched suffix re-occurs, shift past the whole pattern.
        for (int i = 0; i < m; ++i)
        {
            goodSuffixShifts[i] = m;
        }

        // Case 1: a prefix of the pattern (of length i + 1) is also a suffix.
        // Only entries still holding the default are tightened, and j never moves
        // backwards, so the smallest valid shift wins.
        int j = 0;
        for (int i = m - 1; i >= 0; --i)
        {
            if (suffixLengthArray[i] == i + 1)
            {
                for (; j < m - 1 - i; ++j)
                {
                    if (goodSuffixShifts[j] == m)
                    {
                        goodSuffixShifts[j] = m - 1 - i;
                    }
                }
            }
        }

        // Case 2: the matched suffix re-occurs elsewhere inside the pattern.
        // Iterating i upwards lets the right-most re-occurrence (smallest shift) win.
        for (int i = 0; i < m - 1; ++i)
        {
            goodSuffixShifts[m - 1 - suffixLengthArray[i]] = m - 1 - i;
        }

        return goodSuffixShifts;
    }

    /// <summary>
    /// Computes, for every position <c>i</c>, the length of the longest substring of
    /// <paramref name="pattern"/> that ends at <c>i</c> and is also a suffix of the
    /// whole pattern.
    /// </summary>
    /// <param name="pattern">The pattern to analyse; must contain at least one byte.</param>
    /// <returns>The suffix-length array; its last entry equals the pattern length.</returns>
    static int[] GetSuffixLengthArray(byte[] pattern)
    {
        int m = pattern.Length;
        int[] suffixLengthArray = new int[m];

        // f is the end position of the most recent explicit comparison run,
        // g is the position just before the left edge of that run.
        int f = 0;
        int g = m - 1;
        suffixLengthArray[m - 1] = m;

        for (int i = m - 2; i >= 0; --i)
        {
            if (i > g && suffixLengthArray[i + m - 1 - f] < i - g)
            {
                // Position i lies inside an already compared window and the
                // mirrored value fits in it, so it can be copied without comparing.
                suffixLengthArray[i] = suffixLengthArray[i + m - 1 - f];
            }
            else
            {
                if (i < g)
                {
                    g = i;
                }
                f = i;

                // Extend to the left while the bytes keep matching the pattern's suffix.
                while (g >= 0 && pattern[g] == pattern[g + m - 1 - f])
                {
                    --g;
                }
                suffixLengthArray[i] = f - g;
            }
        }

        return suffixLengthArray;
    }

    /// <summary>
    /// Scans <paramref name="text"/> for the first occurrence of
    /// <paramref name="pattern"/> at or after <paramref name="start"/>.
    /// </summary>
    /// <param name="text">The bytes to search in.</param>
    /// <param name="pattern">The non-empty pattern to search for.</param>
    /// <param name="start">The first shift to try.</param>
    /// <param name="badCharShifts">Table from <see cref="PreprocessToBuildBadCharactorHeuristic"/>.</param>
    /// <param name="goodSuffixShifts">Table from <see cref="PreprocessToBuildGoodSuffixHeuristic"/>.</param>
    /// <returns>The shift of the occurrence, or -1 when there is none.</returns>
    private static int FindNext(byte[] text, byte[] pattern, int start, int[] badCharShifts, int[] goodSuffixShifts)
    {
        int m = pattern.Length;
        int lastShift = text.Length - m;
        int s = start;

        while (s <= lastShift)
        {
            // Compare right to left; j ends at -1 when the whole pattern matched.
            int j = m - 1;
            while (j >= 0 && pattern[j] == text[s + j])
            {
                j--;
            }

            if (j < 0)
            {
                return s;
            }

            // The bad-character term equals (j - lastIndexOfMismatchedByte) and may
            // be zero or negative; the good-suffix shift is always >= 1, so taking
            // the maximum guarantees forward progress.
            s += Math.Max(goodSuffixShifts[j], badCharShifts[text[s + j]] - (m - 1) + j);
        }

        return -1;
    }

    /// <summary>
    /// Searches <paramref name="text"/> for the first occurrence of <paramref name="pattern"/>.
    /// </summary>
    /// <param name="text">The bytes to search in.</param>
    /// <param name="pattern">The bytes to search for.</param>
    /// <param name="firstShift">
    /// Receives the zero-based index of the first occurrence, or -1 when there is none.
    /// </param>
    /// <returns>
    /// <c>true</c> when the pattern was found; otherwise <c>false</c>. An empty pattern,
    /// or a pattern longer than the text, is reported as not found.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="text"/> or <paramref name="pattern"/> is <c>null</c>.
    /// </exception>
    public static bool TryMatch(byte[] text, byte[] pattern, out int firstShift)
    {
        firstShift = -1;

        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }
        if (pattern == null)
        {
            throw new ArgumentNullException(nameof(pattern));
        }

        // Nothing to search for, or the pattern cannot fit: skip the preprocessing.
        if (pattern.Length == 0 || pattern.Length > text.Length)
        {
            return false;
        }

        int[] badCharShifts = PreprocessToBuildBadCharactorHeuristic(pattern);
        int[] goodSuffixShifts = PreprocessToBuildGoodSuffixHeuristic(pattern);

        firstShift = FindNext(text, pattern, 0, badCharShifts, goodSuffixShifts);
        return firstShift >= 0;
    }

    /// <summary>
    /// Searches <paramref name="text"/> for every occurrence of <paramref name="pattern"/>,
    /// including overlapping ones.
    /// </summary>
    /// <param name="text">The bytes to search in.</param>
    /// <param name="pattern">The bytes to search for.</param>
    /// <returns>
    /// The zero-based indices of all occurrences in ascending order. The array is empty
    /// when there is no occurrence, when the pattern is empty, or when the pattern is
    /// longer than the text.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="text"/> or <paramref name="pattern"/> is <c>null</c>.
    /// </exception>
    public static int[] MatchAll(byte[] text, byte[] pattern)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }
        if (pattern == null)
        {
            throw new ArgumentNullException(nameof(pattern));
        }

        if (pattern.Length == 0 || pattern.Length > text.Length)
        {
            return new int[0];
        }

        int[] badCharShifts = PreprocessToBuildBadCharactorHeuristic(pattern);
        int[] goodSuffixShifts = PreprocessToBuildGoodSuffixHeuristic(pattern);

        // A growable list keeps memory proportional to the number of matches
        // rather than to the size of the text.
        List<int> shifts = new List<int>();

        int s = FindNext(text, pattern, 0, badCharShifts, goodSuffixShifts);
        while (s >= 0)
        {
            shifts.Add(s);

            // After a full match, goodSuffixShifts[0] is the smallest shift that can
            // still line up with another (possibly overlapping) occurrence.
            s = FindNext(text, pattern, s + goodSuffixShifts[0], badCharShifts, goodSuffixShifts);
        }

        return shifts.ToArray();
    }
}
}

View File

@ -68,6 +68,7 @@
<DependentUpon>FormAssisant.cs</DependentUpon>
</Compile>
<Compile Include="JsonData.cs" />
<Compile Include="Matcher\BoyerMooreMatcher.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<EmbeddedResource Include="FormAssisant.resx">