diff --git a/RevokeMsgPatcher.Assistant/Matcher/BoyerMooreMatcher.cs b/RevokeMsgPatcher.Assistant/Matcher/BoyerMooreMatcher.cs new file mode 100644 index 0000000..9233a0c --- /dev/null +++ b/RevokeMsgPatcher.Assistant/Matcher/BoyerMooreMatcher.cs @@ -0,0 +1,216 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace RevokeMsgPatcher.Assistant.Matcher +{ + public class BoyerMooreMatcher + { + private static int AlphabetSize = 256; + + private static int Max(int a, int b) { return (a > b) ? a : b; } + + static int[] PreprocessToBuildBadCharactorHeuristic(byte[] pattern) + { + int m = pattern.Length; + int[] badCharactorShifts = new int[AlphabetSize]; + + for (int i = 0; i < AlphabetSize; i++) + { + //badCharactorShifts[i] = -1; + badCharactorShifts[i] = m; + } + + // fill the actual value of last occurrence of a character + for (int i = 0; i < m; i++) + { + //badCharactorShifts[(int)pattern[i]] = i; + badCharactorShifts[(int)pattern[i]] = m - 1 - i; + } + + return badCharactorShifts; + } + + static int[] PreprocessToBuildGoodSuffixHeuristic(byte[] pattern) + { + int m = pattern.Length; + int[] goodSuffixShifts = new int[m]; + int[] suffixLengthArray = GetSuffixLengthArray(pattern); + + for (int i = 0; i < m; ++i) + { + goodSuffixShifts[i] = m; + } + + int j = 0; + for (int i = m - 1; i >= -1; --i) + { + if (i == -1 || suffixLengthArray[i] == i + 1) + { + for (; j < m - 1 - i; ++j) + { + if (goodSuffixShifts[j] == m) + { + goodSuffixShifts[j] = m - 1 - i; + } + } + } + } + + for (int i = 0; i < m - 1; ++i) + { + goodSuffixShifts[m - 1 - suffixLengthArray[i]] = m - 1 - i; + } + + return goodSuffixShifts; + } + + static int[] GetSuffixLengthArray(byte[] pattern) + { + int m = pattern.Length; + int[] suffixLengthArray = new int[m]; + + int f = 0, g = 0, i = 0; + + suffixLengthArray[m - 1] = m; + + g = m - 1; + for (i = m - 2; i >= 0; --i) + { + if (i > g && suffixLengthArray[i + m - 1 - f] < i - g) + { + suffixLengthArray[i] = suffixLengthArray[i + m - 1 - f]; + } + else + { + if (i < g) + { + g = i; + } + f = i; + + // find different preceded character suffix + while (g >= 0 && pattern[g] == pattern[g + m - 1 - f]) + { + --g; + } + suffixLengthArray[i] = f - g; + } + } + + return suffixLengthArray; + } + + public static bool TryMatch(byte[] text, byte[] pattern, out int firstShift) + { + firstShift = -1; + int n = text.Length; + int m = pattern.Length; + int s = 0; // s is shift of the pattern with respect to text + int j = 0; + + // fill the bad character and good suffix array by preprocessing + int[] badCharShifts = PreprocessToBuildBadCharactorHeuristic(pattern); + int[] goodSuffixShifts = PreprocessToBuildGoodSuffixHeuristic(pattern); + + while (s <= (n - m)) + { + // starts matching from the last character of the pattern + j = m - 1; + + // keep reducing index j of pattern while characters of + // pattern and text are matching at this shift s + while (j >= 0 && pattern[j] == text[s + j]) + { + j--; + } + + // if the pattern is present at current shift, then index j + // will become -1 after the above loop + if (j < 0) + { + firstShift = s; + return true; + } + else + { + // shift the pattern so that the bad character in text + // aligns with the last occurrence of it in pattern. the + // max function is used to make sure that we get a positive + // shift. We may get a negative shift if the last occurrence + // of bad character in pattern is on the right side of the + // current character. + //s += Max(1, j - badCharShifts[(int)text[s + j]]); + // now, compare bad char shift and good suffix shift to find best + s += Max(goodSuffixShifts[j], badCharShifts[(int)text[s + j]] - (m - 1) + j); + } + } + + return false; + } + + public static int[] MatchAll(byte[] text, byte[] pattern) + { + int n = text.Length; + int m = pattern.Length; + int s = 0; // s is shift of the pattern with respect to text + int j = 0; + int[] shiftIndexes = new int[n - m + 1]; + int c = 0; + + // fill the bad character and good suffix array by preprocessing + int[] badCharShifts = PreprocessToBuildBadCharactorHeuristic(pattern); + int[] goodSuffixShifts = PreprocessToBuildGoodSuffixHeuristic(pattern); + + while (s <= (n - m)) + { + // starts matching from the last character of the pattern + j = m - 1; + + // keep reducing index j of pattern while characters of + // pattern and text are matching at this shift s + while (j >= 0 && pattern[j] == text[s + j]) + { + j--; + } + + // if the pattern is present at current shift, then index j + // will become -1 after the above loop + if (j < 0) + { + shiftIndexes[c] = s; + c++; + + // shift the pattern so that the next character in text + // aligns with the last occurrence of it in pattern. + // the condition s+m < n is necessary for the case when + // pattern occurs at the end of text + //s += (s + m < n) ? m - badCharShifts[(int)text[s + m]] : 1; + s += goodSuffixShifts[0]; + } + else + { + // shift the pattern so that the bad character in text + // aligns with the last occurrence of it in pattern. the + // max function is used to make sure that we get a positive + // shift. We may get a negative shift if the last occurrence + // of bad character in pattern is on the right side of the + // current character. + //s += Max(1, j - badCharShifts[(int)text[s + j]]); + // now, compare bad char shift and good suffix shift to find best + s += Max(goodSuffixShifts[j], badCharShifts[(int)text[s + j]] - (m - 1) + j); + } + } + + int[] shifts = new int[c]; + for (int y = 0; y < c; y++) + { + shifts[y] = shiftIndexes[y]; + } + + return shifts; + } + } +} diff --git a/RevokeMsgPatcher.Assistant/RevokeMsgPatcher.Assistant.csproj b/RevokeMsgPatcher.Assistant/RevokeMsgPatcher.Assistant.csproj index bc77f12..00c5ac4 100644 --- a/RevokeMsgPatcher.Assistant/RevokeMsgPatcher.Assistant.csproj +++ b/RevokeMsgPatcher.Assistant/RevokeMsgPatcher.Assistant.csproj @@ -68,6 +68,7 @@ FormAssisant.cs +