任何人都知道在byte []数组中搜索/匹配字节模式然后返回位置的有效方法.
例如
byte[] pattern = new byte[] {12,3,5,76,8,0,6,125}; byte[] toBeSearched = new byte[] {23,36,43,76,125,56,34,234,12,3,5,76,8,0,6,125,234,56,211,122,22,4,7,89,76,64,12,3,5,76,8,0,6,125}
Jb Evain.. 53
我可以建议一些不涉及创建字符串,复制数组或不安全代码的东西:
using System; using System.Collections.Generic; static class ByteArrayRocks { static readonly int [] Empty = new int [0]; public static int [] Locate (this byte [] self, byte [] candidate) { if (IsEmptyLocate (self, candidate)) return Empty; var list = new List(); for (int i = 0; i < self.Length; i++) { if (!IsMatch (self, i, candidate)) continue; list.Add (i); } return list.Count == 0 ? Empty : list.ToArray (); } static bool IsMatch (byte [] array, int position, byte [] candidate) { if (candidate.Length > (array.Length - position)) return false; for (int i = 0; i < candidate.Length; i++) if (array [position + i] != candidate [i]) return false; return true; } static bool IsEmptyLocate (byte [] array, byte [] candidate) { return array == null || candidate == null || array.Length == 0 || candidate.Length == 0 || candidate.Length > array.Length; } static void Main () { var data = new byte [] { 23, 36, 43, 76, 125, 56, 34, 234, 12, 3, 5, 76, 8, 0, 6, 125, 234, 56, 211, 122, 22, 4, 7, 89, 76, 64, 12, 3, 5, 76, 8, 0, 6, 125 }; var pattern = new byte [] { 12, 3, 5, 76, 8, 0, 6, 125 }; foreach (var position in data.Locate (pattern)) Console.WriteLine (position); } }
编辑(通过IAbstract) - 移动帖子的内容,因为它不是答案
出于好奇,我用不同的答案创建了一个小基准.
以下是一百万次迭代的结果:
solution [Locate]: 00:00:00.7714027 solution [FindAll]: 00:00:03.5404399 solution [SearchBytePattern]: 00:00:01.1105190 solution [MatchBytePattern]: 00:00:03.0658212
你的解决方案在大字节数组上很慢. (3认同)
YujiSoftware.. 25
使用LINQ方法.
public static IEnumerable PatternAt(byte[] source, byte[] pattern)
{
for (int i = 0; i < source.Length; i++)
{
if (source.Skip(i).Take(pattern.Length).SequenceEqual(pattern))
{
yield return i;
}
}
}
非常简单!
我可以建议一些不涉及创建字符串,复制数组或不安全代码的东西:
using System; using System.Collections.Generic; static class ByteArrayRocks { static readonly int [] Empty = new int [0]; public static int [] Locate (this byte [] self, byte [] candidate) { if (IsEmptyLocate (self, candidate)) return Empty; var list = new List(); for (int i = 0; i < self.Length; i++) { if (!IsMatch (self, i, candidate)) continue; list.Add (i); } return list.Count == 0 ? Empty : list.ToArray (); } static bool IsMatch (byte [] array, int position, byte [] candidate) { if (candidate.Length > (array.Length - position)) return false; for (int i = 0; i < candidate.Length; i++) if (array [position + i] != candidate [i]) return false; return true; } static bool IsEmptyLocate (byte [] array, byte [] candidate) { return array == null || candidate == null || array.Length == 0 || candidate.Length == 0 || candidate.Length > array.Length; } static void Main () { var data = new byte [] { 23, 36, 43, 76, 125, 56, 34, 234, 12, 3, 5, 76, 8, 0, 6, 125, 234, 56, 211, 122, 22, 4, 7, 89, 76, 64, 12, 3, 5, 76, 8, 0, 6, 125 }; var pattern = new byte [] { 12, 3, 5, 76, 8, 0, 6, 125 }; foreach (var position in data.Locate (pattern)) Console.WriteLine (position); } }
编辑(通过IAbstract) - 移动帖子的内容,因为它不是答案
出于好奇,我用不同的答案创建了一个小基准.
以下是一百万次迭代的结果:
solution [Locate]: 00:00:00.7714027 solution [FindAll]: 00:00:03.5404399 solution [SearchBytePattern]: 00:00:01.1105190 solution [MatchBytePattern]: 00:00:03.0658212
使用LINQ方法.
public static IEnumerable PatternAt(byte[] source, byte[] pattern)
{
for (int i = 0; i < source.Length; i++)
{
if (source.Skip(i).Take(pattern.Length).SequenceEqual(pattern))
{
yield return i;
}
}
}
非常简单!
使用高效的Boyer-Moore算法.
它的目的是找到带字符串的字符串,但你需要很少的想象力将它投射到字节数组.
一般来说,最好的答案是:使用你喜欢的任何字符串搜索算法:).
最初我发布了一些我用过的旧代码,但对Jb Evain的基准很好奇.我发现我的解决方案很愚蠢.似乎bruno conde的SearchBytePattern是最快的.我无法理解为什么特别是因为他使用了Array.Copy和Extension方法.但是在Jb的测试中有证据,所以对布鲁诺赞不绝口.
我进一步简化了比特,所以希望这将是最清晰,最简单的解决方案.(bruno conde所做的所有努力)增强功能包括:
Buffer.BlockCopy
Array.IndexOf <字节>
while循环而不是for循环
启动索引参数
转换为扩展方法
public static ListIndexOfSequence(this byte[] buffer, byte[] pattern, int startIndex) { List positions = new List (); int i = Array.IndexOf (buffer, pattern[0], startIndex); while (i >= 0 && i <= buffer.Length - pattern.Length) { byte[] segment = new byte[pattern.Length]; Buffer.BlockCopy(buffer, i, segment, 0, pattern.Length); if (segment.SequenceEqual (pattern)) positions.Add(i); i = Array.IndexOf (buffer, pattern[0], i + 1); } return positions; }
我的解决方案
class Program { public static void Main() { byte[] pattern = new byte[] {12,3,5,76,8,0,6,125}; byte[] toBeSearched = new byte[] { 23, 36, 43, 76, 125, 56, 34, 234, 12, 3, 5, 76, 8, 0, 6, 125, 234, 56, 211, 122, 22, 4, 7, 89, 76, 64, 12, 3, 5, 76, 8, 0, 6, 125}; Listpositions = SearchBytePattern(pattern, toBeSearched); foreach (var item in positions) { Console.WriteLine("Pattern matched at pos {0}", item); } } static public List SearchBytePattern(byte[] pattern, byte[] bytes) { List positions = new List (); int patternLength = pattern.Length; int totalLength = bytes.Length; byte firstMatchByte = pattern[0]; for (int i = 0; i < totalLength; i++) { if (firstMatchByte == bytes[i] && totalLength - i >= patternLength) { byte[] match = new byte[patternLength]; Array.Copy(bytes, i, match, 0, patternLength); if (match.SequenceEqual (pattern)) { positions.Add(i); i += patternLength - 1; } } } return positions; } }
这是我的提议,更简单,更快捷:
int Search(byte[] src, byte[] pattern) { int c = src.Length - pattern.Length + 1; int j; for (int i = 0; i < c; i++) { if (src[i] != pattern[0]) continue; for (j = pattern.Length - 1; j >= 1 && src[i + j] == pattern[j]; j--) ; if (j == 0) return i; } return -1; }