在我之前的问题中,我改变了这个图像:
转换成了这个:
Tesseract OCR解释为:
1O351
在图像周围放置一个框架
实际上改善了OCR结果.
1CB51
但是,我需要正确地将所有5个字符都放到OCR中,所以作为一个实验,我使用Paint.NET旋转并将每个字母对齐到正确的方向:
得出正确答案:
1CB52
我将如何在C#中执行此更正?
我已经对各种文本对齐算法做了一些研究,但它们都假设源图像中存在文本行:可以从这些文本行推导出旋转角度,而且行内的字母之间已经具有正确的间距和方向关系.
您可以使用以下CodeProject文章中的代码来分割每个单独的字符.然而,当试图单独对这些字符进行倾斜校正(deskew)时,得到的结果都不会很好,因为可供利用的信息太少.
我尝试使用AForge.NET的HoughLineTransformation类,得到的角度范围为80-90度.所以我尝试使用以下代码来纠正它们:
/// <summary>
/// Splits <paramref name="targetBitmap"/> into connected components with <c>CCL</c>,
/// estimates a skew angle for each component from its Hough lines, and draws the
/// rotated component back onto the same bitmap at the component's original location.
/// </summary>
/// <param name="targetBitmap">Image to correct. It is drawn on in place.</param>
/// <returns>The same <paramref name="targetBitmap"/> instance that was passed in.</returns>
private static Bitmap DeskewImageByIndividualChars(Bitmap targetBitmap)
{
    // The generic arguments were lost in the published snippet; they match the
    // Rectangle -> Bitmap dictionary that CCL.Process builds.
    IDictionary<Rectangle, Bitmap> characters = new CCL().Process(targetBitmap);

    using (Graphics g = Graphics.FromImage(targetBitmap))
    {
        foreach (var character in characters)
        {
            double angle = 0d;
            bool hasLines;

            BitmapData bitmapData = character.Value.LockBits(
                new Rectangle(Point.Empty, character.Value.Size),
                ImageLockMode.ReadWrite,
                PixelFormat.Format8bppIndexed);
            try
            {
                HoughLineTransformation hlt = new HoughLineTransformation();
                hlt.ProcessImage(bitmapData);

                var lines = hlt.GetLinesByRelativeIntensity(0.5);

                // Average() throws InvalidOperationException on an empty sequence,
                // so only average when at least one line was detected.
                hasLines = lines.Any();
                if (hasLines)
                {
                    angle = lines.Average(l => l.Theta);
                }
            }
            finally
            {
                // Always release the lock, even if the transform throws.
                character.Value.UnlockBits(bitmapData);
            }

            if (!hasLines)
            {
                // No angle estimate: leave this character's original pixels untouched.
                continue;
            }

            // The observed angles cluster around 80-90 degrees, so rotate by the
            // difference from 90 to bring the character upright.
            using (Bitmap bitmap = RotateImage(character.Value, 90 - angle, Color.White))
            {
                g.DrawImage(bitmap, character.Key.Location);
            }
        }
    }

    return targetBitmap;
}
其中的RotateImage方法取自这里.但是,结果似乎并不理想.也许你可以尝试改进它们.
以下是CodeProject文章中的代码,供您参考.我对它做了一些修改,使其行为更安全,比如在LockBits周围添加try-finally,并使用using语句正确地释放对象等.
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;

namespace ConnectedComponentLabeling
{
    /// <summary>
    /// Connected-component labeling over a bitmap: groups touching non-background
    /// pixels into patterns and returns one cropped bitmap per pattern, keyed by the
    /// pattern's bounding rectangle in the input image.
    /// </summary>
    public class CCL
    {
        private Bitmap _input;

        // Label assigned to each pixel, indexed [x, y]; 0 means "no label".
        private int[,] _board;

        /// <summary>
        /// Labels the connected components of <paramref name="input"/> and renders
        /// each one to its own bitmap.
        /// </summary>
        /// <param name="input">Image to segment.</param>
        /// <returns>
        /// A dictionary from each component's bounds (see <see cref="GetBounds"/>) to a
        /// bitmap containing only that component drawn in black on white.
        /// </returns>
        public IDictionary<Rectangle, Bitmap> Process(Bitmap input)
        {
            _input = input;
            _board = new int[_input.Width, _input.Height];

            Dictionary<int, List<Pixel>> patterns = Find();
            var images = new Dictionary<Rectangle, Bitmap>();

            foreach (KeyValuePair<int, List<Pixel>> pattern in patterns)
            {
                // The working bitmap is disposed here; the dictionary keeps a clone.
                using (Bitmap bmp = CreateBitmap(pattern.Value))
                {
                    images.Add(GetBounds(pattern.Value), (Bitmap)bmp.Clone());
                }
            }

            return images;
        }

        /// <summary>
        /// Returns true when the pixel is fully opaque pure white.
        /// </summary>
        protected virtual bool CheckIsBackGround(Pixel currentPixel)
        {
            return currentPixel.color.A == 255
                && currentPixel.color.R == 255
                && currentPixel.color.G == 255
                && currentPixel.color.B == 255;
        }

        /// <summary>
        /// Scans the input row by row, assigns a label to every non-background pixel,
        /// merges labels that meet, and returns the resulting patterns with those
        /// touching the image border removed.
        /// </summary>
        private unsafe Dictionary<int, List<Pixel>> Find()
        {
            int labelCount = 1;
            var allLabels = new Dictionary<int, Label>();

            BitmapData imageData = _input.LockBits(
                new Rectangle(0, 0, _input.Width, _input.Height),
                ImageLockMode.ReadOnly,
                PixelFormat.Format24bppRgb);
            try
            {
                int bytesPerPixel = 3;
                byte* scan0 = (byte*)imageData.Scan0.ToPointer();
                int stride = imageData.Stride;

                for (int i = 0; i < _input.Height; i++)
                {
                    byte* row = scan0 + (i * stride);

                    for (int j = 0; j < _input.Width; j++)
                    {
                        // The three bytes are read in B, G, R order.
                        int bIndex = j * bytesPerPixel;
                        int gIndex = bIndex + 1;
                        int rIndex = bIndex + 2;

                        byte pixelR = row[rIndex];
                        byte pixelG = row[gIndex];
                        byte pixelB = row[bIndex];

                        Pixel currentPixel = new Pixel(new Point(j, i), Color.FromArgb(pixelR, pixelG, pixelB));

                        if (CheckIsBackGround(currentPixel))
                        {
                            continue;
                        }

                        IEnumerable<int> neighboringLabels = GetNeighboringLabels(currentPixel);
                        int currentLabel;

                        if (!neighboringLabels.Any())
                        {
                            // No labelled neighbour: start a new label.
                            currentLabel = labelCount;
                            allLabels.Add(currentLabel, new Label(currentLabel));
                            labelCount++;
                        }
                        else
                        {
                            // Adopt the smallest root among the neighbours and join
                            // every neighbour with a different root to it.
                            currentLabel = neighboringLabels.Min(n => allLabels[n].GetRoot().Name);
                            Label root = allLabels[currentLabel].GetRoot();

                            foreach (var neighbor in neighboringLabels)
                            {
                                if (root.Name != allLabels[neighbor].GetRoot().Name)
                                {
                                    allLabels[neighbor].Join(allLabels[currentLabel]);
                                }
                            }
                        }

                        _board[j, i] = currentLabel;
                    }
                }
            }
            finally
            {
                _input.UnlockBits(imageData);
            }

            Dictionary<int, List<Pixel>> patterns = AggregatePatterns(allLabels);
            patterns = RemoveIntrusions(patterns, _input.Width, _input.Height);

            return patterns;
        }

        /// <summary>
        /// Drops every pattern that has at least one pixel on the outer edge of the
        /// image (first/last column or first/last row).
        /// </summary>
        private Dictionary<int, List<Pixel>> RemoveIntrusions(Dictionary<int, List<Pixel>> patterns, int width, int height)
        {
            var patternsCleaned = new Dictionary<int, List<Pixel>>();

            foreach (var pattern in patterns)
            {
                // The right-edge test previously compared Position.Y against
                // width - 1; the horizontal check must use Position.X.
                bool touchesBorder = pattern.Value.Any(item =>
                    item.Position.X == 0
                    || item.Position.X == width - 1
                    || item.Position.Y == 0
                    || item.Position.Y == height - 1);

                if (!touchesBorder)
                {
                    patternsCleaned.Add(pattern.Key, pattern.Value);
                }
            }

            return patternsCleaned;
        }

        /// <summary>
        /// Collects the non-zero labels already stored on the board around
        /// <paramref name="pix"/>. The window spans offsets -1..+2 on both axes and
        /// stops before the last row and last column of the image.
        /// </summary>
        private IEnumerable<int> GetNeighboringLabels(Pixel pix)
        {
            var neighboringLabels = new List<int>();

            for (int i = pix.Position.Y - 1; i <= pix.Position.Y + 2 && i < _input.Height - 1; i++)
            {
                for (int j = pix.Position.X - 1; j <= pix.Position.X + 2 && j < _input.Width - 1; j++)
                {
                    // Skip coordinates that fall off the top/left edge.
                    if (i > -1 && j > -1 && _board[j, i] != 0)
                    {
                        neighboringLabels.Add(_board[j, i]);
                    }
                }
            }

            return neighboringLabels;
        }

        /// <summary>
        /// Groups every labelled board position by the root of its label. The
        /// resulting pixels are all recorded as black.
        /// </summary>
        private Dictionary<int, List<Pixel>> AggregatePatterns(Dictionary<int, Label> allLabels)
        {
            var patterns = new Dictionary<int, List<Pixel>>();

            for (int i = 0; i < _input.Height; i++)
            {
                for (int j = 0; j < _input.Width; j++)
                {
                    int patternNumber = _board[j, i];
                    if (patternNumber == 0)
                    {
                        continue;
                    }

                    patternNumber = allLabels[patternNumber].GetRoot().Name;

                    List<Pixel> pixels;
                    if (!patterns.TryGetValue(patternNumber, out pixels))
                    {
                        pixels = new List<Pixel>();
                        patterns[patternNumber] = pixels;
                    }

                    pixels.Add(new Pixel(new Point(j, i), Color.Black));
                }
            }

            return patterns;
        }

        /// <summary>
        /// Renders <paramref name="pattern"/> onto a white bitmap just large enough to
        /// hold it, translating pixel positions so the pattern starts at (0, 0).
        /// </summary>
        private unsafe Bitmap CreateBitmap(List<Pixel> pattern)
        {
            int minX = pattern.Min(p => p.Position.X);
            int maxX = pattern.Max(p => p.Position.X);

            int minY = pattern.Min(p => p.Position.Y);
            int maxY = pattern.Max(p => p.Position.Y);

            int width = maxX + 1 - minX;
            int height = maxY + 1 - minY;

            Bitmap bmp = DrawFilledRectangle(width, height);

            BitmapData imageData = bmp.LockBits(
                new Rectangle(0, 0, bmp.Width, bmp.Height),
                ImageLockMode.ReadWrite,
                PixelFormat.Format24bppRgb);
            try
            {
                byte* scan0 = (byte*)imageData.Scan0.ToPointer();
                int stride = imageData.Stride;

                foreach (Pixel pix in pattern)
                {
                    // Byte offset of this pixel: 3 bytes per pixel, written B, G, R.
                    int offset = ((pix.Position.X - minX) * 3) + (pix.Position.Y - minY) * stride;

                    scan0[offset] = pix.color.B;
                    scan0[offset + 1] = pix.color.G;
                    scan0[offset + 2] = pix.color.R;
                }
            }
            finally
            {
                bmp.UnlockBits(imageData);
            }

            return bmp;
        }

        /// <summary>
        /// Creates a bitmap of <paramref name="x"/> by <paramref name="y"/> pixels
        /// filled with white.
        /// </summary>
        private Bitmap DrawFilledRectangle(int x, int y)
        {
            Bitmap bmp = new Bitmap(x, y);
            using (Graphics graph = Graphics.FromImage(bmp))
            {
                Rectangle imageSize = new Rectangle(0, 0, x, y);
                graph.FillRectangle(Brushes.White, imageSize);
            }

            return bmp;
        }

        /// <summary>
        /// Returns the rectangle spanning the pattern's minimum and maximum pixel
        /// coordinates. Width and Height are max - min, i.e. one less than the size of
        /// the bitmap <see cref="CreateBitmap"/> produces for the same pattern.
        /// </summary>
        private Rectangle GetBounds(List<Pixel> pattern)
        {
            var points = pattern.Select(x => x.Position);

            var x_query = points.Select(p => p.X);
            int xmin = x_query.Min();
            int xmax = x_query.Max();

            var y_query = points.Select(p => p.Y);
            int ymin = y_query.Min();
            int ymax = y_query.Max();

            return new Rectangle(xmin, ymin, xmax - xmin, ymax - ymin);
        }
    }
}
使用上面的代码我得到以下输入/输出:
正如你所看到的,B旋转得很好,但其他字符并不是那么好.
除了尝试对单个字符进行倾斜校正之外,另一种方法是使用上面的分割例程找到它们的位置.然后将每个字符分别传递给您的识别引擎,看看这是否会改善您的结果.
我使用以下方法,借助CCL类内部的List<Pixel>来查找字符的角度.它的工作原理是计算"左下"点和"右下"点之间的角度.我还没有测试过当字符朝相反方向旋转时它是否有效.
/// <summary>
/// Estimates a character's rotation from the line joining two of its pixels: the
/// lowest pixel (leftmost on ties) and the rightmost pixel (lowest on ties).
/// </summary>
/// <param name="pattern">Pixels of one character; must contain at least one pixel.</param>
/// <returns>The negated angle of that line, in degrees.</returns>
private double GetAngle(List<Pixel> pattern)
{
    var pixels = pattern.Select(p => p.Position).ToArray();

    // Largest Y first, then smallest X: the "bottom left" anchor.
    Point bottomLeft = pixels.OrderByDescending(p => p.Y).ThenBy(p => p.X).First();

    // Largest X first, then largest Y: the "bottom right" anchor.
    Point rightBottom = pixels.OrderByDescending(p => p.X).ThenByDescending(p => p.Y).First();

    int xDiff = rightBottom.X - bottomLeft.X;
    int yDiff = rightBottom.Y - bottomLeft.Y;

    double angle = Math.Atan2(yDiff, xDiff) * 180 / Math.PI;

    return -angle;
}
请注意我的绘图代码有点问题,所以5的右侧被截断了,不过这段代码会产生以下输出:
请注意,由于B和5带有弧度,它们被旋转的程度超出了您的预期.
通过从左右边缘获得角度,然后选择最佳角度,使用以下代码,旋转似乎更好.注意我只用需要顺时针旋转的字母进行了测试,所以如果它们需要采用相反的方式,它可能效果不好.
这段代码还把像素按"象限"划分,使每个像素都从各自的象限中选取,以免选到两个靠得太近的像素.
选择最佳角度的思路是:如果两个角度相近(目前的阈值是彼此相差1.5度以内,可以很容易地调整),就取它们的平均值;否则,选择最接近零的那个.
/// <summary>
/// Estimates a character's rotation by measuring its left and right edges
/// separately and combining the two results: averaged when they agree to within
/// 1.5 degrees, otherwise whichever is closer to zero.
/// </summary>
/// <param name="pattern">Pixels of one character.</param>
/// <param name="bounds">Bounding rectangle of <paramref name="pattern"/>.</param>
/// <returns>
/// The chosen angle in degrees. When only one edge can be measured that edge's
/// angle is returned; when neither can, 0.
/// </returns>
private double GetAngle(List<Pixel> pattern, Rectangle bounds)
{
    const double agreementThreshold = 1.5;

    // Despite the names these are the coordinates of the bounds' centre; they
    // split the pixels into four quadrants.
    int halfWidth = bounds.X + (bounds.Width / 2);
    int halfHeight = bounds.Y + (bounds.Height / 2);

    double leftEdgeAngle = GetAngleLeftEdge(pattern, halfWidth, halfHeight);
    double rightEdgeAngle = GetAngleRightEdge(pattern, halfWidth, halfHeight);

    // An edge helper yields NaN when one of its quadrants holds no pixels
    // (for example a glyph that is only one pixel wide).
    bool hasLeft = !double.IsNaN(leftEdgeAngle);
    bool hasRight = !double.IsNaN(rightEdgeAngle);

    if (!hasLeft && !hasRight)
    {
        return 0d;
    }

    if (!hasLeft)
    {
        return rightEdgeAngle;
    }

    if (!hasRight)
    {
        return leftEdgeAngle;
    }

    if (Math.Abs(leftEdgeAngle - rightEdgeAngle) <= agreementThreshold)
    {
        return (leftEdgeAngle + rightEdgeAngle) / 2d;
    }

    return Math.Abs(leftEdgeAngle) > Math.Abs(rightEdgeAngle)
        ? rightEdgeAngle
        : leftEdgeAngle;
}

/// <summary>
/// Measures the left edge: the line from the leftmost pixel of the top-left
/// quadrant (topmost on ties) to the lowest pixel of the bottom-left quadrant
/// (leftmost on ties).
/// </summary>
/// <returns>
/// 90 minus that line's angle in degrees, or NaN when either quadrant is empty.
/// </returns>
private double GetAngleLeftEdge(List<Pixel> pattern, double halfWidth, double halfHeight)
{
    var topLeftPixels = pattern.Select(p => p.Position).Where(p => p.Y < halfHeight && p.X < halfWidth).ToArray();
    var bottomLeftPixels = pattern.Select(p => p.Position).Where(p => p.Y > halfHeight && p.X < halfWidth).ToArray();

    // First() would throw on an empty quadrant; report "no estimate" instead.
    if (topLeftPixels.Length == 0 || bottomLeftPixels.Length == 0)
    {
        return double.NaN;
    }

    Point topLeft = topLeftPixels.OrderBy(p => p.X).ThenBy(p => p.Y).First();
    Point bottomLeft = bottomLeftPixels.OrderByDescending(p => p.Y).ThenBy(p => p.X).First();

    int xDiff = bottomLeft.X - topLeft.X;
    int yDiff = bottomLeft.Y - topLeft.Y;

    double angle = Math.Atan2(yDiff, xDiff) * 180 / Math.PI;

    return 90 - angle;
}

/// <summary>
/// Measures the right edge: the line from the topmost pixel of the top-right
/// quadrant (rightmost on ties) to the rightmost pixel of the bottom-right
/// quadrant (lowest on ties).
/// </summary>
/// <returns>
/// The absolute value of that line's angle in degrees, or NaN when either quadrant
/// is empty. The sign is discarded, so the rotation direction is not reported.
/// </returns>
private double GetAngleRightEdge(List<Pixel> pattern, double halfWidth, double halfHeight)
{
    var topRightPixels = pattern.Select(p => p.Position).Where(p => p.Y < halfHeight && p.X > halfWidth).ToArray();
    var bottomRightPixels = pattern.Select(p => p.Position).Where(p => p.Y > halfHeight && p.X > halfWidth).ToArray();

    // First() would throw on an empty quadrant; report "no estimate" instead.
    if (topRightPixels.Length == 0 || bottomRightPixels.Length == 0)
    {
        return double.NaN;
    }

    Point topRight = topRightPixels.OrderBy(p => p.Y).ThenByDescending(p => p.X).First();
    Point bottomRight = bottomRightPixels.OrderByDescending(p => p.X).ThenByDescending(p => p.Y).First();

    int xDiff = bottomRight.X - topRight.X;
    int yDiff = bottomRight.Y - topRight.Y;

    // Arguments are (x, y) here, so the angle is measured from the vertical.
    double angle = Math.Atan2(xDiff, yDiff) * 180 / Math.PI;

    return Math.Abs(angle);
}
现在生成以下输出,我的绘图代码仍然有点问题.请注意,C看起来没有被很好地校正,但仔细观察会发现,这只是它的形状造成的.
我改进了绘图代码,并尝试将字符放到相同的基线上:
/// <summary>
/// Draws each segmented character, rotated by its estimated angle, onto a new white
/// bitmap of the same size, pixel format and resolution as <paramref name="bitmap"/>,
/// shifting characters vertically towards a common baseline.
/// </summary>
/// <param name="bitmap">Source image; it is not drawn on.</param>
/// <returns>
/// A new bitmap containing the rotated characters, or an all-white bitmap when no
/// characters were found.
/// </returns>
private static Bitmap DeskewImageByIndividualChars(Bitmap bitmap)
{
    // NOTE(review): the generic arguments were lost in the published snippet. They
    // are reconstructed from usage below (Key.Bottom, Value.Item1 as the bitmap,
    // Value.Item2 as the angle) - confirm against the Process overload in use.
    IDictionary<Rectangle, Tuple<Bitmap, double>> characters = new CCL().Process(bitmap);

    Bitmap deskewedBitmap = new Bitmap(bitmap.Width, bitmap.Height, bitmap.PixelFormat);
    deskewedBitmap.SetResolution(bitmap.HorizontalResolution, bitmap.VerticalResolution);

    using (Graphics g = Graphics.FromImage(deskewedBitmap))
    {
        g.FillRectangle(Brushes.White, new Rectangle(Point.Empty, deskewedBitmap.Size));

        // Max() throws InvalidOperationException on an empty sequence.
        if (characters.Count == 0)
        {
            return deskewedBitmap;
        }

        // The baseline is the lowest bottom edge among the pre-rotation bounds.
        int baseLine = characters.Max(c => c.Key.Bottom);

        foreach (var character in characters)
        {
            int y = character.Key.Y;
            if (character.Key.Bottom != baseLine)
            {
                // Move the character down so its bottom approaches the baseline.
                y += (baseLine - character.Key.Bottom - 1);
            }

            using (Bitmap characterBitmap = RotateImage(character.Value.Item1, character.Value.Item2, Color.White))
            {
                g.DrawImage(characterBitmap, new Point(character.Key.X, y));
            }
        }
    }

    return deskewedBitmap;
}
然后,这将产生以下输出.请注意,各个字符并不完全位于同一基线上,因为计算基线时使用的是旋转前的底部位置.要改进代码,需要使用旋转后的基线.在计算基线之前对图像进行阈值处理也会有所帮助.
另一个改进是计算每个已绘制的旋转字符的Right位置,这样在绘制下一个字符时,就不会与前一个字符重叠并切掉一部分.正如你在输出中看到的,2略微切入了5.
输出现在非常类似于OP中手动创建的输出.