使用OpenCV检测手写框

Question

使用OpenCV检测手写框

python opencv image-processing handwriting-recognition

4

我有以下图片：

我想要提取出框中的图表，如下所示：

这是我的尝试：

import cv2
import matplotlib.pyplot as plt

# Load the image; cv2.imread returns None (it does not raise) when the file cannot be read
image = cv2.imread('diagram.jpg')
if image is None:
    raise FileNotFoundError("Could not read 'diagram.jpg'")

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Inverse binary threshold: pixels above 127 become 0, all others become 255
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

# Find only the outermost contours
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Draw all contours in red (OpenCV color tuples are in BGR order)
cv2.drawContours(image, contours, -1, (0, 0, 255), 2)

# Show the final image. Matplotlib interprets 3-channel arrays as RGB, so the
# BGR image is converted first; otherwise red and blue would be swapped.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()

然而，我意识到由于轮廓不封闭，提取图表将会很困难:

我尝试使用形态学闭运算来闭合方框边缘中的缺口:

# Build a 5x5 rectangular structuring element for the closing operation
kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(5, 5))

# Morphological closing, intended to bridge the gaps in the box edges
closed = cv2.morphologyEx(src=thresh, op=cv2.MORPH_CLOSE, kernel=kernel)

但这几乎没有改变什么。我该如何解决这个问题？

- QuestioningAll

2个回答

2

只需要对图像进行膨胀以闭合矩形，然后为轮廓的面积设定一个阈值：

import cv2

# Load the image; cv2.imread returns None (it does not raise) when the file cannot be read
image = cv2.imread('diagram.jpg')
if image is None:
    raise FileNotFoundError("Could not read 'diagram.jpg'")

# Keep an untouched copy so the saved crops do not contain the rectangles drawn below
original = image.copy()

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Apply inverse binary thresholding (the original passed the raw flag value 1)
ret, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

# Dilate to close the contours, then erode back.
# Passing None as the kernel selects OpenCV's default structuring element.
dilate = cv2.dilate(thresh, None)
erode = cv2.erode(dilate, None)

# Find contours; RETR_CCOMP fills in the parent index used by the filter below
contours, hierarchy = cv2.findContours(erode, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

for i, cnt in enumerate(contours):
    # Keep only external contours (parent index == -1) whose area is more than 8000
    if hierarchy[0, i, 3] == -1 and cv2.contourArea(cnt) > 8000:
        x, y, w, h = cv2.boundingRect(cnt)
        # Save the crop from the clean copy, then mark the box on the display image
        cv2.imwrite('template {0}.jpg'.format(i), original[y:y + h, x:x + w])
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# imshow only renders once the GUI event loop runs, so wait for a key press
cv2.imshow('img', image)
cv2.waitKey()
cv2.destroyAllWindows()

你将会获得：

- HMH1013

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- Rotem · Accepted Answer

我们可以用膨胀再腐蚀的方法替换形态学闭运算，但需要填充膨胀和腐蚀之间的轮廓。

为了填补空隙，卷积核大小应该比5x5大得多（我使用了51x51）。

假设手写框是彩色的，我们可以从BGR转换到HSV，并在HSV的饱和度通道上应用阈值：

# Move from BGR to HSV and use the saturation plane (channel index 1) as the "gray" image
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
gray = hsv[..., 1]
# Automatic thresholding with the THRESH_OTSU flag; only the binary mask is kept
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]

使用较大的核进行膨胀操作，并使用drawContours函数填充轮廓：

# A relatively large 51x51 rectangular structuring element is used to close the gaps
kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(51, 51))
dilated = cv2.dilate(src=thresh, kernel=kernel)

# Locate the outer contours of the dilated mask and paint them solid (255) in place;
# cv2.FILLED is the named constant for thickness -1
contours, hierarchy = cv2.findContours(
    image=dilated, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE
)
cv2.drawContours(
    image=dilated, contours=contours, contourIdx=-1, color=255, thickness=cv2.FILLED
)

在填充轮廓之后再应用腐蚀。膨胀后再腐蚀相当于闭运算，但在这里我们是在填充轮廓之后才完成闭运算。

# Erode the filled mask, passing the same `kernel` object as the structuring element
closed = cv2.erode(src=dilated, kernel=kernel)

代码示例：

import cv2
import numpy as np

# Tunable parameters, named once so that padding and cropping cannot drift apart
PAD = 100          # Width of the zero border added around the mask before dilating
KERNEL_SIZE = 51   # Side length of the square structuring element

# Load the image; cv2.imread returns None (it does not raise) when the file cannot be read
image = cv2.imread('diagram.png')
if image is None:
    raise FileNotFoundError("Could not read 'diagram.png'")

hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)  # Convert from BGR to HSV color space

# Use the saturation channel of HSV as "gray" instead of a plain grayscale conversion
gray = hsv[:, :, 1]

# Apply automatic thresholding (THRESH_OTSU) to create a binary image
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)

# Add zero padding on all four sides (required due to the large dilate kernel)
thresh = np.pad(thresh, ((PAD, PAD), (PAD, PAD)))

# Define a relatively large rectangular kernel for closing the gaps
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (KERNEL_SIZE, KERNEL_SIZE))

dilated = cv2.dilate(thresh, kernel)  # Dilate with the large kernel

# Fill the outer contours (thickness -1 means filled) before applying erode
contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(dilated, contours, -1, 255, -1)

closed = cv2.erode(dilated, kernel)  # Apply erode after filling the contours

closed = closed[PAD:-PAD, PAD:-PAD]  # Remove the padding

# Find the contours of the closed mask
contours, hierarchy = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Draw the contours on the input image (color tuple is in BGR order)
cv2.drawContours(image, contours, -1, (255, 0, 0), 2)

# Show the intermediate and final images for testing
# (`thresh` and `dilated` are displayed with their padding still attached)
cv2.imshow('gray', gray)
cv2.imshow('thresh', thresh)
cv2.imshow('dilated', dilated)
cv2.imshow('closed', closed)
cv2.imshow('image', image)
cv2.waitKey()
cv2.destroyAllWindows()

结果：

gray（饱和度通道）：

thresh：

dilated（填充后）：

closed：