如何在OpenCV Python中合并相邻的边界框

9

我正在进行一项与图像处理相关的大学课程项目。这是我的原始图片:enter image description here

我想要将相邻/重叠的边界框合并成单独的文本行图像,但我不知道该怎么做。到目前为止,我的代码如下(感谢 @HansHirse 的帮助):

import os
import cv2
import numpy as np
from scipy import stats
# Load the input picture from the working directory.
image = cv2.imread('example.png')

# Binarise: grayscale first, then an inverted binary threshold combined with
# the Otsu flag.
# NOTE(review): with THRESH_OTSU set, the 127 passed here is presumably
# ignored in favour of the automatically chosen level - confirm in the
# cv2.threshold documentation.
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

# Dilate the binary mask once with a 5x5 all-ones kernel.
kernel = np.ones((5,5), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)

# Find the outer contours only (RETR_EXTERNAL) on a copy of the dilated mask.
# NOTE(review): the two-value unpacking assumes findContours returns
# (contours, hierarchy); some OpenCV major versions return three values -
# confirm against the installed version.
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Sequence of contours, each accepted by ``cv2.boundingRect``.
        method: One of ``"left-to-right"``, ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value is
            treated like ``"left-to-right"``.

    Returns:
        A pair ``(contours, bounding_boxes)`` of equally long tuples in the
        requested order; each bounding box is what ``cv2.boundingRect``
        returned for the contour at the same position. Both tuples are empty
        when ``cnts`` is empty.
    """
    # Descending order is needed for the two "reversed" directions.
    reverse = method in ("right-to-left", "bottom-to-top")

    # Index into the bounding box used as sort key: 0 is x, 1 is y.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    # zip(*...) over an empty sequence yields nothing, which cannot be
    # unpacked into two names, so handle "no contours" explicitly.
    if len(cnts) == 0:
        return ((), ())

    # Pair each contour with its bounding box and sort the pairs by the
    # chosen coordinate of the box.
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda b: b[1][axis], reverse=reverse))

    # Return the sorted contours together with their bounding boxes.
    return (cnts, boundingBoxes)

# Sort the contours with the default method (left to right).
# sortedbbs is not used below; the boxes are recomputed from the contours.
sortedctrs,sortedbbs=sort_contours(ctrs)
# Convert each bounding rect from (x, y, w, h) to [xmin, ymin, xmax, ymax].
xyminmax=[]
for cnt in sortedctrs:
    x, y, w, h = cv2.boundingRect(cnt)
    xyminmax.append([x,y,x+w,y+h])

# Absolute horizontal distance between each box's right edge and the left
# edge of the next box in sorted order.
distances=[]
for i in range(len(xyminmax)):
    try:
        first_xmax = xyminmax[i][2]
        second_xmin = xyminmax[i + 1][0]
        distance=abs(second_xmin-first_xmax)
        distances.append(distance)
    except IndexError:
        # The last box has no right-hand neighbour, so no distance is stored.
        pass

# The most frequent distance is used as the merge threshold.
# NOTE(review): the [0][0] indexing assumes mode() returns array-valued
# results; newer SciPy releases reportedly return scalars for axis=None, in
# which case this line would fail - confirm against the installed version.
THRESHOLD=stats.mode(distances, axis=None)[0][0]

# Build the output rects by looking at each pair of neighbouring boxes.
new_rects=[]
for i in range(len(xyminmax)):
    try:
        # [xmin,ymin,xmax,ymax]
        first_ymin=xyminmax[i][1]
        first_ymax=xyminmax[i][3]

        second_ymin=xyminmax[i+1][1]
        second_ymax=xyminmax[i+1][3]

        first_xmax = xyminmax[i][2]
        second_xmin = xyminmax[i+1][0]

        firstheight=abs(first_ymax-first_ymin)
        secondheight=abs(second_ymax-second_ymin)

        distance=abs(second_xmin-first_xmax)

        if distance<THRESHOLD:
            # Close enough: span from the first box's left edge to the
            # second box's right edge.
            new_xmin=xyminmax[i][0]
            new_xmax=xyminmax[i+1][2]
            # Top edge: the smaller of the two ymin values.
            if first_ymin>second_ymin:
                new_ymin=second_ymin
            else:
                new_ymin = first_ymin

            # NOTE(review): the bottom edge is taken from whichever box is
            # taller, not from the larger ymax, so the merged rect can cut
            # off the box that reaches further down.
            if firstheight>secondheight:
                new_ymax = first_ymax
            else:
                new_ymax = second_ymax
            new_rects.append([new_xmin,new_ymin,new_xmax,new_ymax])
        else:
            new_rects.append(xyminmax[i])
        # NOTE(review): only the pair (i, i+1) is inspected and nothing marks
        # box i+1 as consumed, so after a merge box i+1 is handled again as
        # the first box of the next pair; runs of close boxes therefore
        # produce several overlapping rects instead of one.
    except IndexError:
        # NOTE(review): the last box has no neighbour, lands here and is
        # never appended on its own.
        pass

# Draw every resulting rect on the input image and save the result.
for rect in new_rects:
    cv2.rectangle(image, (rect[0], rect[1]), (rect[2], rect[3]), (121, 11, 189), 2)
cv2.imwrite("result.png",image)

这会生成如下图所示的图像: 带有边界框的文本行图像

我想要将非常接近或重叠的边界框(如下图所示)合并成一个边界框,以便公式不被分成单个字符。我尝试使用cv2.groupRectangles,但print的结果只是NULL

输入图片描述

输入图片描述


3
按从左到右的顺序对轮廓进行排序。对于每个轮廓,计算 xMin、xMax、yMin、yMax。定义一个用于合并的距离阈值。从左到右迭代所有轮廓,每次考虑两个相邻的轮廓(注意在y方向上错开的轮廓,比如等号)。将第一个轮廓的xMax与第二个轮廓的xMin之间的距离与您的阈值进行比较。如果距离足够小,就创建一个具有相应新xMin等坐标的新Rect来合并两者。将所有被接受的Rect存储在一个新列表中。如果这样还不行,请提供您的原始图像,我会看一下。 - HansHirse
@HansHirse 我已经尝试了您建议的方法(代码在我上面的帖子中),但我没有得到期望的结果。请看一下,如果我没有正确理解您的建议,我很抱歉。 - Igor Krakowski
点击这里了解详情。 - Miki
提供另一种方法:这个答案提供了一个类似问题的另一种解决方案。它将相邻和重叠的轮廓矩形连接在一起。如果还要连接接近的矩形,可以在掩模上使用膨胀或闭合。 - J.D.
4个回答

10

所以,这里是我的解决方案。我部分修改了你的(初始)代码,按照我喜欢的方式更改了命名等。此外,我为自己添加的所有内容都加了注释。

import cv2
import numpy as np

image = cv2.imread('images/example.png')
# cv2.imread reports an unreadable or missing file by returning None rather
# than raising, which would otherwise surface as an obscure error in cvtColor.
if image is None:
    raise OSError("Could not read image 'images/example.png'")

# Binarise: grayscale, then inverted binary threshold with the Otsu flag.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Dilate the mask once with a 5x5 all-ones kernel.
kernel = np.ones((5, 5), np.uint8)
img_dilated = cv2.dilate(thresh, kernel, iterations = 1)

# Outer contours only, taken from a copy of the dilated mask.
cnts, _ = cv2.findContours(img_dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Initial bounding rects, one (x, y, w, h) tuple per contour
rects = [cv2.boundingRect(cnt) for cnt in cnts]

# One flag per rect telling whether it has already been merged into an
# accepted rect; nothing is used at the start
rectsUsed = [False] * len(rects)

# Sort bounding rects by x coordinate
def getXFromRect(item):
    """Sort key: return the first element of *item* (the rect's x value)."""
    left = item[0]
    return left

# Sort by left edge so that merge candidates are visited from left to right
rects.sort(key=getXFromRect)

# Array of accepted (merged) rects, each stored as [x, y, width, height]
acceptedRects = []

# Merge threshold for x coordinate distance: a candidate is merged when its
# left edge is at most this many pixels beyond the current right edge
xThr = 5

# Iterate all initial bounding rects
for supIdx, supVal in enumerate(rects):
    # Rects already absorbed by an earlier merge are skipped
    if rectsUsed[supIdx]:
        continue

    # Initialize current rect from (x, y, w, h)
    currxMin = supVal[0]
    currxMax = supVal[0] + supVal[2]
    curryMin = supVal[1]
    curryMax = supVal[1] + supVal[3]

    # This bounding rect is used
    rectsUsed[supIdx] = True

    # Iterate the remaining rects, starting from the next one
    for subIdx, subVal in enumerate(rects[(supIdx + 1):], start=(supIdx + 1)):

        # Initialize merge candidate
        candxMin = subVal[0]
        candxMax = subVal[0] + subVal[2]
        candyMin = subVal[1]
        candyMax = subVal[1] + subVal[3]

        # The rects are sorted by left edge, so once one candidate starts
        # too far to the right, all later ones do as well
        if candxMin > currxMax + xThr:
            break

        # Grow the current rect. The right edge must never move left: a
        # candidate lying inside the current rect has a smaller right edge,
        # and taking it as-is would shrink the merged rect.
        currxMax = max(currxMax, candxMax)
        curryMin = min(curryMin, candyMin)
        curryMax = max(curryMax, candyMax)

        # Merge candidate (bounding rect) is used
        rectsUsed[subIdx] = True

    # No more merge candidates possible, accept current rect
    acceptedRects.append([currxMin, curryMin, currxMax - currxMin, curryMax - curryMin])

# Draw every accepted rect on the input image and save the result
for rect in acceptedRects:
    cv2.rectangle(image, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)

cv2.imwrite("images/result.png", image)

针对你的例子:

example

我得到了以下输出:

output

现在,您需要找到一个适当的阈值来满足您的期望。也许还有一些工作要做,特别是获取整个公式,因为距离变化不是那么大。

免责声明:我对Python总体上还很陌生,对OpenCV的Python API尤其如此(我更偏爱C++)。欢迎提出意见和改进建议,也欢迎指出不符合Python习惯的写法!


谢谢!这很简洁,易于理解,并让我意识到了我的错误。现在我可以继续处理等号和其他事情了。非常好用! - Igor Krakowski

5

这里有一个稍微不同的方法,使用OpenCV Wrapper library

import cv2
import opencv_wrapper as cvw

image = cv2.imread("example.png")

# Grayscale conversion followed by an inverted Otsu threshold
gray = cvw.bgr2gray(image)
thresh = cvw.threshold_otsu(gray, inverse=True)

# dilation
img_dilation = cvw.dilate(thresh, 5)

# Find contours
contours = cvw.find_external_contours(img_dilation)
# Map contours to bounding rectangles, using bounding_rect property
rects = [c.bounding_rect for c in contours]
# Sort rects by top-left x (rect.x == rect.tl.x)
sorted_rects = sorted(rects, key=lambda r: r.x)

# Distance threshold
dt = 5

# List of final, joined rectangles, seeded with the leftmost rect.
# Slicing instead of indexing keeps this working when no contour was found.
final_rects = sorted_rects[:1]

for rect in sorted_rects[1:]:
    prev_rect = final_rects[-1]

    # Shift rectangle `dt` back, to find out if they overlap
    shifted_rect = cvw.Rect(rect.tl.x - dt, rect.tl.y, rect.width, rect.height)
    intersection = cvw.rect_intersection(prev_rect, shifted_rect)
    if intersection is not None:
        # Join the two rectangles: keep the previous left edge and extend
        # the other three edges to cover both
        min_y = min(prev_rect.tl.y, rect.tl.y)
        max_y = max(prev_rect.bl.y, rect.bl.y)
        max_x = max(prev_rect.br.x, rect.br.x)
        width = max_x - prev_rect.tl.x
        height = max_y - min_y
        new_rect = cvw.Rect(prev_rect.tl.x, min_y, width, height)
        # Replace the last final rect, making the joined rect the new
        # prev_rect in the next iteration
        final_rects[-1] = new_rect
    else:
        # If no intersection, add the box
        final_rects.append(rect)

# Original rects are drawn dashed in magenta
for rect in sorted_rects:
    cvw.rectangle(image, rect, cvw.Color.MAGENTA, line_style=cvw.LineStyle.DASHED)

# Joined rects are drawn in green with a thicker line
for rect in final_rects:
    cvw.rectangle(image, rect, cvw.Color.GREEN, thickness=2)

cv2.imwrite("result.png", image)

结果如下: Final result

绿色方框是最终结果,品红色方框是原始结果。

我使用了与 @HansHirse 相同的阈值。

等号仍需要一些工作。可以使用更高的膨胀核大小或在垂直方向上使用相同的技术。

声明:我是 OpenCV Wrapper 的作者。


谢谢您的回答,您的代码注释清晰易懂。我一定会在不久的将来尝试使用OpenCV Wrapper。 - Igor Krakowski

1
易于阅读的解决方案:
# Usage example: contours -> bounding rects -> merged rects.
# NOTE(review): `frame` is not defined in this snippet, and get_contours /
# merge_boxes are defined further down, so these lines only run once those
# definitions (and an input frame) exist.
contours = get_contours(frame)
boxes = [cv2.boundingRect(c) for c in contours]
boxes = merge_boxes(boxes, x_val=40, y_val=20) # x_val / y_val: merge thresholds along the x and y axis

def get_contours(frame):
    """Return the external contours found in *frame*.

    Runs ``cv2.findContours`` with ``RETR_EXTERNAL`` and
    ``CHAIN_APPROX_SIMPLE`` and passes the raw result through
    ``imutils.grab_contours``.
    """
    found = cv2.findContours(frame, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return imutils.grab_contours(found)


def merge_boxes(boxes, x_val, y_val):
    """Merge neighbouring boxes that lie close to each other.

    The boxes are sorted by their x value and walked once from right to
    left; each box is compared with its right-hand neighbour in that order
    and replaced by the union of both when ``boxes_mergeable`` accepts the
    pair. Only neighbours in x order are compared.

    Args:
        boxes: Iterable of ``(x, y, w, h)`` boxes. It is never modified.
        x_val: Threshold along the x axis, passed to ``boxes_mergeable``.
        y_val: Threshold along the y axis, passed to ``boxes_mergeable``.

    Returns:
        A new list of boxes sorted by x, with merged pairs replaced by their
        union.
    """
    # sorted() copies, so the caller's sequence stays untouched and tuples or
    # other non-list iterables are accepted for every input size.
    merged = sorted(boxes, key=lambda r: r[0])

    i = len(merged) - 2
    while i >= 0:
        if boxes_mergeable(merged[i], merged[i + 1], x_val, y_val):
            # Fold the right neighbour into box i; the next step then
            # compares box i - 1 against this union.
            merged[i] = union(merged[i], merged[i + 1])
            del merged[i + 1]
        i -= 1
    return merged


def boxes_mergeable(box1, box2, x_val, y_val):
    """Tell whether two ``(x, y, w, h)`` boxes are close enough to merge.

    Along each axis the gap is the distance between the two origins minus
    the extent of the box that starts first (negative when they overlap).
    The boxes are mergeable when the x gap is below ``x_val`` and the y gap
    is below ``y_val``.
    """
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2

    # Horizontal gap first; the vertical one is only evaluated if it passes.
    gap_x = max(x1, x2) - min(x1, x2) - minx_w(x1, w1, x2, w2)
    if not (gap_x < x_val):
        return False

    gap_y = max(y1, y2) - min(y1, y2) - miny_h(y1, h1, y2, h2)
    return gap_y < y_val


def minx_w(x1, w1, x2, w2):
    """Return the width of the box with the smaller x (the first on a tie)."""
    if x1 <= x2:
        return w1
    return w2


def miny_h(y1, h1, y2, h2):
    """Return the height of the box with the smaller y (the first on a tie)."""
    if y1 <= y2:
        return h1
    return h2


def union(a, b):
    """Return the smallest ``(x, y, w, h)`` box covering boxes *a* and *b*."""
    left = min(a[0], b[0])
    top = min(a[1], b[1])
    # Far edges of the covering box: the larger right / bottom edge.
    right = max(a[0] + a[2], b[0] + b[2])
    bottom = max(a[1] + a[3], b[1] + b[3])
    return left, top, right - left, bottom - top

0

--> 如果您有边界框并希望沿X和Y方向合并,请使用此代码片段

--> 调整x_pixel_value和y_pixel_value以符合您的偏好

--> 但是,为此,您需要拥有边界框

import cv2

# Placeholder path: replace it with the image the boxes belong to
img = cv2.imread("your image path")

# Merge thresholds in pixels: x_pixel_value limits the horizontal gap between
# two boxes, y_pixel_value limits the difference between their top edges
x_pixel_value = 5
y_pixel_value = 6

bboxes_list = [] # your bounding boxes list, each box as [xmin, ymin, xmax, ymax]

# One flag per input box: True once the box has been merged into another one
rects_used = [False] * len(bboxes_list)
end_bboxes_list = []

for enum, box in enumerate(bboxes_list):
    # Boxes already absorbed by an earlier box are not started again
    if rects_used[enum]:
        continue
    xmin = box[0]
    xmax = box[2]
    ymin = box[1]
    ymax = box[3]

    # Compare against every later box that is still free
    for enum1, other in enumerate(bboxes_list[(enum + 1):], start=(enum + 1)):
        if rects_used[enum1]:
            continue
        i_xmin = other[0]
        i_xmax = other[2]
        i_ymin = other[1]
        i_ymax = other[3]

        # Same line: the top edges differ by less than the y threshold
        same_line = abs(ymin - i_ymin) < y_pixel_value
        # Adjacent: one box's left edge is within the x threshold of the
        # other box's right edge, on either side
        adjacent = (abs(xmin - i_xmax) < x_pixel_value
                    or abs(xmax - i_xmin) < x_pixel_value)
        if same_line and adjacent:
            rects_used[enum1] = True
            # Grow the current box so that it covers the merged one
            xmin = min(xmin, i_xmin)
            xmax = max(xmax, i_xmax)
            ymin = min(ymin, i_ymin)
            ymax = max(ymax, i_ymax)
    final_box = [xmin, ymin, xmax, ymax]
    end_bboxes_list.append(final_box)

# Draw the merged boxes and show the result for up to ten seconds
for box in end_bboxes_list:
    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color = [0,255,0], thickness = 2)
cv2.imshow("Image",img)
cv2.waitKey(10000)
cv2.destroyAllWindows()

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接