处理一张表格图像以从中获取数据

Question

处理一张表格图像以从中获取数据

16

我有这张桌子的图片（如下所示）。我正在尝试获取与表格图像中的第一行类似的表格数据：

rows[0] = [x,x, , , , ,x, ,x,x, ,x, ,x, , , , ,x, , , ,x,x,x, ,x, ,x, , , , ]

我需要x的数量以及空格的数量，还会有其他类似于这个表格的图像（都有x和相同的列数）。

enter image description here

到目前为止，我能够使用x的图像检测出所有的x。我也可以在一定程度上检测出线条。我正在使用Python中的open cv2。我还在使用houghTransform来检测水平和垂直线（这非常有效）。

我正在尝试找出如何逐行进行并将信息存储在列表中。

这些是训练图像：用于检测x（代码中的train1.png） enter image description here

用于检测线路（代码中的train2.png） enter image description here

用于检测线路（代码中的train3.png） enter image description here

这是我到目前为止的代码：

# process images
from pytesser import *
from PIL import Image
from matplotlib import pyplot as plt
import pytesseract
import numpy as np
import cv2
import math
import os

# the table images
images = ['table1.png', 'table2.png', 'table3.png', 'table4.png', 'table5.png']

# the template images used for training
templates = ['train1.png', 'train2.png', 'train3.png']

def hough_transform(im):
    img = cv2.imread('imgs/'+im)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    lines = cv2.HoughLines(edges, 1, np.pi/180, 200)

    i = 1
    for rho, theta in lines[0]:
        a = np.cos(theta)
        b = np.sin(theta)
        x0 = a*rho
        y0 = b*rho
        x1 = int(x0 + 1000*(-b))
        y1 = int(y0 + 1000*(a))
        x2 = int(x0 - 1000*(-b))
        y2 = int(y0 - 1000*(a))

        #print '%s - 0:(%s,%s) 1:(%s,%s), 2:(%s,%s)' % (i,x0,y0,x1,y1,x2,y2)

        cv2.line(img, (x1,y1), (x2,y2), (0,0,255), 2)
        i += 1

    fn = os.path.splitext(im)[0]+'-lines'
    cv2.imwrite('imgs/'+fn+'.png', img)


def match_exes(im, te):
    img_rgb = cv2.imread('imgs/'+im)
    img_gry = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    template = cv2.imread('imgs/'+te, 0)
    w, h = template.shape[::-1]

    res = cv2.matchTemplate(img_gry, template, cv2.TM_CCOEFF_NORMED)
    threshold = 0.71
    loc = np.where(res >= threshold)

    pts = []
    exes = []
    blanks = []
    for pt in zip(*loc[::-1]):
        pts.append(pt)
        cv2.rectangle(img_rgb, pt, (pt[0]+w, pt[1]+h), (0,0,255), 1)


    fn = os.path.splitext(im)[0]+'-exes'
    cv2.imwrite('imgs/'+fn+'.png', img_rgb)

    return pts, exes, blanks


def match_horizontal_lines(im, te, te2):
    img_rgb = cv2.imread('imgs/'+im)
    img_gry = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    template = cv2.imread('imgs/'+te, 0)
    w1, h1 = template.shape[::-1]
    template2 = cv2.imread('imgs/'+te2, 0)
    w2, h2 = template2.shape[::-1]

    # first line template (the downward facing line)
    res1 = cv2.matchTemplate(img_gry, template, cv2.TM_CCOEFF_NORMED)
    threshold1 = 0.8
    loc1 = np.where(res1 >= threshold1)

    # second line template (the upward facing line)
    res2 = cv2.matchTemplate(img_gry, template2, cv2.TM_CCOEFF_NORMED)
    threshold2 = 0.8
    loc2 = np.where(res2 >= threshold2)

    pts = []
    exes = []
    blanks = []

    # find first line template (the downward facing line)
    for pt in zip(*loc1[::-1]):
        pts.append(pt)
        cv2.rectangle(img_rgb, pt, (pt[0]+w1, pt[1]+h1), (0,0,255), 1)

    # find second line template (the upward facing line)
    for pt in zip(*loc2[::-1]):
        pts.append(pt)
        cv2.rectangle(img_rgb, pt, (pt[0]+w2, pt[0]+h2), (0,0,255), 1)

    fn = os.path.splitext(im)[0]+'-horiz'
    cv2.imwrite('imgs/'+fn+'.png', img_rgb)

    return pts, exes, blanks


# process
text = ''
for img in images:
    print 'processing %s' % img
    hough_transform(img)
    pts, exes, blanks = match_exes(img, templates[0])
    pts1, exes1, blanks1 = match_horizontal_lines(img, templates[1], templates[2])
    text += '%s: %s x\'s & %s horizontal lines\n' % (img, len(pts), len(pts1))

# statistics file
outputFile = open('counts.txt', 'w')
outputFile.write(text)
outputFile.close()

而且，输出的图像看起来像这样（您可以看到，所有的x都被检测到了，但不是所有的线）：
x's
enter image description here

水平线：
enter image description here

霍夫变换：
enter image description here

就像我说的那样，我实际上只是想从表格中获取数据，类似于这种形式（表格图像的第一行）：

row a = [x,x, , , , ,x, ,x,x, ,x, ,x, , , , ,x, , , ,x,x,x, ,x, ,x, , , , ]

我需要x的数量以及空格的数量。还会有其他类似于这个表格的图片（所有表格都有相同数量的列和不同数量的行且都有x）。

另外，我正在使用Python 2.7。

- user

1

你似乎非常接近了。看着你的霍夫线，你应该能够得出第一个单元格（行0，列0）的边界。然后只在这些边界内查找“x”，并相应地更新表格。不幸的是，我的Python水平相对较弱，否则我会发表更具体的答案。 - beaker

我注意到霍夫变换的问题在于它为桌子上的每条线绘制了2条线。我将线宽从2设置为1以查看差异。现在，我正在尝试使用模板匹配来映射所有x，并查看哪些在同一行等等... - user

双倍线可能是因为“前景”是黑色的，而“背景”是白色的。首先尝试反转颜色。 - beaker

在反转图像后，我仍然得到了双重线条，这些线条看起来像是在表格的每一行上方和下方都画了一条线。 - user

我找到了如何去除重复项，并将行列表分成了两个垂直和水平的列表。这使得现在更容易了。 - user

1个回答

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- user · Accepted Answer

好的，我已经想通了。我使用了@beaker提供的建议，在网格线之间查找。

在这之前，我需要从霍夫变换代码中删除重复线条。然后，我将剩余的线条分为两个列表，垂直和水平。从那里开始，我可以循环遍历水平和垂直，并创建感兴趣区域（roi）图像。每个roi图像代表表格主图像中的一个“单元格”。我检查了每个单元格的轮廓，并注意到当单元格中有“x”时，len(contours) >= 2。因此，任何len(contours) <2都是空白处（我进行了几个测试程序来找出这一点）。以下是我用来使其工作的代码：

import cv2
import numpy as np
import os

# the list of images (tables)
images = ['table1.png', 'table2.png', 'table3.png', 'table4.png', 'table5.png']

# the list of templates (used for template matching)
templates = ['train1.png']

def remove_duplicates(lines):
    # remove duplicate lines (lines within 10 pixels of eachother)
    for x1, y1, x2, y2 in lines:
        for index, (x3, y3, x4, y4) in enumerate(lines):
            if y1 == y2 and y3 == y4:
                diff = abs(y1-y3)
            elif x1 == x2 and x3 == x4:
                diff = abs(x1-x3)
            else:
                diff = 0
            if diff < 10 and diff is not 0:
                del lines[index]
    return lines


def sort_line_list(lines):
    # sort lines into horizontal and vertical
    vertical = []
    horizontal = []
    for line in lines:
        if line[0] == line[2]:
            vertical.append(line)
        elif line[1] == line[3]:
            horizontal.append(line)
    vertical.sort()
    horizontal.sort(key=lambda x: x[1])
    return horizontal, vertical


def hough_transform_p(image, template, tableCnt):
    # open and process images
    img = cv2.imread('imgs/'+image)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # probabilistic hough transform
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, 200, minLineLength=20, maxLineGap=999)[0].tolist()

    # remove duplicates
    lines = remove_duplicates(lines)

    # draw image
    for x1, y1, x2, y2 in lines:
        cv2.line(img, (x1, y1), (x2, y2), (0, 0, 255), 1)

    # sort lines into vertical & horizontal lists
    horizontal, vertical = sort_line_list(lines)

    # go through each horizontal line (aka row)
    rows = []
    for i, h in enumerate(horizontal):
        if i < len(horizontal)-1:
            row = []
            for j, v in enumerate(vertical):
                if i < len(horizontal)-1 and j < len(vertical)-1:
                    # every cell before last cell
                    # get width & height
                    width = horizontal[i+1][1] - h[1]
                    height = vertical[j+1][0] - v[0]

                else:
                    # last cell, width = cell start to end of image
                    # get width & height
                    width = tW
                    height = tH
                tW = width
                tH = height

                # get roi (region of interest) to find an x
                roi = img[h[1]:h[1]+width, v[0]:v[0]+height]

                # save image (for testing)
                dir = 'imgs/table%s' % (tableCnt+1)
                if not os.path.exists(dir):
                     os.makedirs(dir)
                fn = '%s/roi_r%s-c%s.png' % (dir, i, j)
                cv2.imwrite(fn, roi)

                # if roi contains an x, add x to array, else add _
                roi_gry = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                ret, thresh = cv2.threshold(roi_gry, 127, 255, 0)
                contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                if len(contours) > 1:
                    # there is an x for 2 or more contours
                    row.append('x')
                else:
                    # there is no x when len(contours) is <= 1
                    row.append('_')
            row.pop()
            rows.append(row)

    # save image (for testing)
    fn = os.path.splitext(image)[0] + '-hough_p.png'
    cv2.imwrite('imgs/'+fn, img)


def process():
    for i, img in enumerate(images):
        # perform probabilistic hough transform on each image
        hough_transform_p(img, templates[0], i)


if __name__ == '__main__':
    process()

因此，示例图像：输入图像描述

而且，输出结果（为了简洁起见删除了生成文本文件的代码）：输入图像描述

正如您所看到的，文本文件包含与图像中相同位置的x数量相同的字符。现在难点已经解决，我可以继续我的任务了！