使用Python将相对路径转换为绝对路径

4

我已经按照许多SO用户的请求,使用beautifulsoup重新编写了完整的代码来获取href和src链接。以下是代码:

import os
from bs4 import BeautifulSoup
from urllib.parse import urlparse

# Parse the URL of the page being processed. The directory part of its path
# is kept separately; it is intended as the base for the relative links.
path = urlparse("http://www.example.com/dynamic/search.aspx?searchtype=cat&class_id=2566&city_id=55")
lpath = os.path.dirname(path.path)

html = u"<html class=\"\"><head id=\"pageHead\"><title>\n    Beauty Salons | Best Beauty Care &amp; Treatments | Listings @ Phonebook Online\n</title>\n    <!--\n    <meta http-equiv=\"Cache-Control\" content=\"no-cache, no-store, must-revalidate\" /><meta http-equiv=\"Pragma\" content=\"no-cache\" /><meta http-equiv=\"Expires\" content=\"0\" />\n    -->\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"><link rel=\"stylesheet\" href=\"../css_responsive/category.css\" type=\"text/css\" media=\"screen\">\n    <script async=\"\" src=\"//www.google-analytics.com/analytics.js\"></script><script async=\"\" src=\"//www.google.com/adsense/search/async-ads.js\"></script><script type=\"text/javascript\" src=\"../styles/scripts/jquery-1.9.1.min.js\"></script>\n    <link rel=\"shortcut icon\" type=\"image/png\" href=\"/PhoneBook.ico\">\n    <!-- #Begin Css Plugin -->\n    <link rel=\"stylesheet\" href=\"../css_responsive/fontsss.css\"><link rel=\"stylesheet\" href=\"../css_responsive/bootstrap-3.3.4-dist/css/bootstrap.css\" type=\"text/css\" media=\"screen\"><link rel=\"stylesheet\" href=\"../styles/scripts/fancybox/jquery.fancybox.css\" type=\"text/css\" media=\"screen\"><link rel=\"stylesheet\" href=\"../css_responsive/icon-detail.css\" type=\"text/css\" media=\"screen\">\n    <!-- #Finish Css Plugin-->\n    <!--<script src=\"http://www.google.com/adsense/search/ads.js\" type=\"text/javascript\"></script> -->\n    <script type=\"text/javascript\" charset=\"utf-8\">\n            (function (G, o, O, g, L, e) {\n                G[g] = G[g] || function () {\n                    (G[g]['q'] = G[g]['q'] || []).push(\n       arguments)\n                }, G[g]['t'] = 1 * new Date; L = o.createElement(O), e = o.getElementsByTagName(\n       O)[0]; L.async = 1; L.src = '//www.google.com/adsense/search/async-ads.js';\n                e.parentNode.insertBefore(L, e)\n            })(window, document, 'script', '_googCsa');\n    </script>\n    <!-- Script For 
Mobile Base Banner-->\n        <script async=\"\" src=\"//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js\"></script>\n        <script>\n            (adsbygoogle = window.adsbygoogle || []).push({\n                google_ad_client: \"ca-pub-6517686434458516\",\n                enable_page_level_ads: true\n            });\n        </script>\n    <!-- Script For Mobile Base Banner END-->\n\n\n    <script type=\"text/javascript\">\n        function AddClass(Class, Element, HasPriority) {\n            if (HasPriority == 0) {\n                this.className = 'container ' + Class;\n            }\n        }\n    </script>\n    \n<meta name=\"description\" content=\"Best Beauty Salons in Abbottabad for quality beauty care and treatments. \"><meta name=\"keywords\" content=\"beauty salons,beauty care,beauty treatments\"><style type=\"text/css\">.fancybox-margin{margin-right:17px;}</style></head>\n<body style=\"text-shadow: rgba(255, 255, 255, 0.4) 0px 1px 1px; background-color: rgb(240, 240, 240);\">\n<div class=\"wapper\">\n        <div class=\"pagecontent search_width c-no-t-margin\">\n            <div class=\"cblock ele-margin-t-b-15 m-on-mob-hide\"><a href=\"../../default.aspx\">Home</a> &gt; <a href=\"../../dynamic/categories.aspx\">Search by category</a> &gt; <a href=\"../../dynamic/categories.aspx?class_id=12\">Personal Care</a> &gt; <a href=\"../../dynamic/categories.aspx?class_id=134\">Barbers, Beauty Salons &amp; Spas</a> &gt; Beauty Salons in Abbottabad</div>\n            <div class=\"refine\">\n                <span>Refine Result</span>\n                <span>Show Result With</span>\n                <ul>\n                    <li>\n                        <input class=\"csortType csortTypeAll \" type=\"checkbox\" value=\"100\" name=\"\" checked=\"checked\" disabled=\"disabled\">\n                        <span class=\"\">All</span>\n                    </li>\n                    <li>\n                        <input class=\"csortType css-checkbox\" 
type=\"checkbox\" value=\"1\" name=\"\">\n                        <i class=\"icon-star-full c-icon-starfull-stroke\"></i>\n                        <span>Reviews</span>\n                    </li>\n                    <li>\n                        <input class=\"csortType\" type=\"checkbox\" value=\"2\" name=\"\">\n                        <i class=\"icon-price-tag cColor-Red\"></i>\n                        <span>Deals &amp; Coupons</span>\n                    </li>\n                     <li>\n                        <input class=\"csortType\" type=\"checkbox\" value=\"5\" name=\"\">\n                        <i class=\"icon-bullhorn\"></i>\n                        <span>Announcements</span>\n                    </li>\n                    <li>\n                        <input class=\"csortType\" type=\"checkbox\" value=\"3\" name=\"\">\n                        <i class=\"icon-location\"></i>\n                        <span>Map</span>\n                    </li>\n                    <li>\n                        <input class=\"csortType\" type=\"checkbox\" value=\"4\" name=\"\">\n                        <i class=\"icon-film\"></i>\n                        <span>Video</span>\n                    </li>\n                </ul>\n                \n                <div class=\"tab\" onclick=\"SlideTogle('Location')\">\n                    Search by location\n                </div>\n                \n                        <ul id=\"Location\" style=\"display: none;\">\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=1\">Karachi</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=2\">Lahore</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=56\">Islamabad</a></li>\n                    \n                        <li><a 
href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=79\">Rawalpindi</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=49\">Faisalabad</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=81\">Gujranwala</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=78\">Peshawar</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=82\">Sialkot</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=53\">Sargodha</a></li>\n                    \n                        </ul>\n                    \n                <div class=\"tab\" onclick=\"SlideTogle('Category')\">\n                    Search by category\n                </div>\n                \n                        <ul id=\"Category\" style=\"display: none;\">\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2571\">Hairstylists</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2575\">Hair Removal, Wax, Threading Body &amp; Face</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2584\">Manicuring</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2574\">Nail Salons &amp; Services</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2572\">Spas-Beauty, Health And Destination</a></li>\n                    \n                        <li><a href=\"search.aspx?searchtype=cat&amp;class_id=2564\">Beauty Institutes</a></li>\n                    \n                        
<li><a href=\"search.aspx?searchtype=cat&amp;class_id=2569\">Estheticians</a></li>\n                    \n                        </ul>\n            </div>\n            <div id=\"cResultMainControl\">\n                <div class=\"result_hldr\" id=\"cResultContainer\">\n                    <div class=\"h1\"><h1>Beauty Salons in Abbottabad.</h1></div>\n                    <div class=\"h1 page_desc cfont-12 cNo-Margin ele-pad-r-l-20 m-on-mob-hide\"><p class=\"cNo-Margin margin-t m-ele-top-no-margin \" style=\"line-height:18px;\">Best Beauty Salons in Abbottabad for quality beauty care and treatments, <a href=\"http://www.phonebook.com.pk/dynamic/search.aspx?SearchType=kl&amp;k=bridal+makeup\" title=\"Bridal Makeup\" target=\"_blank\">bridal makeup</a>, <a href=\"http://www.phonebook.com.pk/dynamic/search.aspx?SearchType=kl&amp;k=body+massage\" title=\"Body Massage\" target=\"_blank\">body massage</a>.</p></div>\n                    <div class=\"cMobileHidden col-md-12 col-xs-12 text-center overflow-visible cheight-25 margin-t\" style=\"background-color: rgb(240, 240, 240);\">\n                        <script async=\"\" src=\"//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js\"></script>\n                        <!-- New Line Link Ad -->\n                        <ins class=\"adsbygoogle\" style=\"display:inline-block;width:468px;height:15px;background-color: rgb(240, 240, 240);\" data-ad-client=\"ca-pub-6517686434458516\" data-ad-slot=\"4522680219\"></ins>\n                        <script>\n                            (adsbygoogle = window.adsbygoogle || []).push({});\n                        </script>\n                    </div>\n                    <div id=\"cAlpNav\" class=\"margin-t-10 cAlpNav m-on-mob-hide\">\n                    <div class=\"text-center\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55\">all</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=a\">a</a><a 
href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=b\">b</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=c\">c</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=d\">d</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=e\">e</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=f\">f</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=g\">g</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=h\">h</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=i\">i</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=j\">j</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=k\">k</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=l\">l</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=m\">m</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=n\">n</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=o\">o</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=p\">p</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=q\">q</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=r\">r</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=s\">s</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=t\">t</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=u\">u</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=v\">v</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=w\">w</a><a 
href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=x\">x</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=y\">y</a><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=55&amp;alp=z\">z</a></div></div>\n                    <div>\n                        <div id=\"cListingHldr\" class=\"listing\">\n                        \n<div class=\"container\">\n    <div class=\"comp_info\">\n        <h2><a href=\"../../company/51529-Beena-Beauty-Parlour\">Beena's Beauty Parlour</a></h2>\n        <!--<img class=\"margin-t\" alt=\"Comapny Rating\" src=\"../../images/Stars>.png\" />-->\n        <i class=\"cfont-12 cnoPad left icon-zero-star\"></i>\n        \n            <span class=\"blue margin-t\">(No Review)</span>\n        \n                <span class=\"cfontBold margin-t cColor-Black cColor-SilverDark\">\n                Main Mansehra Road, Near Radio Pakistan, Abbottabad.\n            </span>\n        \n        <div class=\"inline-block  cMobile-Right\">\n            <ul class=\"margin-t cMobile-Text-Align-Right\">\n                <li>\n                    <a data-fancybox-type=\"iframe\" href=\"../../dynamic/emailtocustomer.aspx?Request_ID=26207&amp;comp_name=Beena-Beauty-Parlour&amp;isAdvertizer=0\" class=\"other_links fancybox\">Email</a>\n                </li>\n                 <li>\n                    <a title=\"Call Now\" href=\"tel:+92-992-335556\" class=\"c_circle cMobileShow\"></a>\n                </li>\n                <li>\n                    <a class=\"other_links\" href=\"../../company/51529-Beena-Beauty-Parlour\" title=\"Company Detail\">Detail</a>\n                </li>\n                 \n             </ul>\n        </div>\n    </div>\n    <div class=\"comp_info contact_info\">\n        <strong><a class=\"tel\" href=\"tel:+92-992-335556\">+92-992-335556</a></strong>\n        \n    </div>\n</div>\n<div class=\"container\">\n    <div class=\"comp_info\">\n        <h2><a 
href=\"../../company/86977-Unique-Beauty-Salon\">Unique Beauty Salon</a></h2>\n        <!--<img class=\"margin-t\" alt=\"Comapny Rating\" src=\"../../images/Stars>.png\" />-->\n        <i class=\"cfont-12 cnoPad left icon-zero-star\"></i>\n        \n            <span class=\"blue margin-t\">(No Review)</span>\n        \n                <span class=\"cfontBold margin-t cColor-Black cColor-SilverDark\">\n                Palki Wedding Hall, Mandian , Abbottabad.\n            </span>\n        \n        <div class=\"inline-block  cMobile-Right\">\n            <ul class=\"margin-t cMobile-Text-Align-Right\">\n                <li>\n                    <a data-fancybox-type=\"iframe\" href=\"../../dynamic/emailtocustomer.aspx?Request_ID=61717&amp;comp_name=Unique-Beauty-Salon&amp;isAdvertizer=0\" class=\"other_links fancybox\">Email</a>\n                </li>\n                 <li>\n                    <a title=\"Call Now\" href=\"tel:+92-313-5856739\" class=\"c_circle cMobileShow\"></a>\n                </li>\n                <li>\n                    <a class=\"other_links\" href=\"../../company/86977-Unique-Beauty-Salon\" title=\"Company Detail\">Detail</a>\n                </li>\n                 \n             </ul>\n        </div>\n    </div>\n    <div class=\"comp_info contact_info\">\n        <strong><a class=\"tel\" href=\"tel:+92-313-5856739\">+92-313-5856739</a></strong>\n        \n    </div>\n</div></div>\n                        <div id=\"cRecoredInfo\" class=\"listing dotted\">Displaying listings from 1 to 10 of 10</div>\n                        <div class=\"text-center m-pad-l-r-10\">\n                            <div id=\"related-suggestions\" class=\"listing inline-block text-center cPad-b-t-10\"><span class=\"left cfont-14\"><b>Related Searches:</b></span> <div class=\"newsssss left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2584\" class=\"left ele-pad-r-l-20 text-underline 
cfont-14\">Manicuring</a></div><div class=\"newsssss left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2575\" class=\"left ele-pad-r-l-20 text-underline cfont-14\">Hair Removal, Wax, Threading Body &amp; Face</a></div><div class=\"newsssss left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2571\" class=\"left ele-pad-r-l-20 text-underline cfont-14\">Hairstylists</a></div>\n                                <div class=\"text-left ele-margin-t-b-15 left inline\"><b>Need help with your search?</b> Browse by:<a class=\"text-left ele-pad-r-l-20 text-underline\" onclick=\"hide_show('#related-locations',this);$('#related-categories').addClass('hide');\" href=\"javascript:void(0)\">other locations <img alt=\"\" class=\"margin-l\" width=\"18\" src=\"../../images/plus.png\"></a><a class=\"text-left ele-pad-r-l-20 text-underline\" onclick=\"hide_show('#related-categories',this);$('#related-locations').addClass('hide');\" href=\"javascript:void(0)\">similar categories <img alt=\"\" class=\"margin-l\" width=\"18\" src=\"../../images/plus.png\"></a></div><ul id=\"related-locations\" class=\"col-xs-12 col-sm-12 sugesstion-box hide\">\n                                <li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=1\" class=\"left\">Karachi</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=2\" class=\"left\">Lahore</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=56\" class=\"left\">Islamabad</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=79\" class=\"left\">Rawalpindi</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a 
href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=49\" class=\"left\">Faisalabad</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=81\" class=\"left\">Gujranwala</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=78\" class=\"left\">Peshawar</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=82\" class=\"left\">Sialkot</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2566&amp;city_id=53\" class=\"left\">Sargodha</a></li></ul>\n                                <ul id=\"related-categories\" class=\"col-xs-12 col-sm-12 sugesstion-box hide\">\n                                <li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2574\" class=\"left\">Nail Salons &amp; Services</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2572\" class=\"left\">Spas-Beauty, Health And Destination</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2564\" class=\"left\">Beauty Institutes</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2569\" class=\"left\">Estheticians</a></li></ul>\n                            </div>\n                        </div>\n                        <div class=\"text-center\">\n                        </div>\n                    </div>\n                </div>\n            </div>\n        </div>\n    </div>\n    \n<div class=\"container-fluid bg-silver m-on-mob-hide\">\n    <div class=\"row cPad-b-t-10\" style=\"border-bottom:1px solid #ECECEC;\">\n            \n    </div>\n</div>\n<script>\n  
  (function (i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r; i[r] = i[r] || function () {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date(); a = s.createElement(o),\n  m = s.getElementsByTagName(o)[0]; a.async = 1; a.src = g; m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-2028280-1', 'auto');\n    ga('send', 'pageview');\n</script>\n<script type=\"text/javascript\" src=\"../css_responsive/script/global_functions.js\"></script>\n<script type=\"text/javascript\" src=\"../styles/scripts/fancybox/jquery.fancybox.js?v=2.1.5\"></script>\n<script type=\"text/javascript\" src=\"../css_responsive/bootstrap-3.3.4-dist/js/bootstrap.js\"></script>\n</body></html>"

# Build the parse tree once using the lxml backend.
soup = BeautifulSoup(html, "lxml")

# Print every non-empty href value first, then every non-empty src value,
# skipping anything that starts with "http" or "jav".
for attr in ("href", "src"):
    for tag in soup.find_all(**{attr: True}):
        link = tag[attr]
        if link and not link.startswith(("http", "jav")):
            print(link)

这段代码会在控制台打印所有链接。我可以用 if-elif-else 区分 "../../"、"../"、"/" 和 "//" 这几种前缀,并成功地把它们转换为绝对路径。但问题是,当我尝试用 re.sub 把它们替换回 HTML 时,整个 HTML 又被弄乱了。我使用的是 BS4 而不是正则表达式,但问题仍然存在。由于字符数限制,我无法在此贴出输出结果;仅供参考,它还会破坏 "<!DOCTYPE html>" 以及其他 HTML 标签。请建议一种方法,让我能修改这些链接并把它们放回原来的位置。注意:根据 akashkarothiya 的建议,代码已经过精简。

我想我必须回答自己的问题。 :) - Mansoor Akram
你能否发布一些混乱的输出示例,以便我可以找出问题所在? - akash karothiya
@akashkarothiya,目前我按照您之前的回答解决了我的问题。我将在几分钟内发布答案。谢谢。 - Mansoor Akram
1
干得好,冠军! - akash karothiya
@akashkarothiya 看看我下面的代码。我还没有接受它,因为我确定它不是 Pythonic 或完美的。 - Mansoor Akram
@akashkarothiya 我已经成功完成了脚本,并将其发布为答案。 :| - Mansoor Akram
2个回答

1

感谢 Akash Karothiya提供的解决方案,最终代码已经完美运行。

该代码可以将任何给定HTML代码中的各种相对链接更改为绝对链接。

import os
import posixpath
import re
from urllib.parse import unquote, urljoin, urlparse

from bs4 import BeautifulSoup

# The URL of the page being processed arrives percent-encoded in the "link"
# query parameter of a proxy-style URL; decode it so it can be parsed.
unquoteURL = unquote("http://webpy_server/?link=http%3A//www.example.com/dynamic/search.aspx%3Fsearchtype%3Dcat%26class_id%3D4520%26page%3D1")

# Keep only what follows the first "link=". str.replace() would also strip any
# later "link=" that happens to occur inside the target URL itself.
path = urlparse(urlparse(unquoteURL).query.partition("link=")[2])

# URL paths always use "/", so take the directory part with posixpath.
# os.path follows the host OS conventions, and os.path.abspath() additionally
# consults the current working directory (and drive letter on Windows).
lpath = posixpath.dirname(path.path)

html = u"\n<!DOCTYPE html class=\"\"><head id=\"pageHead\"><title>\n    Yarn Manufacturers &amp; Suppliers | Listings @ Phonebook Online\n</title>\n    <!--\n    <meta http-equiv=\"Cache-Control\" content=\"no-cache, no-store, must-revalidate\" /><meta http-equiv=\"Pragma\" content=\"no-cache\" /><meta http-equiv=\"Expires\" content=\"0\" />\n    -->\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"><link rel=\"stylesheet\" href=\"../css_responsive/category.css\" type=\"text/css\" media=\"screen\">\n    <script async=\"\" src=\"//www.google-analytics.com/analytics.js\"></script><script async=\"\" src=\"//www.google.com/adsense/search/async-ads.js\"></script><script type=\"text/javascript\" src=\"../styles/scripts/jquery-1.9.1.min.js\"></script>\n    <link rel=\"shortcut icon\" type=\"image/png\" href=\"/PhoneBook.ico\">\n    <!-- #Begin Css Plugin -->\n    <link rel=\"stylesheet\" href=\"../css_responsive/fontsss.css\"><link rel=\"stylesheet\" href=\"../css_responsive/bootstrap-3.3.4-dist/css/bootstrap.css\" type=\"text/css\" media=\"screen\"><link rel=\"stylesheet\" href=\"../styles/scripts/fancybox/jquery.fancybox.css\" type=\"text/css\" media=\"screen\"><link rel=\"stylesheet\" href=\"../css_responsive/icon-detail.css\" type=\"text/css\" media=\"screen\">\n    <!-- #Finish Css Plugin-->\n    <!--<script src=\"http://www.google.com/adsense/search/ads.js\" type=\"text/javascript\"></script> -->\n    <script type=\"text/javascript\" charset=\"utf-8\">\n            (function (G, o, O, g, L, e) {\n                G[g] = G[g] || function () {\n                    (G[g]['q'] = G[g]['q'] || []).push(\n       arguments)\n                }, G[g]['t'] = 1 * new Date; L = o.createElement(O), e = o.getElementsByTagName(\n       O)[0]; L.async = 1; L.src = '//www.google.com/adsense/search/async-ads.js';\n                e.parentNode.insertBefore(L, e)\n            })(window, document, 'script', '_googCsa');\n    </script>\n    <!-- Script For Mobile 
Base Banner-->\n        <script async=\"\" src=\"//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js\"></script>\n        <script>\n            (adsbygoogle = window.adsbygoogle || []).push({\n                google_ad_client: \"ca-pub-6517686434458516\",\n                enable_page_level_ads: true\n            });\n        </script>\n    <!-- Script For Mobile Base Banner END-->\n\n\n    <script type=\"text/javascript\">\n        function AddClass(Class, Element, HasPriority) {\n            if (HasPriority == 0) {\n                this.className = 'container ' + Class;\n            }\n        }\n    </script>\n    \n<meta name=\"description\" content=\"Online Directory of Yarn Manufacturers &amp; Suppliers in Pakistan, providing list of names, contact numbers, addresses and reviews.\"><meta name=\"keywords\" content=\"Yarn Manufacturers &amp; Suppliers\"><style type=\"text/css\">.fancybox-margin{margin-right:17px;}</style></head>\n<body style=\"text-shadow: rgba(255, 255, 255, 0.4) 0px 1px 1px; background-color: rgb(240, 240, 240);\">\n    <!--Top Nav Bar Start -->\n    \n<div class=\"wapper bg-h\">\n    <div class=\"container-fluid\">\n        <div class=\"col-xs-12 col-md-12\">\n            <div class=\"col-xs-12 col-md-12\">\n                <div class=\"ele-block text-right ele-color-white ele-pad-t-5 m-text-center cMobileTextCenter cfont-12\" style=\"padding-top:5px;\">\n                    <a class=\"\" href=\"../dynamic/free-basic-listing.aspx\"> Free basic listing</a> \n                    | \n                    <a class=\"\" href=\"/advertisement-center/\"> Advertise with us</a>\n                </div>\n            </div>\n        </div>\n    </div>\n    <div class=\"header\">\n        <div class=\"logo\">\n            <div class=\"cMobileHidden left cPad-b-t-25\">\n                <img alt=\"Slider\" height=\"26\" class=\"left\" src=\"../../images/list-icon-slvr.png\" onclick=\"DefaultSliderMenu()\" style=\"cursor:pointer;\">\n            </div>\n 
           <div class=\"cDesktopHidden cMobileShow\">\n                <img alt=\"Slider\" height=\"26\" class=\"ele-float-left\" src=\"../../images/list-icon-slvr.png\" onclick=\"SlideMenu()\" style=\"cursor:pointer;vertical-align: baseline !important; \">\n            </div>\n            <!--<span class=\"home-slide-icon icon-list2 cPad-b-t-10 cDesktopHidden\" onclick=\"SlideMenu()\"></span>-->\n            <a class=\"left ele-margin-t-b-15 cMobileFloatNone\" style=\"text-decoration:none !important\" href=\"../../\">\n                <img alt=\"Phonebook\" class=\"\" width=\"205\" src=\"../../images/final-logo2s.png\">\n            </a>\n            <div class=\"cDesktopHidden cMobileShow\">\n                <img alt=\"Slider\" width=\"38\" height=\"26\" class=\"ele-float-left\" src=\"/images/magnify-glass-2.png\" onclick=\"enableMobileSearchOption() \" style=\"cursor:pointer;vertical-align: baseline !important; \">\n            </div>\n            <!--<a href=\"../../default.aspx\"><img height=\"60\" alt=\"Phonebook\" src=\"../images/Phonebook-Online-Logo-Big-new2.png\" /></a>-->\n            <!--<h2 class=\"mColorWhite\">Your Online Search Engine</h2>-->\n        </div>\n        <div id=\"cHeader_sky_banner\" class=\"sky_banner\"><embed src=\"http://www.phonebook.com.pk/images/advertisement/swf/79042_8_160614_61864_1.swf\" pluginspage=\"http://www.adobe.com/shockwave/download/download.cgi?P1_Prod_Version=ShockwaveFlash\" width=\"700\" height=\"90\" quality=\"high\" value=\"autostart=true\" wmode=\"transparent\"></div>\n    </div>\n</div>\n<div class=\"wapper bg-h bg-fixed flow-visible m-on-mob-hide\" style=\"top: 0px;\">\n    <div class=\"header\">\n        <form method=\"POST\" action=\"../redirect.aspx?searchtype=kl\">\n            <input class=\"icon-search\" type=\"text\" name=\"keyword\" placeholder=\"What ? (Name or Keyword)\" autocomplete=\"off\" required=\"\">\n            <input class=\"icon-loc\" type=\"text\" name=\"location\" placeholder=\"Where ? 
(City or Area)\" autocomplete=\"off\">\n            <input class=\"submit\" type=\"submit\" value=\"Find\">\n        </form>\n    </div>\n    <i class=\"after icon-circle-up\"></i>\n</div>\n    <!--Top Nav Bar End -->\n    <div class=\"wapper\">\n        <div class=\"pagecontent search_width c-no-t-margin\">\n            <div class=\"cblock ele-margin-t-b-15 m-on-mob-hide\"><a href=\"../../default.aspx\">Home</a> &gt; <a href=\"../../dynamic/categories.aspx\">Search by category</a> &gt; <a href=\"../../dynamic/categories.aspx?class_id=19\">Industrial supplies &amp; services</a> &gt; <a href=\"../../dynamic/categories.aspx?class_id=234\">Textiles</a> &gt; Yarn Wholesale &amp; Manufacturers in Pakistan</div>\n            \n            \n            \n            <div id=\"cResultMainControl\">\n                <div class=\"result_hldr\" id=\"cResultContainer\">\n                    \n                    \n                    <div class=\"cMobileHidden col-md-12 col-xs-12 text-center overflow-visible cheight-25 margin-t\" style=\"background-color: rgb(240, 240, 240);\">\n                        <script async=\"\" src=\"//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js\"></script>\n                        <!-- New Line Link Ad -->\n                        <ins class=\"adsbygoogle\" style=\"display:inline-block;width:468px;height:15px;background-color: rgb(240, 240, 240);\" data-ad-client=\"ca-pub-6517686434458516\" data-ad-slot=\"4522680219\"></ins>\n                        <script>\n                            (adsbygoogle = window.adsbygoogle || []).push({});\n                        </script>\n                    </div>\n                    <div id=\"cAlpNav\" class=\"margin-t-10 cAlpNav m-on-mob-hide\">\n                    <div class=\"text-center\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520\">all</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=a\">a</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=b\">b</a><a 
href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=c\">c</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=d\">d</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=e\">e</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=f\">f</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=g\">g</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=h\">h</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=i\">i</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=j\">j</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=k\">k</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=l\">l</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=m\">m</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=n\">n</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=o\">o</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=p\">p</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=q\">q</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=r\">r</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=s\">s</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=t\">t</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=u\">u</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=v\">v</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=w\">w</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=x\">x</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=y\">y</a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;alp=z\">z</a></div></div>\n                    <div>\n                        <div id=\"cListingHldr\" class=\"listing\">\n                        \n<div class=\"container\">\n    <div class=\"comp_info\">\n        <h2><a href=\"../../company/77683-A-J-Apparels-Pvt-Ltd\">A &amp; J 
Apparels (Pvt) Ltd.</a></h2>\n        <!--<img class=\"margin-t\" alt=\"Comapny Rating\" src=\"../../images/Stars>.png\" />-->\n        <i class=\"cfont-12 cnoPad left icon-zero-star\"></i>\n        \n            <span class=\"blue margin-t\">(No Review)</span>\n        \n                <span class=\"cfontBold margin-t cColor-Black cColor-SilverDark\">\n                LA/6-A Block  22, F. B Area, Karachi\n            </span>\n        \n        <div class=\"inline-block  cMobile-Right\">\n            <ul class=\"margin-t cMobile-Text-Align-Right\">\n                <li>\n                    <a data-fancybox-type=\"iframe\" href=\"../../dynamic/emailtocustomer.aspx?Request_ID=8127&amp;comp_name=A-J-Apparels-Pvt-Ltd&amp;isAdvertizer=0\" class=\"other_links fancybox\">Email</a>\n                </li>\n                 <li>\n                    <a title=\"Call Now\" href=\"tel:+92-21-36342521\" class=\"c_circle cMobileShow\"></a>\n                </li>\n                <li>\n                    <a class=\"other_links\" href=\"../../company/77683-A-J-Apparels-Pvt-Ltd\" title=\"Company Detail\">Detail</a>\n                </li>\n                 \n             </ul>\n        </div>\n    </div>\n    <div class=\"comp_info contact_info\">\n        <strong><a class=\"tel\" href=\"tel:+92-21-36342521\">+92-21-36342521</a></strong>\n        \n    </div>\n</div>\n\n\n\n\n\n\n\n\n</div>\n                        <div id=\"cRecoredInfo\" class=\"listing dotted\">Displaying listings from 1 to 10 of 161</div>\n                        <div class=\"text-center m-pad-l-r-10\">\n                            <div id=\"related-suggestions\" class=\"listing inline-block text-center cPad-b-t-10\"><span class=\"left cfont-14\"><b>Related Searches:</b></span> <div class=\"newsssss left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=1030\" class=\"left ele-pad-r-l-20 text-underline cfont-14\">Importers</a></div><div class=\"newsssss 
left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4499\" class=\"left ele-pad-r-l-20 text-underline cfont-14\">Textiles Wholesale &amp; Manufacturers</a></div><div class=\"newsssss left inline\" style=\"font-style: italic;font-weight:bold;\"><a href=\"search.aspx?searchtype=cat&amp;class_id=1029\" class=\"left ele-pad-r-l-20 text-underline cfont-14\">Exporters</a></div>\n                                <div class=\"text-left ele-margin-t-b-15 left inline\"><b>Need help with your search?</b> Browse by:<a class=\"text-left ele-pad-r-l-20 text-underline\" onclick=\"hide_show('#related-locations',this);$('#related-categories').addClass('hide');\" href=\"javascript:void(0)\">other locations <img alt=\"\" class=\"margin-l\" width=\"18\" src=\"../../images/plus.png\"></a><a class=\"text-left ele-pad-r-l-20 text-underline\" onclick=\"hide_show('#related-categories',this);$('#related-locations').addClass('hide');\" href=\"javascript:void(0)\">similar categories <img alt=\"\" class=\"margin-l\" width=\"18\" src=\"../../images/plus.png\"></a></div><ul id=\"related-locations\" class=\"col-xs-12 col-sm-12 sugesstion-box hide\">\n                                <li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=1\" class=\"left\">Karachi</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=2\" class=\"left\">Lahore</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=49\" class=\"left\">Faisalabad</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=77\" class=\"left\">Multan</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=81\" 
class=\"left\">Gujranwala</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=15\" class=\"left\">Hub</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=79\" class=\"left\">Rawalpindi</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=76\" class=\"left\">Hyderabad</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=62\" class=\"left\">Muzaffar Garh</a></li><li class=\"left cblock margin-l col-xs-3 col-sm-2\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;city_id=60\" class=\"left\">Layyah</a></li></ul>\n                                <ul id=\"related-categories\" class=\"col-xs-12 col-sm-12 sugesstion-box hide\">\n                                <li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4470\" class=\"left\">Knitted Fabrics</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=4489\" class=\"left\">Synthetic &amp; Blended Fabrics Wholesale &amp; Manufacturers</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2391\" class=\"left\">Aprons Wholesale &amp; Manufacturers</a></li><li class=\"left cblock margin-l col-xs-4 col-sm-4 text-left\"><a href=\"search.aspx?searchtype=cat&amp;class_id=2109\" class=\"left\">Linens Wholesale &amp; Manufacturers</a></li></ul>\n                            </div>\n                        </div>\n                        <div class=\"text-center\">\n                            <div id=\"cPagination\" class=\"listing\">\n                                <img class=\"left\" alt=\"\" src=\"../../images/page-1.png\">\n        
                    <a id=\"ctl39_cPageUrl\" class=\"pagi_anchor\">\n    <span id=\"ctl39_cAlp\">B</span>\n    <span id=\"ctl39_cPageNo\"></span>\n </a><a href=\"search.aspx?searchtype=cat&amp;class_id=4520&amp;page=1\" id=\"ctl40_cPageUrl\" class=\"pagi_anchor\">\n    <span id=\"ctl40_cAlp\" style=\"color:red !important;;\">O</span>\n    <span id=\"ctl40_cPageNo\" style=\"color:red !important;;\">1</span>\n </a></div>\n                        </div>\n                         \n                    </div>\n                </div>\n            </div>\n            \n\n            <div class=\"srch_banner\"> \n                \n            \n                \n                \n                \n                \n            </div>\n        </div>\n    </div><div style=\"height: 0px; visibility: hidden; font-weight: normal; text-align: center;\"><iframe frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" allowtransparency=\"true\" scrolling=\"no\" width=\"100%\" name=\"{&quot;name&quot;:&quot;master-2&quot;,&quot;slave-0-2&quot;:{&quot;container&quot;:&quot;adNewNTRSearchPagecontainer2&quot;,&quot;linkTarget&quot;:&quot;_top&quot;,&quot;lines&quot;:3,&quot;colorBackground&quot;:&quot;#e0e0e0&quot;,&quot;colorBorder&quot;:&quot;#0b0b0b&quot;,&quot;fontFamily&quot;:&quot;verdana&quot;,&quot;adIconLocation&quot;:&quot;ad-left&quot;,&quot;width&quot;:&quot;300px&quot;,&quot;type&quot;:&quot;ads&quot;,&quot;hl&quot;:&quot;en&quot;,&quot;columns&quot;:1,&quot;horizontalAlignment&quot;:&quot;left&quot;,&quot;resultsPageQueryParam&quot;:&quot;query&quot;},&quot;master-2&quot;:{&quot;linkTarget&quot;:&quot;_top&quot;,&quot;lines&quot;:3,&quot;colorBackground&quot;:&quot;#e0e0e0&quot;,&quot;colorBorder&quot;:&quot;#0b0b0b&quot;,&quot;fontFamily&quot;:&quot;verdana&quot;,&quot;adIconLocation&quot;:&quot;ad-left&quot;,&quot;width&quot;:&quot;300px&quot;,&quot;type&quot;:&quot;ads&quot;,&quot;hl&quot;:&quot;en&quot;,&quot;columns&quot;:1,&quot;horizontalAlignment&quot;:&quot;left&q
uot;,&quot;resultsPageQueryParam&quot;:&quot;query&quot;}}\" id=\"master-2\" src=\"https://www.google.com/afs/ads?q=Yarn%20Wholesale%20%26%20Manufacturers&amp;adpage=1&amp;r=m&amp;fexp=21404%2C7000107&amp;client=pub-6517686434458516&amp;channel=3589710218&amp;hl=en&amp;type=0&amp;oe=UTF-8&amp;ie=UTF-8&amp;jsei=3&amp;format=n2&amp;ad=n2&amp;nocache=3631469793737437&amp;num=0&amp;output=uds_ads_only&amp;v=3&amp;allwcallad=1&amp;preload=true&amp;adext=as1%2Csr1%2Cctc1&amp;bsl=10&amp;u_his=3&amp;u_tz=300&amp;dt=1469793737439&amp;u_w=1366&amp;u_h=768&amp;biw=1349&amp;bih=599&amp;psw=1349&amp;psh=1589&amp;frm=0&amp;uio=uv3vp1sl1sr1cc1-wi300ff1&amp;jsv=12350&amp;rurl=http%3A%2F%2Fwww.phonebook.com.pk%2Fdynamic%2Fsearch.aspx%3Fsearchtype%3Dcat%26class_id%3D4520#master-2\" style=\"visibility: hidden; height: 0px;\"></iframe></div>\n    \n<div class=\"container-fluid bg-silver m-on-mob-hide\">\n    <div class=\"row cPad-b-t-10\" style=\"border-bottom:1px solid #ECECEC;\">\n            <!--\n            <div class=\"col-md-12 col-lg-12 col-xs-12\">\n            <img height=\"40\" alt=\"\" src=\"../images/Phonebook-Online-Logo-Big-new.png\" />\n            </div>\n            -->\n    </div>\n</div>\n<div class=\"wapper pad-top-10 footerBg bg-white m-pad-zero\">\n    <div class=\"width footer m-on-mob-hide cMobileHiddenblock \">\n        <ul class=\"list-unstyled col-sm-4 m-on-mob-hide cMobileHidden\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">Popular Keywords :</strong></li>\n            <li>\n                <ul class=\"list-unstyled\">\n                    <li><a href=\"../../dynamic/search.aspx?searchtype=kl&amp;k=restaurants&amp;l=pakistan\">Restaurants</a>,</li>\n                    <li><a href=\"../../dynamic/search.aspx?searchtype=kl&amp;k=pizza&amp;l=pakistan\">Pizza</a>,</li>\n                    <li><a href=\"../../dynamic/search.aspx?searchtype=kl&amp;k=hajj+%26+umrah&amp;l=pakistan\">Hajj &amp; Umrah</a>,</li>\n                    \n             
       \n                    \n                    \n                    \n                    \n                    \n                    \n                    \n                </ul>\n            </li>\n            <li class=\"margin-t\"><strong style=\"color:#37aef0;\">Popular Cities :</strong></li>\n            <li>\n                <ul class=\"list-unstyled\">\n                    <li><a href=\"../../dynamic/city_categories.aspx?city_id=1\">Karachi</a>,</li>\n                    <li><a href=\"../../dynamic/city_categories.aspx?city_id=2\">Lahore</a>,</li>\n                    \n                    \n                    \n                    \n                    \n                    \n                    \n                    \n                    \n                    <li><a href=\"../../dynamic/city_categories.aspx?city_id=75\">Sukkur</a></li>\n                </ul>\n            </li>\n        </ul>\n        <ul class=\"col-xs-6 col-sm-2 styled\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">ADVERTISE :</strong></li>\n            <li><a href=\"/advertisement-center/\">Advertise with us</a></li>\n            <li><a href=\"../../dynamic/free-basic-listing.aspx\">Get a Free Listings</a></li>\n            \n        </ul>\n        <ul class=\"col-xs-6 col-sm-2 styled\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">QUICK LINKS :</strong></li>\n            <li><a href=\"../../dynamic/categories.aspx\">Search by Category</a>,</li>\n            \n            \n            <li><a href=\"javascript:void(0)\">Browse by Video</a></li>\n        </ul>\n        <ul class=\"col-xs-6 col-sm-2 styled\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">ABOUT US:</strong></li>\n            <li><a href=\"../../static/contact-us.aspx\">Contact Us</a></li>\n            <li><a href=\"javscript:void(0)\">Report an Error</a></li>\n            \n            \n            \n            \n        </ul>\n        <ul class=\"col-xs-6 col-sm-2 
styled\">\n            <li class=\"\"><strong style=\"color:#37aef0;\">PARTNERS:</strong></li>\n            <li><a href=\"http://jang.com.pk/\">Jang Group of Newspapers</a></li>\n            \n            \n            <li><a href=\"http://www.ptcl.com.pk/\">PTCL - White Page Telephone Directory Data</a></li>\n        </ul>\n    </div>\n    <div class=\"col-xs-12 m-footer-wapper m-hidden-on-desktop\">\n        <div class=\"col-xs-3\">\n            <a title=\"Home\" href=\"/\"><img class=\"col-xs-12 cNoPad ele-pad-zero\" alt=\"Home\" src=\"../images/footer-icon-home.png\"></a>\n        </div>\n        <div class=\"col-xs-3\">\n            <a title=\"Free Basic Listing\" href=\"/dynamic/free-basic-listing.aspx\"><img class=\"col-xs-12 cNoPad ele-pad-zero\" alt=\"Home\" src=\"../images/footer-icon-free-listing.png\"></a>\n        </div>\n        <div class=\"col-xs-3\">\n            <a title=\"Contact Us\" href=\"/static/contact-us.aspx\"><img class=\"col-xs-12 cNoPad ele-pad-zero\" alt=\"Home\" src=\"../images/footer-icon-contact.png\"></a>\n        </div>\n        <div class=\"col-xs-3\">\n            <a title=\"Free Basic Listing\" href=\"/advertisement-center\"><img class=\"col-xs-12 cNoPad ele-pad-zero\" alt=\"Home\" src=\"../images/footer-icon-advertisewithus.png\"></a>\n        </div>\n    </div>\n</div>\n\n \n<script>\n    (function (i, s, o, g, r, a, m) {\n        i['GoogleAnalyticsObject'] = r; i[r] = i[r] || function () {\n            (i[r].q = i[r].q || []).push(arguments)\n        }, i[r].l = 1 * new Date(); a = s.createElement(o),\n  m = s.getElementsByTagName(o)[0]; a.async = 1; a.src = g; m.parentNode.insertBefore(a, m)\n    })(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');\n\n    ga('create', 'UA-2028280-1', 'auto');\n    ga('send', 'pageview');\n</script>\n \n    <div class=\"modal\" id=\"cSlideMenu\" onclick=\"SlideMenu2()\">\n    </div>\n\n\n<div class=\"slideMenu cfont-12 ie-ele-none\" id=\"defaultSliderMenu\" 
style=\"max-height: 599px; overflow: auto;\">\n    <ul>\n        <!--\n        <li class=\"ele-pad-t-b-30\"></li>\n        -->\n        <li>\n            <a class=\"icon-circle-down\" href=\"javascript:void(0)\" onclick=\"showSubMenu(this,'.menuSearchType')\">Business search </a>\n            <ul class=\"hide menuSearchType\">\n                <li><a href=\"../../dynamic/categories.aspx\">Search by category</a></li>\n                <li><a href=\"../../dynamic/city_select.aspx\">Search by city</a></li>\n                <li><a href=\"../../searchbyphone.aspx\">Search by phone</a></li>\n                <li><a href=\"../../searchbyaddress.aspx\">Search by address</a></li>\n                <li><a href=\"../../searchbybrand.aspx\">Search by brand</a></li>\n            </ul>\n        </li>\n        <li>\n            <a class=\"icon-circle-down\" href=\"javascript:void(0)\" onclick=\"showSubMenu(this,'.menuSearchFap')\">People search</a>\n            <ul class=\"hide menuSearchFap\">\n                <li><a href=\"../../findaperson/findaperson.aspx?type=name\">Search by name</a></li>\n                <li><a href=\"../../findaperson/findaperson.aspx?type=number\">Search by number</a></li>\n            </ul>\n        </li>\n        <li>\n            <a class=\"icon-circle-down\" href=\"javascript:void(0)\" onclick=\"showSubMenu(this,'.menuGuides')\">Specialized Guides</a>\n            <ul class=\"hide menuGuides\">\n                <li><a href=\"../../dynamic/search.aspx?searchtype=cat&amp;class_id=4710\">Development Sector &amp; NGOs</a></li>\n                <li><a href=\"../../dynamic/search.aspx?searchtype=cat&amp;class_id=863\">Associations &amp; Trade Bodies</a></li>\n                <li><a href=\"../../dynamic/search.aspx?searchtype=cat&amp;class_id=864\">Chambers of Commerce</a></li>\n                <li><a href=\"../../dynamic/search.aspx?SearchType=cat&amp;class_id=1514\">Embassies &amp; Foreign Missions</a></li>\n                <li><a 
href=\"../../dynamic/categories.aspx?class_Id=65\">Import &amp; Export</a></li>\n                <li><a href=\"../../dynamic/search.aspx?SearchType=cat&amp;class_id=1517\">Federal Government</a></li>\n                <li><a href=\"../../dynamic/categories.aspx?class_id=4638\">Emergency &amp; Complain</a></li>\n                <li><a href=\"../../static/nwdcode.aspx\">NWD Codes</a></li>\n            </ul>\n        </li>\n        <li><a href=\"/advertisement-center/\">Advertise with us</a></li>\n        <li><a href=\"javascript:void(0)\">Help</a></li>\n    </ul>\n</div>\n<div class=\"modal in\" id=\"cSlideMenu\" onclick=\"SlideMenu2()\" aria-hidden=\"false\" style=\"display:none; padding-right: 17px;\">\n</div>\n\n  \n \n<script type=\"text/javascript\" src=\"../css_responsive/script/global_functions.js\"></script>\n<script type=\"text/javascript\" src=\"../styles/scripts/fancybox/jquery.fancybox.js?v=2.1.5\"></script>\n<script type=\"text/javascript\" src=\"../css_responsive/bootstrap-3.3.4-dist/js/bootstrap.js\"></script>\n\n\n</body></html>"

soup = BeautifulSoup(html, "lxml")

# Every link is resolved against the URL of the page the markup came from.
base_url = path.geturl()

# Rewrite the URL-carrying attributes on the parsed tags instead of running
# regular expressions over the raw markup. urljoin() applies the standard
# resolution rules, so "../", "../../", "/", "//" and bare relative references
# are all handled in one place, while values that carry their own scheme
# (http:, https:, javascript:, tel:, ...) are returned unchanged.
for attribute in ("href", "src", "action"):
    for tag in soup.find_all(**{attribute: True}):
        value = tag[attribute]
        if not value:
            # urljoin() would replace an empty value with the base URL itself;
            # leave empty attributes exactly as they are.
            continue
        tag[attribute] = urljoin(base_url, value)

# Serialise the modified tree. Only real attributes are touched; URLs that
# merely appear inside comments or script text stay as they were.
# NOTE: the markup is re-serialised by the parser, so whitespace and quoting
# may differ from the input string.
html = str(soup)

print(html)

1

我发现了一个使用 re.sub 的更简单的解决方案,因为 re.sub 可以接受一个函数作为替换参数。

import re
from urllib.parse import urljoin

abs_url = "https://sample.com/sample-page.html"
my_html = """
    <div class="sample-class">
        <a href="../new-page.html">New page</a>
        <img src="../sample-image.jpg" alt="">
        <img src="../sample-image2.jpg" alt="">
    </div>"""


def absolutize(m):
    """Rebuild a matched ` src="..."` attribute with its URL resolved against abs_url."""
    return ' src="' + urljoin(abs_url, m.group(1)) + '"'


def absolutize2(m):
    """Rebuild a matched ` href="..."` attribute with its URL resolved against abs_url."""
    return ' href="' + urljoin(abs_url, m.group(1)) + '"'


# re.sub() accepts a function as the replacement: it is called once per match
# and its return value is substituted for the matched text.
# "src"
my_html = re.sub(r' src="([^"]+)"', absolutize, my_html)
# "href"
my_html = re.sub(r' href="([^"]+)"', absolutize2, my_html)

# my_html is now:
#
#     <div class="sample-class">
#         <a href="https://sample.com/new-page.html">New page</a>
#         <img src="https://sample.com/sample-image.jpg" alt="">
#         <img src="https://sample.com/sample-image2.jpg" alt="">
#     </div>

尚未测试多级相对URL(例如src="../../hello.jpg"),但应该可以正常工作。

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接