# Sample HTML fragment used as the parser input further down: a page's top bar
# followed by the start of its site header. The markup is cut off partway
# through the header, so several opened tags are never closed.
response ='''
<a class="screen-reader-text skip-link" href="#content" title="Skip to content">Skip to content</a> <div class="top-bar grid-container top-bar-align-right">
<div class="inside-top-bar">
<aside id="block-10" class="widget inner-padding widget_block widget_text">
<p class="has-small-font-size"></p>
</aside><aside id="block-19" class="widget inner-padding widget_block">
<div class="wp-block-group is-layout-flow"><div class="wp-block-group__inner-container">
<div class="wp-block-columns is-layout-flex wp-container-2">
<div class="wp-block-column is-layout-flow" style="flex-basis:100%">
<p><a href="https://thepornlinks.com/" title="ThePornLinks.com" target="_blank" data-wpel-link="external" rel="external noopener noreferrer">ThePornLinks.com</a></p>
<meta content="160728" name="xubstercom">
</div>
</div>
</div></div>
</aside> </div>
</div>
<header class="site-header" id="masthead" aria-label="Site" itemtype="https://schema.org/WPHeader" itemscope>
<div class="inside-header grid-container">
<div class="site-branding">
<p class="main-title" itemprop="headline">
<a href="https://pornchil.com/" rel="home noopener noreferrer" data-wpel-link="internal" target="_blank">
PornChil
</a>
'''
# Build a selector over the HTML held in `response` so it can be queried
# with CSS expressions.
selector = parsel.Selector(text=response)

# Match the element that carries all three classes at once. getall()
# serializes every match together with its nested children, so child tags
# that have no class of their own (such as the inner <p>) appear in the output.
link = (
    selector
    .css('.wp-block-columns.is-layout-flex.wp-container-2')
    .getall()
)
print(link)
'''
這個P標籤沒有class屬性
為什麼會被提取出來
<p><a href="https://thepornlinks.com/" title="ThePornLinks.com" target="_blank" data-wpel-link="external" rel="external noopener noreferrer">ThePornLinks.com</a></p>
Dears,
因為你的程式語法 link = selector.css('.wp-block-columns.is-layout-flex.wp-container-2').getall()
會找出符合這個 CSS 條件的元素,並回傳該元素的完整 HTML(包含它底下所有的子元素)。
<p> 標籤的階層在 <div> 標籤底下,所以才會得到這個結果。
以下幫你把階層關係整理一下。
CODE:
<div class="wp-block-columns is-layout-flex wp-container-2">
<div class="wp-block-column is-layout-flow" style="flex-basis:100%">
<p>
<a href="https://thepornlinks.com/" title="ThePornLinks.com" target="_blank" data-wpel-link="external" rel="external noopener noreferrer">ThePornLinks.com</a>
</p>
<meta content="160728" name="xubstercom">
</div>
</div>