...原來 BeautifulSoup
有提供我想要的效果
from bs4 import BeautifulSoup, Tag
class CustomTag(Tag):
def strict_select_one(...):
...
class CustomBeautifulSoup(BeautifulSoup, CustomTag):
...
# element_classes: A dictionary mapping BeautifulSoup classes like Tag and NavigableString, to other classes you'd like to be instantiated instead as the parse tree is built. This is useful for subclassing Tag or NavigableString to modify default behavior.
soup = CustomBeautifulSoup(..., element_classes={ Tag: CustomTag })
抱歉,我以為只要將 Tag
的實例變為 CustomTag
的實例就好了。這是我的嘗試
from typing import Any
from bs4 import BeautifulSoup, Tag
import inspect
# first one is `self`
_bs4_tag_init_params = list(inspect.signature(Tag.__init__).parameters)[1:]
class CustomTag(Tag):
@classmethod
def _create_from_bs4_tag(cls, tag: Tag):
custom_tag = CustomTag(**{param: getattr(tag, param) for param in _bs4_tag_init_params})
custom_tag.extend(tag.children)
return custom_tag
def strict_select_one(self, selector: str, **kwargs: Any):
if element := super().select_one(selector, **kwargs):
return self._create_from_bs4_tag(element)
raise TypeError(f"Element not found: '{selector}'")
class CustomBeautifulSoup(BeautifulSoup, CustomTag):
...
soup = CustomBeautifulSoup("<h1><a href='1.html'>Hello World</a></h1>", "lxml")
print(soup.strict_select_one("h1").strict_select_one("a"))
以上是能運..行...但在使用 __eq__, __contains__, Tag().decompose()
等等的時候都是失敗的
我想從 Tag
擴充一些方法
from typing import Any
from bs4 import BeautifulSoup, Tag
class CustomTag(Tag):
def strict_select_one(self, selector: str, **kwargs: Any):
element: Tag | None = super().select_one(selector, **kwargs)
# 返回 `element` 或 raise error
class CustomBeautifulSoup(BeautifulSoup, CustomTag):
...
soup = CustomBeautifulSoup("<h1>123</h1>", "lxml")
因為我想用方法鏈 soup.strict_select_one(...).strict_select_one(...)
但我不知道怎麼將 Tag
的實例變為 CustomTag
的實例
(雖然直接操作 None
(select_one(...).text
) 的時候會引起錯誤,但主要是想學習一下)
或者除了繼承外有其他解決辦法嗎?
class CustomTag(Tag):
def __init__(self, testprop, *args, **kwargs):
super().__init__(*args, **kwargs)
self.testprop = testprop
t1 = CustomTag(name="b", testprop="a")
t2 = CustomTag(name="b", testprop="a")
t1.append(t2)
print(type(t1.select_one("b")))
這樣就行了?