在上一篇,我們抽出了一個 Base 抽象類別;今天我們換一個方式重構,改用組合(composition)取代繼承。
原本 Home 及 Base 的程式如下:
<?php
namespace Recca0120\Ithome30\Crawlers;
use GuzzleHttp\Psr7\Request;
use Psr\Http\Client\ClientInterface;
/**
 * Common foundation for the crawlers: holds the site base URL, the
 * browser-like request headers, and a helper that downloads a page body.
 */
abstract class Base
{
    /** Scheme and host that crawled paths are appended to. */
    protected string $baseUrl = 'https://www.ptt.cc';

    /**
     * Headers attached to every outgoing request.
     *
     * @var array<string, string>
     */
    private array $headers = [
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding' => 'gzip, deflate, br',
        'Accept-Language' => 'zh-TW,zh;q=0.8',
        'Cache-Control' => 'max-age=0',
        'Cookie' => 'over18=1',
        'Referer' => 'https://www.ptt.cc/bbs/Gossiping/index.html',
        'Sec-Ch-Ua' => '"Brave";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'Sec-Ch-Ua-Mobile' => '?0',
        'Sec-Ch-Ua-Platform' => '"macOS"',
        'Sec-Fetch-Dest' => 'document',
        'Sec-Fetch-Mode' => 'navigate',
        'Sec-Fetch-Site' => 'same-origin',
        'Sec-Fetch-User' => '?1',
        'Sec-Gpc' => '1',
        'Upgrade-Insecure-Requests' => '1',
        'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
    ];

    /**
     * @param ClientInterface $httpClient HTTP client that actually performs the requests.
     */
    public function __construct(protected ClientInterface $httpClient)
    {
    }

    /**
     * Send a GET request for $url with the preset headers and return the
     * response body cast to a string.
     *
     * NOTE(review): the response status code is not inspected here, so an
     * error page would be returned like any other body — confirm this is intended.
     *
     * @param mixed $url Target handed unchanged to the Request constructor.
     *
     * @return string
     */
    protected function sendRequest($url)
    {
        $response = $this->httpClient->sendRequest(
            new Request('GET', $url, $this->headers)
        );

        return (string) $response->getBody();
    }
}
<?php
// src/Crawlers/Home.php
namespace Recca0120\Ithome30\Crawlers;
/**
 * Crawler for the hot-boards page, built on top of the shared Base class.
 */
class Home extends Base
{
    /**
     * Download the hot-boards page and convert every board anchor into a
     * column map.
     *
     * @return array<int, array<string, string>> One entry per board anchor:
     *     a 'url' key plus one key for each "board-<name>" div inside it.
     */
    public function all()
    {
        $html = $this->sendRequest($this->baseUrl . '/bbs/hotboards.html');

        return array_map(
            fn (string $row): array => $this->parseCols($row),
            $this->parseRows($html)
        );
    }

    /**
     * Build the column map for a single board anchor.
     *
     * @param string $row HTML of one `<a class="board">…</a>` element.
     *
     * @return array<string, string> 'url' (absolute) plus each "board-<name>"
     *     value with tags and the '◎' marker stripped.
     */
    private function parseCols(string $row): array
    {
        // 'url' is assigned first so that the key order of the result is stable.
        $cols = ['url' => $this->baseUrl . $this->parseUrl($row)];

        preg_match_all(
            '/"board-(?<name>\w+)">(?<value>.+?)<\/div>/',
            $row,
            $matches,
            PREG_SET_ORDER
        );
        foreach ($matches as $match) {
            $cols[$match['name']] = str_replace('◎', '', strip_tags($match['value']));
        }

        return $cols;
    }

    /**
     * Extract the value of the first href attribute in $row.
     *
     * @param string $row HTML of one board anchor.
     *
     * @return string The href value, or '' when the row has no href.
     */
    private function parseUrl(string $row): string
    {
        // [^"]+ stops at the attribute's closing quote. A greedy ".+" only
        // worked while nothing else quoted followed href on the same line.
        if (preg_match('/href="(?<url>[^"]+)"/', $row, $matched) !== 1) {
            return '';
        }

        return $matched['url'];
    }

    /**
     * Split the page into the raw HTML of each board anchor.
     *
     * @param string $html Full page markup.
     *
     * @return array<int, string> Every `<a class="board" …>…</a>` fragment, in document order.
     */
    private function parseRows(string $html): array
    {
        preg_match_all('/<a\sclass="board"[^>]*>.+?<\/a>/s', $html, $matches);

        return $matches[0];
    }
}
我們快速地重構成:
<?php
namespace Recca0120\Ithome30;
use GuzzleHttp\Psr7\Request;
use Psr\Http\Client\ClientInterface;
/**
 * Thin HTTP helper: knows the site base URL and sends GET requests carrying
 * a fixed set of browser-like headers.
 */
class Client
{
    /** Scheme and host that crawled paths are appended to. */
    protected string $baseUrl = 'https://www.ptt.cc';

    /**
     * Headers attached to every outgoing request.
     *
     * @var array<string, string>
     */
    private array $headers = [
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding' => 'gzip, deflate, br',
        'Accept-Language' => 'zh-TW,zh;q=0.8',
        'Cache-Control' => 'max-age=0',
        'Cookie' => 'over18=1',
        'Referer' => 'https://www.ptt.cc/bbs/Gossiping/index.html',
        'Sec-Ch-Ua' => '"Brave";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'Sec-Ch-Ua-Mobile' => '?0',
        'Sec-Ch-Ua-Platform' => '"macOS"',
        'Sec-Fetch-Dest' => 'document',
        'Sec-Fetch-Mode' => 'navigate',
        'Sec-Fetch-Site' => 'same-origin',
        'Sec-Fetch-User' => '?1',
        'Sec-Gpc' => '1',
        'Upgrade-Insecure-Requests' => '1',
        'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
    ];

    /**
     * @param ClientInterface $httpClient HTTP client that actually performs the requests.
     */
    public function __construct(protected ClientInterface $httpClient)
    {
    }

    /**
     * Send a GET request for $url with the preset headers and return the
     * response body cast to a string.
     *
     * @param mixed $url Target handed unchanged to the Request constructor.
     *
     * @return string
     */
    public function sendRequest($url)
    {
        $body = $this->httpClient
            ->sendRequest(new Request('GET', $url, $this->headers))
            ->getBody();

        return (string) $body;
    }

    /**
     * Expose the base URL so collaborators can build absolute links.
     *
     * @return string
     */
    public function getBaseUrl()
    {
        return $this->baseUrl;
    }
}
<?php
// src/Crawlers/Home.php
namespace Recca0120\Ithome30\Crawlers;
use Psr\Http\Client\ClientInterface;
use Recca0120\Ithome30\Client;
/**
 * Crawler for the hot-boards page. It delegates HTTP access and base-URL
 * knowledge to a Client instance instead of inheriting them.
 */
class Home
{
    private Client $client;

    /**
     * @param ClientInterface $clientInterface HTTP client wrapped by the internal Client.
     */
    public function __construct(ClientInterface $clientInterface)
    {
        $this->client = new Client($clientInterface);
    }

    /**
     * Download the hot-boards page and convert every board anchor into a
     * column map.
     *
     * @return array<int, array<string, string>> One entry per board anchor:
     *     a 'url' key plus one key for each "board-<name>" div inside it.
     */
    public function all()
    {
        $html = $this->client->sendRequest($this->client->getBaseUrl() . '/bbs/hotboards.html');

        return array_map(
            fn (string $row): array => $this->parseCols($row),
            $this->parseRows($html)
        );
    }

    /**
     * Build the column map for a single board anchor.
     *
     * @param string $row HTML of one `<a class="board">…</a>` element.
     *
     * @return array<string, string> 'url' (absolute) plus each "board-<name>"
     *     value with tags and the '◎' marker stripped.
     */
    private function parseCols(string $row): array
    {
        // 'url' is assigned first so that the key order of the result is stable.
        $cols = ['url' => $this->client->getBaseUrl() . $this->parseUrl($row)];

        preg_match_all(
            '/"board-(?<name>\w+)">(?<value>.+?)<\/div>/',
            $row,
            $matches,
            PREG_SET_ORDER
        );
        foreach ($matches as $match) {
            $cols[$match['name']] = str_replace('◎', '', strip_tags($match['value']));
        }

        return $cols;
    }

    /**
     * Extract the value of the first href attribute in $row.
     *
     * @param string $row HTML of one board anchor.
     *
     * @return string The href value, or '' when the row has no href.
     */
    private function parseUrl(string $row): string
    {
        // [^"]+ stops at the attribute's closing quote. A greedy ".+" only
        // worked while nothing else quoted followed href on the same line.
        if (preg_match('/href="(?<url>[^"]+)"/', $row, $matched) !== 1) {
            return '';
        }

        return $matched['url'];
    }

    /**
     * Split the page into the raw HTML of each board anchor.
     *
     * @param string $html Full page markup.
     *
     * @return array<int, string> Every `<a class="board" …>…</a>` fragment, in document order.
     */
    private function parseRows(string $html): array
    {
        preg_match_all('/<a\sclass="board"[^>]*>.+?<\/a>/s', $html, $matches);

        return $matches[0];
    }
}
如果單就消除重複程式碼而言,上一篇的方式和這一篇的方式都可以達到目的,但程式的表達力截然不同;當需求再增加時,兩者調整程式的方式也截然不同。那麼,哪個方案比較好?在新需求還沒出現之前,我們其實無法預知;還好我們已經有測試保護,等新需求出現時再來決定方案就好了。