iT邦幫忙

2023 iThome 鐵人賽

DAY 28
0
Software Development

開心撰寫 PHPUnit系列 第 28

Day 28. 消除重覆程式碼 - 抽象類別

  • 分享至 

  • xImage
  •  

到目前為止,我們已經能把 PTT 的文章全部抓取回來,也算是完成了目前的所有需求。但回頭做 code review 時,會發現有一些重複的程式碼:

建構子(__construct)重複

// "Before" excerpt: Home receives its HTTP client through a promoted private constructor parameter.
class Home {
    public function __construct(private ClientInterface $httpClient)
    {
    }
}

// "Before" excerpt: Board repeats the same HTTP client injection and adds an
// optional nullable int $take that defaults to null.
class Board {
    public function __construct(private ClientInterface $httpClient, private ?int $take = null)
    {
    }
}

sendRequest 重複

class Home {
    /**
     * "Before" excerpt: requests the hot-boards page, then maps every row
     * returned by parseRows() through parseCols().
     */
    public function all()
    {
        // Duplicated code: start
        $request = new Request('GET', 'https://www.ptt.cc/bbs/hotboards.html');
        $response = $this->httpClient->sendRequest($request);
        // Duplicated code: end
        $html = (string) $response->getBody();

        return array_map(
            fn (string $row) => $this->parseCols($row),
            $this->parseRows($html)
        );
    }
}

class Board {
    /**
     * "Before" excerpt: sends a GET request for $url with a fixed set of
     * headers and returns the response body cast to a string.
     */
   private function sendRequest($url)
    {
        // Duplicated code: start
        $request = new Request('GET', $url, [
            'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding' => 'gzip, deflate, br',
            'Accept-Language' => 'zh-TW,zh;q=0.8',
            'Cache-Control' => 'max-age=0',
            'Cookie' => 'over18=1',
            'Referer' => 'https://www.ptt.cc/bbs/Gossiping/index.html',
            'Sec-Ch-Ua' => '"Brave";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
            'Sec-Ch-Ua-Mobile' => '?0',
            'Sec-Ch-Ua-Platform' => '"macOS"',
            'Sec-Fetch-Dest' => 'document',
            'Sec-Fetch-Mode' => 'navigate',
            'Sec-Fetch-Site' => 'same-origin',
            'Sec-Fetch-User' => '?1',
            'Sec-Gpc' => '1',
            'Upgrade-Insecure-Requests' => '1',
            'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
        ]);
        $response = $this->httpClient->sendRequest($request);
        $html = (string)$response->getBody();
        // Duplicated code: end

        return $html;
    }
}

所以這時我們可以把這些重複的程式碼提取出來,成為一個抽象類別(修改程式前記得先執行 phpunit,確認測試全數通過):

<?php

namespace Recca0120\Ithome30\Crawlers;

use GuzzleHttp\Psr7\Request;
use Psr\Http\Client\ClientInterface;

/**
 * Common parent of the PTT crawlers.
 *
 * Holds the injected HTTP client, the site base URL and the header set that
 * is attached to every request, and exposes a single helper for fetching a
 * page body.
 */
abstract class Base
{
    /** Scheme and host that site-relative paths are appended to. */
    protected string $baseUrl = 'https://www.ptt.cc';

    /**
     * Headers sent with every GET request issued by sendRequest().
     *
     * NOTE(review): the 'over18=1' cookie presumably skips the site's age
     * confirmation page — confirm against the live site.
     *
     * @var array<string, string>
     */
    private array $headers = [
        'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding' => 'gzip, deflate, br',
        'Accept-Language' => 'zh-TW,zh;q=0.8',
        'Cache-Control' => 'max-age=0',
        'Cookie' => 'over18=1',
        'Referer' => 'https://www.ptt.cc/bbs/Gossiping/index.html',
        'Sec-Ch-Ua' => '"Brave";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
        'Sec-Ch-Ua-Mobile' => '?0',
        'Sec-Ch-Ua-Platform' => '"macOS"',
        'Sec-Fetch-Dest' => 'document',
        'Sec-Fetch-Mode' => 'navigate',
        'Sec-Fetch-Site' => 'same-origin',
        'Sec-Fetch-User' => '?1',
        'Sec-Gpc' => '1',
        'Upgrade-Insecure-Requests' => '1',
        'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
    ];

    /**
     * @param ClientInterface $httpClient client every request is sent through
     */
    public function __construct(protected ClientInterface $httpClient)
    {
    }

    /**
     * Sends a GET request for the given URL with the shared headers.
     *
     * @param string $url address to request
     *
     * @return string the response body cast to a string
     */
    protected function sendRequest($url)
    {
        $response = $this->httpClient->sendRequest(
            new Request('GET', $url, $this->headers)
        );

        return (string) $response->getBody();
    }
}

接著我們再重構 Home.php 與 Board.php,將程式改為:

<?php
// src/Crawlers/Home.php

namespace Recca0120\Ithome30\Crawlers;

use GuzzleHttp\Psr7\Request;

/**
 * Crawler for the PTT hot-boards page.
 */
class Home extends Base
{
    /**
     * Downloads the hot-boards page and parses every board entry found in it.
     *
     * @return array<int, array<string, string>> one column map per board entry;
     *                                           see parseCols() for the keys
     */
    public function all()
    {
        $html = $this->sendRequest($this->baseUrl . '/bbs/hotboards.html');

        return array_map(
            fn (string $row) => $this->parseCols($row),
            $this->parseRows($html)
        );
    }

    /**
     * Turns the HTML of one board entry into a column map.
     *
     * The result always has a 'url' key (base URL + the entry's href) and one
     * key per `board-<name>` cell, holding the cell text with tags and the
     * '◎' marker removed.
     *
     * @param string $row HTML of a single `<a class="board">…</a>` entry
     *
     * @return array<string, string>
     */
    private function parseCols(string $row): array
    {
        // [^"]+ stops at the href's own closing quote; a greedy `.+` would run on
        // to the last quote of the line when other quoted attributes follow.
        preg_match('/href="(?<url>[^"]+)"/', $row, $matched);
        preg_match_all('/"board-(?<name>\w+)">(?<value>.+?)<\/div>/', $row, $matches);

        // An entry without href yields the bare base URL (the same value as
        // before) instead of reading an undefined array key.
        $cols = ['url' => $this->baseUrl . ($matched['url'] ?? '')];
        foreach ($matches['name'] as $index => $name) {
            $cols[$name] = str_replace('◎', '', strip_tags($matches['value'][$index]));
        }

        return $cols;
    }

    /**
     * Extracts the HTML of every `<a class="board">…</a>` entry from the page.
     *
     * @param string $html full page markup
     *
     * @return array<int, string> matched entries in document order
     */
    private function parseRows(string $html): array
    {
        // The `s` flag lets `.` cross line breaks, so one entry may span several lines.
        preg_match_all('/<a\sclass="board"[^>]*>.+?<\/a>/s', $html, $matches);

        return $matches[0];
    }
}
<?php

namespace Recca0120\Ithome30\Crawlers;

use Generator;
use Recca0120\Ithome30\Paginator;
use Psr\Http\Client\ClientInterface;

/**
 * Crawler for the article list of a single board, walking its pages one by one.
 */
class Board extends Base
{
    /**
     * @param ClientInterface $httpClient client every request is sent through
     * @param int|null        $take       stop once the paginator's current page reaches
     *                                    this number; null means no limit
     */
    public function __construct(ClientInterface $httpClient, private ?int $take = null)
    {
        parent::__construct($httpClient);
    }

    /**
     * Lazily fetches the board's list pages, yielding one Paginator per page.
     *
     * Starts at $board['url'] and keeps following the paginator's 'prev' link
     * until the paginator reports no more pages or the $take limit is reached.
     *
     * @param array $board board descriptor; reads the 'url', 'name' and 'class' keys
     *
     * @return Generator yields Paginator instances
     */
    public function fetch(array $board): Generator
    {
        $url = $board['url'];

        $page = 0;
        do {
            $page++;

            $html = $this->sendRequest($url);
            $rows = array_map(
                fn (string $row)  => $this->parseCols($row, $board),
                $this->parseRows($html)
            );

            yield $paginator = new Paginator($html, $rows, $page);

            if ($this->take !== null && $paginator->currentPage >= $this->take) {
                break;
            }

            $url = $paginator->meta['prev'];
        } while ($paginator->hasMorePage());
    }

    /**
     * Turns the HTML of one list row into a column map.
     *
     * Keys: board_name, board_class, nrec, author, date (when present in the
     * row), url, type and title. Rows whose title cell carries no link or no
     * "[type]" prefix are handled without reading undefined match groups:
     * 'url' falls back to the bare base URL, 'type' to an empty string and
     * 'title' keeps the plain title text.
     * NOTE(review): such rows are presumably deleted posts — confirm against real markup.
     *
     * @param string $row   HTML of a single row as returned by parseRows()
     * @param array  $board board descriptor; reads the 'name' and 'class' keys
     *
     * @return array<string, string>
     */
    private function parseCols(string $row, array $board): array
    {
        preg_match_all('/<div class="(?<name>(nrec|title|author|date))"[^>]*>(?<value>.*?)<\/div>/s', $row, $matches);

        $cols = [
            'board_name' => $board['name'],
            'board_class' => $board['class'],
        ];

        foreach ($matches['name'] as $index => $name) {
            $cols[$name] = trim($matches['value'][$index]);
        }
        $cols['nrec'] = strip_tags($cols['nrec'] ?? '');

        $titleHtml = $cols['title'] ?? '';

        // [^"]* stops at the href's closing quote even if more quotes follow on the line.
        preg_match('/href="([^"]*)"/', $titleHtml, $matched);
        $cols['url'] = $this->baseUrl . ($matched[1] ?? '');

        $titleText = strip_tags($titleHtml);
        if (preg_match('/\[(.+)\](.+)/', $titleText, $matched) === 1) {
            $cols['type'] = trim($matched[1]);
            $cols['title'] = trim($matched[2]);
        } else {
            // No "[type] title" shape: keep the text instead of dropping it.
            $cols['type'] = '';
            $cols['title'] = trim($titleText);
        }

        return $cols;
    }

    /**
     * Extracts the HTML of every article row from a list page.
     *
     * @param string $html full page markup
     *
     * @return array<int, string> matched rows in document order
     */
    private function parseRows(string $html): array
    {
        // `U` makes the quantifiers lazy, so each match ends at the first
        // `<div class="mark">…</div>` following a `class="r-ent">` opener.
        preg_match_all('/class="r-ent">.+<div class="mark">(.+)<\/div>/sU', $html, $matches);

        return $matches[0];
    }
}

修改完畢後再執行一次 phpunit,確定程式都正確,我們就在測試的保護下完成了消除重複程式碼的工作。

但抽出一個抽象類別是比較好的做法嗎?明天我們再來看另一種消除重複程式碼的方式,並比較看看哪一種比較好。


上一篇
Day 27. 兩個物件互動測試 - PHPVCR
下一篇
Day 29. 消除重覆程式碼
系列文
開心撰寫 PHPUnit30
圖片
  直播研討會
圖片
{{ item.channelVendor }} {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言