Files
backend/app/Command/spider/BaseSpider.php
2025-06-18 10:31:43 +08:00

171 lines
4.6 KiB
PHP
Executable File

<?php
namespace App\Command\spider;
use App\Model\AppArticle;
use App\Model\AppSpiderArticle;
use Hyperf\Command\Command;
use Hyperf\Contract\StdoutLoggerInterface;
use Hyperf\Coroutine\Coroutine;
use Hyperf\Di\Annotation\Inject;
use Laminas\Stdlib\ArrayUtils;
use Swoole\Coroutine\Channel;
use Swoole\Timer;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use function Hyperf\Coroutine\co;
/**
 * Base class for spider commands.
 *
 * Provides:
 *  - a token pool (a Swoole channel) that bounds how many task coroutines
 *    started through createCoroutine() may run at the same time,
 *  - a blocking-style cURL GET helper,
 *  - storage for the parsed command-line options and a debug printer.
 */
class BaseSpider extends Command
{
    /**
     * Maximum number of task coroutines allowed to run concurrently.
     */
    protected int $maxCo = 10;

    /**
     * Token pool; created lazily by init() on the first createCoroutine() call.
     */
    protected ?Channel $channel = null;

    /**
     * Base URL of the crawled site; see getBaseUrl().
     */
    protected string $baseUrl = '';

    #[Inject]
    protected ?StdoutLoggerInterface $logger = null;

    /**
     * Ids of the task coroutines that are currently alive (coroutine id => 1).
     */
    protected array $coroutineList = [];

    /**
     * Platform identifier returned by getPlatform(); read through late static binding.
     */
    protected const PLATFORM = '';

    /**
     * Whether init() has already run.
     */
    private bool $isInit = false;

    /**
     * Value returned by Timer::tick() in init(); false until init() has run.
     * NOTE(review): nothing in this class clears the timer - confirm that
     * subclasses (or process exit) take care of it.
     */
    protected int|bool $timer = false;

    /**
     * Options captured by execute(), keyed by option name.
     */
    protected array $commandConfigure = [];

    /**
     * Create the token pool, fill it, and start the periodic monitor timer.
     */
    private function init(): void
    {
        // The outermost parent coroutine is counted as well, hence the +1.
        $slots = $this->maxCo + 1;
        $this->channel = new Channel($slots);

        // Every 30 seconds dump how many task coroutines are currently tracked.
        $this->timer = Timer::tick(1000 * 30, function () {
            var_dump(count($this->coroutineList));
        });

        // Pre-fill the pool: one token per available slot.
        for ($i = 0; $i < $slots; $i++) {
            $this->channel->push(1);
        }
    }

    /**
     * Register the negatable `prod` option (default false) on top of the parent configuration.
     */
    public function configure()
    {
        parent::configure();
        $this->addOption('prod', '', InputOption::VALUE_NEGATABLE, '是否关闭devMode.', false);
    }

    /**
     * Return the PLATFORM constant of the concrete (late-bound) class.
     */
    public static function getPlatform(): string
    {
        return static::PLATFORM;
    }

    /**
     * Return the base URL with any trailing slashes removed.
     */
    public function getBaseUrl(): string
    {
        return rtrim($this->baseUrl, '/');
    }

    /**
     * Fetch the first AppSpiderArticle matching $condition, or a fresh unsaved instance.
     *
     * @param array $condition conditions handed to the query builder's where()
     */
    protected function getArticleModel(array $condition)
    {
        return AppSpiderArticle::query()->where($condition)->first() ?? new AppSpiderArticle();
    }

    /**
     * Perform an HTTP GET request with cURL.
     *
     * Follows up to 10 redirects, accepts any supported content encoding and
     * gives up after 15 seconds.
     *
     * @param string $url address to fetch
     * @return array two-element list: [response body, or false on failure; HTTP status code]
     */
    protected function request(string $url): array
    {
        $ch = curl_init();
        if ($ch === false) {
            // Same shape as a failed transfer: no body and status code 0.
            return [false, 0];
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => '',
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 15,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => 'GET',
        ]);

        $result = curl_exec($ch);
        // The status code must be read while the handle is still open.
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return [$result, $httpCode];
    }

    /**
     * Hand a token back to the pool and return the result of the channel push.
     */
    protected function returnPool()
    {
        return $this->channel->push(1);
    }

    /**
     * Take a token from the pool.
     *
     * @return bool true when a token was obtained, false when the pop failed
     */
    protected function getPool(): bool
    {
        // pop() yields the pushed token (1) on success and false on failure.
        return $this->channel->pop() !== false;
    }

    /**
     * Run $func in a new coroutine once a pool token has been taken.
     *
     * The pool is initialised on first use. The token is handed back, and the
     * coroutine removed from $coroutineList, when the coroutine ends.
     *
     * @param \Closure $func task to execute inside the coroutine
     */
    protected function createCoroutine(\Closure $func): void
    {
        if ($this->isInit === false) {
            $this->isInit = true;
            $this->init();
        }

        $this->getPool();

        co(function () use ($func) {
            // Register from inside the coroutine: if the task finished before
            // co() returned, registering afterwards would leave a stale entry
            // behind that is never removed.
            $cid = Coroutine::id();
            $this->coroutineList[$cid] = 1;

            Coroutine::defer(function () use ($cid) {
                unset($this->coroutineList[$cid]);
                $this->returnPool();
            });

            $func();
        });
    }

    /**
     * Echo a timestamped debug line, but only when the stored `prod` option is exactly false.
     *
     * @param array|string $message value to print (rendered with print_r)
     * @param int $level currently unused
     */
    protected function debugPrint(array|string $message = '', $level = 0)
    {
        if ($this->getCommandConfigure('prod') === false) {
            $printTime = date('H:i:s');
            echo "[spider-debug][$printTime]" . print_r($message, true) . PHP_EOL;
        }
    }

    /**
     * Used for unit testing: invoke any method of this object by name,
     * including non-public ones.
     *
     * @param string $methodName name of the method to call
     * @param array $args arguments spread into the call
     * @return mixed whatever the invoked method returns
     */
    public function testMethod(string $methodName, $args = [])
    {
        return $this->{$methodName}(...$args);
    }

    /**
     * Replace the stored command options.
     *
     * @param array $options option values keyed by option name
     */
    public function setCommandConfigure($options): void
    {
        $this->commandConfigure = $options;
    }

    /**
     * Read the stored command options.
     *
     * @param string|null $key option name; any falsy value returns the whole option array
     * @param mixed $defaultValue returned when the option is missing or null
     * @return mixed
     */
    public function getCommandConfigure($key = null, $defaultValue = null)
    {
        if (! $key) {
            return $this->commandConfigure;
        }

        return $this->commandConfigure[$key] ?? $defaultValue;
    }

    /**
     * Capture the parsed input options and report success (exit code 0).
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setCommandConfigure($input->getOptions());

        return 0;
    }
}