first commit
This commit is contained in:
171
app/Command/spider/BaseSpider.php
Executable file
171
app/Command/spider/BaseSpider.php
Executable file
@ -0,0 +1,171 @@
|
||||
<?php
|
||||
|
||||
namespace App\Command\spider;
|
||||
|
||||
use App\Model\AppArticle;
|
||||
use App\Model\AppSpiderArticle;
|
||||
use Hyperf\Command\Command;
|
||||
use Hyperf\Contract\StdoutLoggerInterface;
|
||||
use Hyperf\Coroutine\Coroutine;
|
||||
use Hyperf\Di\Annotation\Inject;
|
||||
use Laminas\Stdlib\ArrayUtils;
|
||||
use Swoole\Coroutine\Channel;
|
||||
use Swoole\Timer;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use function Hyperf\Coroutine\co;
|
||||
|
||||
/**
 * Base class for spider (crawler) console commands.
 *
 * Provides a token-based limit on the number of concurrently running worker
 * coroutines, a small cURL GET helper, debug printing that is switched off by
 * the `--prod` option, and storage for the parsed command options.
 */
class BaseSpider extends Command
{
    /**
     * Maximum number of coroutines.
     */
    protected int $maxCo = 10;

    /**
     * Token channel used as the coroutine pool; created lazily by init().
     */
    protected ?Channel $channel = null;

    /**
     * Base URL of the crawled site; see getBaseUrl() for the normalised form.
     */
    protected string $baseUrl = '';

    #[Inject]
    protected ?StdoutLoggerInterface $logger = null;

    /**
     * Ids of the worker coroutines that are currently running (id => 1).
     */
    protected array $coroutineList = [];

    /**
     * Platform identifier returned by getPlatform(); empty in this base class.
     */
    protected const PLATFORM = '';

    /**
     * Whether init() has already been run by createCoroutine().
     */
    private bool $isInit = false;

    /**
     * Value returned by Timer::tick() in init(), or false before init() ran.
     */
    protected int|bool $timer = false;

    /**
     * Command options as stored by setCommandConfigure().
     */
    protected array $commandConfigure = [];

    /**
     * Creates the token channel, starts the periodic monitor timer and fills
     * the channel with its initial tokens.
     */
    private function init(): void
    {
        // Because there is also a parent coroutine at the outermost level, add one.
        $slots = $this->maxCo + 1;
        $this->channel = new Channel($slots);

        // Every 30 seconds dump how many worker coroutines are still registered.
        // NOTE(review): nothing in this class clears the timer; confirm that a
        // subclass or the caller calls Timer::clear($this->timer) when done.
        $this->timer = Timer::tick(1000 * 30, function () {
            var_dump(count($this->coroutineList));
        });

        // One token per slot; createCoroutine() takes a token before spawning.
        for ($i = 0; $i < $slots; $i++) {
            $this->channel->push(1);
        }
    }

    /**
     * Registers the negatable `prod` option (default false) on top of the
     * parent configuration.
     */
    public function configure()
    {
        parent::configure();
        $this->addOption('prod', '', InputOption::VALUE_NEGATABLE, '是否关闭devMode.', false);
    }

    /**
     * Returns the PLATFORM constant of the class the method is called on
     * (late static binding).
     */
    public static function getPlatform(): string
    {
        return static::PLATFORM;
    }

    /**
     * Returns the base URL with any trailing slashes removed.
     */
    public function getBaseUrl(): string
    {
        return rtrim($this->baseUrl, '/');
    }

    /**
     * Returns the first AppSpiderArticle matching the condition, or a new,
     * unsaved AppSpiderArticle when no row matches.
     *
     * @param array $condition where-condition passed to the query builder
     */
    protected function getArticleModel(array $condition)
    {
        return AppSpiderArticle::query()->where($condition)->first() ?? new AppSpiderArticle();
    }

    /**
     * Performs a GET request with cURL (15 s timeout, follows up to 10
     * redirects).
     *
     * @param string $url URL to fetch
     * @return array [response body, or false when curl_exec() fails; HTTP status code]
     */
    protected function request(string $url): array
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => '',
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 15,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => 'GET',
        ]);

        $result = curl_exec($ch);
        // The status code must be read while the handle is still open; the
        // handle is only closed afterwards.
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        return [$result, $httpCode];
    }

    /**
     * Puts one token back into the pool channel.
     *
     * @return mixed result of Channel::push()
     */
    protected function returnPool()
    {
        return $this->channel->push(1);
    }

    /**
     * Takes one token from the pool channel.
     *
     * @return bool true when a token was obtained, false when pop() returned false
     */
    protected function getPool(): bool
    {
        // Compare explicitly instead of relying on int-to-bool coercion of the token.
        return $this->channel->pop() !== false;
    }

    /**
     * Runs $func in a new coroutine after taking a token from the pool; the
     * token is returned when the coroutine ends.
     *
     * The pool is initialised on the first call.
     *
     * @param \Closure $func work to execute inside the coroutine
     */
    protected function createCoroutine(\Closure $func): void
    {
        if ($this->isInit === false) {
            $this->isInit = true;
            $this->init();
        }

        $this->getPool();

        co(function () use ($func) {
            // Register from inside the coroutine: if the coroutine finished
            // before co() returned, registering afterwards would leave a stale
            // entry that the deferred cleanup had already tried to remove.
            $cid = Coroutine::id();
            $this->coroutineList[$cid] = 1;

            Coroutine::defer(function () use ($cid) {
                unset($this->coroutineList[$cid]);
                $this->returnPool();
            });

            $func();
        });
    }

    /**
     * Echoes a timestamped debug line unless the `prod` option is enabled.
     *
     * Nothing is printed when the stored `prod` option is anything other than
     * exactly false (including when no options have been stored yet).
     *
     * @param array|string $message value rendered with print_r()
     * @param mixed $level currently unused
     */
    protected function debugPrint(array|string $message = '', $level = 0)
    {
        if ($this->getCommandConfigure('prod') !== false) {
            return;
        }

        $printTime = date('H:i:s');
        echo "[spider-debug][$printTime]" . print_r($message, true) . PHP_EOL;
    }

    /**
     * Used for unit tests: invokes the named method on this object with the
     * given arguments and returns its result.
     *
     * @param string $methodName name of the method to call
     * @param mixed $args arguments, spread into the call
     * @return mixed
     */
    public function testMethod(string $methodName, $args = [])
    {
        return $this->{$methodName}(...$args);
    }

    /**
     * Stores the command options for later lookup via getCommandConfigure().
     *
     * @param mixed $options option values; execute() passes $input->getOptions()
     */
    public function setCommandConfigure($options): void
    {
        $this->commandConfigure = $options;
    }

    /**
     * Returns one stored option, or all of them when no key is given.
     *
     * @param mixed $key option name; any falsy value returns the whole array
     * @param mixed $defaultValue returned when the key is missing or its value is null
     * @return mixed
     */
    public function getCommandConfigure($key = null, $defaultValue = null)
    {
        if (!$key) {
            return $this->commandConfigure;
        }

        return $this->commandConfigure[$key] ?? $defaultValue;
    }

    /**
     * Stores the parsed input options and returns exit code 0.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setCommandConfigure($input->getOptions());

        return 0;
    }
}
|
Reference in New Issue
Block a user