Files
backend/app/Command/spider/TheImpressionStreetCommand.php
2025-06-18 10:31:43 +08:00

117 lines
3.8 KiB
PHP
Executable File

<?php
namespace App\Command\spider;
use App\Enums\ArticleModuleEnum;
use App\Helpers\AppHelper;
use Hyperf\Command\Annotation\Command;
use Hyperf\Logger\LoggerFactory;
use Psr\Container\ContainerInterface;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\DomCrawler\Crawler;
#[Command]
/**
 * Console command `spider:theimpression-street`.
 *
 * Scrapes the street-style section of theimpression.com: first the banner
 * links on the landing page, then the article cards returned by the site's
 * block endpoint. Every discovered article is handed to getDetail(), which
 * collects its images and saves the article model.
 *
 * request(), debugPrint(), getArticleModel() and getPlatform() are not defined
 * in this class; they are presumably provided by BaseSpider — confirm there
 * for their exact contracts.
 */
class TheImpressionStreetCommand extends BaseSpider
{
    protected const PLATFORM = 'theimpression-street';

    public function __construct(protected ContainerInterface $container)
    {
        parent::__construct('spider:theimpression-street');
    }

    /**
     * Registers the command description on top of the parent configuration.
     */
    public function configure()
    {
        parent::configure();
        $this->setDescription('自动采集 https://theimpression.com/street-style');
    }

    /**
     * Runs the crawl: banner articles first, then the paged listing endpoint.
     *
     * @return int Always 0, including when a request fails (failures are only logged).
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        parent::execute($input, $output);

        $url = 'https://theimpression.com/street-style';
        [$res, $httpCode] = $this->request($url);
        if ((int) $httpCode !== 200) {
            $this->debugPrint("{$url} 请求失败.");
            return 0;
        }

        // Banner entries on the landing page.
        (new Crawler($res))
            ->filter('.parallax .mask-overlay')
            ->each(function (Crawler $node): void {
                // attr() yields null for a missing attribute; normalise to '' so that
                // neither trim() nor getDetail(string $url, ...) ever receives null.
                $href = (string) $node->attr('href');
                $text = trim((string) $node->attr('aria-label'));
                if ($href === '' || $text === '') {
                    $this->debugPrint("找不到标题或链接.");
                    return;
                }
                $this->debugPrint("标题: {$text}");
                $this->debugPrint("链接: {$href}");
                $this->getDetail($href, $text);
            });

        // Paged listing endpoint. Only page 1 is fetched at present;
        // raise $lastPage to crawl deeper into the archive.
        $lastPage = 1;
        for ($page = 1; $page <= $lastPage; $page++) {
            $url = "https://theimpression.com/wp-json/codetipi-zeen/v1/block?paged={$page}&type=1&data%5Bargs%5D%5Bcat%5D=1";
            [$res, $httpCode] = $this->request($url);
            if ((int) $httpCode !== 200) {
                $this->debugPrint("{$url} 请求失败.");
                return 0;
            }

            // The code reads element [1] of the decoded payload as the HTML fragment
            // holding the <article> cards; skip the page if that element is absent.
            $payload = json_decode((string) $res, true);
            if (!is_array($payload) || !isset($payload[1]) || !is_string($payload[1])) {
                $this->debugPrint("{$url} 响应解析失败.");
                continue;
            }

            (new Crawler($payload[1]))
                ->filter('article')
                ->each(function (Crawler $node): void {
                    $href = (string) $node->filter('.mask-img')->attr('href', '');
                    $title = $node->filter('.title-wrap')->text('');
                    if ($href === '' || $title === '') {
                        $this->debugPrint("找不到标题或链接.");
                        return;
                    }
                    $this->getDetail($href, $title);
                });
        }

        return 0;
    }

    /**
     * Fetches one article page, collects its images and saves the article model.
     *
     * The model is only saved when at least one image is found.
     *
     * @param string $url   Article page URL; stored on the model as source_url.
     * @param mixed  $title Article title; also used for the model lookup and year extraction.
     *
     * @return int|null 0 when the page request does not return HTTP 200, otherwise null.
     */
    public function getDetail(string $url, $title)
    {
        $model = $this->getArticleModel(['title' => $title, 'platform' => static::getPlatform(), 'brand' => 0]);
        $model->title = $title;
        $model->platform = static::getPlatform();
        $model->module = ArticleModuleEnum::STREET->value;
        $model->year = AppHelper::getYear($title);

        [$res, $httpCode] = $this->request($url);
        $model->source_url = $url;
        if ((int) $httpCode !== 200) {
            $this->debugPrint("{$url} 请求失败.");
            return 0;
        }

        // Keyed by src so that the same image is recorded only once.
        $images = [];
        (new Crawler($res))
            ->filter('figure a img')
            ->each(function (Crawler $node) use (&$images): void {
                $src = $node->attr('src');
                if ($src && !isset($images[$src])) {
                    $this->debugPrint("采集图片: {$src}");
                    $images[$src] = ['src' => $src];
                }
            });

        if ($images) {
            $imageList = array_values($images);
            // The first collected image doubles as the cover.
            $model->cover = $imageList[0]['src'];
            $model->images = json_encode($imageList);
            $model->save();
        }
    }
}