117 lines
3.8 KiB
PHP
Executable File
117 lines
3.8 KiB
PHP
Executable File
<?php
|
|
|
|
namespace App\Command\spider;
|
|
|
|
use App\Enums\ArticleModuleEnum;
|
|
use App\Helpers\AppHelper;
|
|
use Hyperf\Command\Annotation\Command;
|
|
use Hyperf\Logger\LoggerFactory;
|
|
use Psr\Container\ContainerInterface;
|
|
use Symfony\Component\Console\Input\InputInterface;
|
|
use Symfony\Component\Console\Input\InputOption;
|
|
use Symfony\Component\Console\Output\OutputInterface;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Console command that crawls the street-style section of theimpression.com.
 *
 * For every article link found in the landing-page banner and in the paginated
 * block API, the article page is fetched and the images found inside
 * `figure a img` are stored on an article model.
 */
#[Command]
class TheImpressionStreetCommand extends BaseSpider
{
    protected const PLATFORM = 'theimpression-street';

    /**
     * Number of listing pages requested from the block API on each run.
     * Only the first page is crawled; raise this value to crawl deeper.
     */
    private const LIST_PAGES = 1;

    public function __construct(protected ContainerInterface $container)
    {
        parent::__construct('spider:theimpression-street');
    }

    /**
     * Applies the parent configuration and sets the command description.
     */
    public function configure()
    {
        parent::configure();
        $this->setDescription('自动采集 https://theimpression.com/street-style');
    }

    /**
     * Crawls the banner of the landing page and then the article listing.
     *
     * @return int Always 0, including when one of the HTTP requests fails
     *             (the failure is only reported through debugPrint()).
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        parent::execute($input, $output);

        $url = 'https://theimpression.com/street-style';
        [$res, $httpCode] = $this->request($url);

        if ($httpCode != 200) {
            $this->debugPrint("{$url} 请求失败.");

            return 0;
        }

        // Articles linked from the banner at the top of the landing page.
        (new Crawler($res))
            ->filter('.parallax .mask-overlay')
            ->each(function (Crawler $node): void {
                // Default to '' so a missing attribute never reaches
                // getDetail(string $url, ...) as null (TypeError) or trim()
                // as null (deprecated since PHP 8.1).
                $href = $node->attr('href', '');
                $text = trim($node->attr('aria-label', ''));

                if ($href === '' || $text === '') {
                    $this->debugPrint("找不到标题或链接.");

                    return;
                }

                $this->debugPrint("标题: {$text}");
                $this->debugPrint("链接: {$href}");
                $this->getDetail($href, $text);
            });

        // Articles from the paginated block API.
        for ($page = 1; $page <= self::LIST_PAGES; $page++) {
            $url = "https://theimpression.com/wp-json/codetipi-zeen/v1/block?paged={$page}&type=1&data%5Bargs%5D%5Bcat%5D=1";
            [$res, $httpCode] = $this->request($url);

            if ($httpCode != 200) {
                $this->debugPrint("{$url} 请求失败.");

                return 0;
            }

            // The HTML fragment with the articles is read from index 1 of the
            // decoded JSON; skip the page when the payload has another shape.
            $payload = json_decode($res, true);
            if (!is_array($payload) || !isset($payload[1]) || !is_string($payload[1])) {
                $this->debugPrint("{$url} 响应解析失败.");

                continue;
            }

            (new Crawler($payload[1]))
                ->filter('article')
                ->each(function (Crawler $node): void {
                    $href = $node->filter('.mask-img')->attr('href', '');
                    $title = $node->filter('.title-wrap')->text('');

                    if (!$href || !$title) {
                        $this->debugPrint("找不到标题或链接.");

                        return;
                    }

                    $this->getDetail($href, $title);
                });
        }

        return 0;
    }

    /**
     * Fetches one article page and saves its images on the article model.
     *
     * The model is obtained through getArticleModel() using the title, the
     * platform and brand 0. It is saved only when at least one image with a
     * non-empty `src` is found; the first image becomes the cover.
     *
     * @param string $url   URL of the article page to fetch
     * @param string $title Article title, also passed to AppHelper::getYear()
     *
     * @return int|null 0 when the article page request fails, null otherwise
     */
    public function getDetail(string $url, $title)
    {
        $model = $this->getArticleModel(['title' => $title, 'platform' => static::getPlatform(), 'brand' => 0]);
        $model->title = $title;
        $model->platform = static::getPlatform();
        $model->module = ArticleModuleEnum::STREET->value;
        $model->year = AppHelper::getYear($title);

        [$res, $httpCode] = $this->request($url);
        $model->source_url = $url;

        if ($httpCode != 200) {
            $this->debugPrint("{$url} 请求失败.");

            return 0;
        }

        // Keyed by src so that the same image is only collected once.
        $images = [];

        (new Crawler($res))
            ->filter('figure a img')
            ->each(function (Crawler $node) use (&$images): void {
                $src = $node->attr('src');

                if (!$src || isset($images[$src])) {
                    return;
                }

                $this->debugPrint("采集图片: {$src}");
                $images[$src] = [
                    'src' => $src,
                ];
            });

        if ($images) {
            $imageList = array_values($images);
            $model->cover = $imageList[0]['src'];
            $model->images = json_encode($imageList);
            $model->save();
        }
    }
}