first commit
This commit is contained in:
117
app/Command/spider/TheImpressionStreetCommand.php
Executable file
117
app/Command/spider/TheImpressionStreetCommand.php
Executable file
@ -0,0 +1,117 @@
|
||||
<?php
|
||||
|
||||
namespace App\Command\spider;
|
||||
|
||||
use App\Enums\ArticleModuleEnum;
|
||||
use App\Helpers\AppHelper;
|
||||
use Hyperf\Command\Annotation\Command;
|
||||
use Hyperf\Logger\LoggerFactory;
|
||||
use Psr\Container\ContainerInterface;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
#[Command]
/**
 * Console command `spider:theimpression-street`.
 *
 * Crawls https://theimpression.com/street-style: first the banner links on
 * the landing page, then the article cards returned by the site's JSON block
 * endpoint. Every article found is handed to getDetail(), which collects the
 * images on the article page and stores them on an article model.
 *
 * request(), debugPrint(), getArticleModel() and getPlatform() are not defined
 * in this class; they are presumably inherited from BaseSpider — confirm there.
 */
class TheImpressionStreetCommand extends BaseSpider
{
    protected const PLATFORM = 'theimpression-street';

    public function __construct(protected ContainerInterface $container)
    {
        parent::__construct('spider:theimpression-street');
    }

    /**
     * Registers the command description on top of the parent configuration.
     */
    public function configure()
    {
        parent::configure();
        $this->setDescription('自动采集 https://theimpression.com/street-style');
    }

    /**
     * Runs the crawl.
     *
     * @param InputInterface  $input  Console input, forwarded to the parent.
     * @param OutputInterface $output Console output, forwarded to the parent.
     *
     * @return int Always 0, including when a request fails (the failure is
     *             only reported through debugPrint()).
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        parent::execute($input, $output);

        $url = 'https://theimpression.com/street-style';
        [$res, $httpCode] = $this->request($url);

        if ($httpCode != 200) {
            $this->debugPrint("{$url} 请求失败.");
            return 0;
        }

        // Banner articles: each overlay element carries the link in `href`
        // and the title in `aria-label`.
        (new Crawler($res))
            ->filter('.parallax .mask-overlay')
            ->each(function (Crawler $node): void {
                // attr() returns null for a missing attribute; normalise to ''
                // so neither trim() nor getDetail(string ...) receives null.
                $href = (string) $node->attr('href');
                $text = trim((string) $node->attr('aria-label'));

                if ($href === '' || $text === '') {
                    $this->debugPrint("找不到标题或链接.");
                    return;
                }

                $this->debugPrint("标题: {$text}");
                $this->debugPrint("链接: {$href}");
                $this->getDetail($href, $text);
            });

        // Listing pages served by the block endpoint. An earlier note spoke of
        // the first fifty pages, but only page 1 has ever been requested here;
        // raise $pageCount to crawl deeper.
        $pageCount = 1;

        for ($page = 1; $page <= $pageCount; $page++) {
            $url = "https://theimpression.com/wp-json/codetipi-zeen/v1/block?paged={$page}&type=1&data%5Bargs%5D%5Bcat%5D=1";
            [$res, $httpCode] = $this->request($url);

            if ($httpCode != 200) {
                $this->debugPrint("{$url} 请求失败.");
                return 0;
            }

            // Element 1 of the decoded response is fed to the crawler as the
            // HTML fragment containing the article cards.
            $payload = json_decode((string) $res, true);
            $html = is_array($payload) ? ($payload[1] ?? null) : null;

            if (!is_string($html) || $html === '') {
                // Invalid JSON or an unexpected shape: nothing to parse.
                $this->debugPrint("{$url} 响应格式异常.");
                continue;
            }

            (new Crawler($html))
                ->filter('article')
                ->each(function (Crawler $node): void {
                    // The default arguments keep attr()/text() from throwing
                    // when the selector matches nothing inside this card.
                    $href = $node->filter('.mask-img')->attr('href', '');
                    $title = $node->filter('.title-wrap')->text('');

                    if (!$href || !$title) {
                        $this->debugPrint("找不到标题或链接.");
                        return;
                    }

                    $this->getDetail($href, $title);
                });
        }

        return 0;
    }

    /**
     * Fetches one article page and stores its images on the article model.
     *
     * The model is obtained from getArticleModel() keyed by title, platform
     * and brand 0 (presumably a find-or-new lookup — confirm in BaseSpider).
     * It is saved only when at least one image was found; the first image
     * becomes the cover.
     *
     * @param string $url   Article URL; also stored as the model's source_url.
     * @param mixed  $title Article title; also passed to AppHelper::getYear().
     *
     * @return int|null 0 when the page request does not return HTTP 200,
     *                  otherwise nothing.
     */
    public function getDetail(string $url, $title)
    {
        $model = $this->getArticleModel(['title' => $title, 'platform' => static::getPlatform(), 'brand' => 0]);
        $model->title = $title;
        $model->platform = static::getPlatform();
        $model->module = ArticleModuleEnum::STREET->value;
        $model->year = AppHelper::getYear($title);

        [$res, $httpCode] = $this->request($url);
        $model->source_url = $url;

        if ($httpCode != 200) {
            $this->debugPrint("{$url} 请求失败.");
            return 0;
        }

        // Keyed by src so an image that appears several times is kept once.
        $images = [];

        (new Crawler($res))
            ->filter('figure a img')
            ->each(function (Crawler $node) use (&$images): void {
                $src = $node->attr('src');

                if (!$src || isset($images[$src])) {
                    return;
                }

                $this->debugPrint("采集图片: {$src}");
                $images[$src] = [
                    'src' => $src,
                ];
            });

        if ($images) {
            // current() on the freshly built array yields the first image.
            $model->cover = current($images)['src'];
            $model->images = json_encode(array_values($images));
            $model->save();
        }
    }
}
|
Reference in New Issue
Block a user