first commit

This commit is contained in:
root
2025-06-18 10:31:43 +08:00
commit d9f820b55d
981 changed files with 449311 additions and 0 deletions

68
app/Command/CoverCommand.php Executable file
View File

@ -0,0 +1,68 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\Helpers\AppHelper;
use App\Model\AppArticle;
use Hyperf\Collection\Collection;
use Hyperf\Command\Annotation\Command;
use Hyperf\Command\Command as HyperfCommand;
use Hyperf\DbConnection\Db;
use Psr\Container\ContainerInterface;
use function Hyperf\Coroutine\co;
#[Command]
class CoverCommand extends HyperfCommand
{
    /**
     * Not referenced anywhere in this class.
     */
    protected $reids;

    public function __construct(protected ContainerInterface $container)
    {
        parent::__construct('demo:cover');
    }

    public function configure()
    {
        parent::configure();
        $this->setDescription('Hyperf Demo Command');
    }

    /**
     * Back-fills `cover` and `year` on app_articles rows with id > 11284.
     *
     * Rows are read in chunks of 20; every row that still lacks a cover or a
     * year is processed in its own coroutine and the chunk waits for all of
     * them before the next chunk is fetched.
     */
    public function handle()
    {
        $this->line('Hello Hyperf!', 'info');

        Db::table('app_articles')
            ->where('id', '>', 11284)
            ->orderBy('id')
            ->chunk(20, function (Collection $rows) {
                $waitGroup = new \Hyperf\Coroutine\WaitGroup();

                foreach ($rows as $row) {
                    // Skip empty rows and rows that already have both values.
                    if (!$row || ($row->cover && $row->year)) {
                        continue;
                    }

                    $waitGroup->add();
                    co(function () use ($waitGroup, $row) {
                        try {
                            $this->backfillArticle($row);
                        } finally {
                            // Must run even when the row fails, otherwise
                            // $waitGroup->wait() below never returns.
                            $waitGroup->done();
                        }
                    });
                }

                $waitGroup->wait();
            });
    }

    /**
     * Updates one article: cover from the first image's `src`, year from the
     * first run of digits in the title (0 when the title has none).
     *
     * @param object $row Row of app_articles as returned by the query builder.
     */
    private function backfillArticle(object $row): void
    {
        $model = AppArticle::find($row->id);
        if (!$model) {
            return;
        }

        // `images` is expected to hold a JSON list of ['src' => ...] entries;
        // anything else simply leaves the cover untouched.
        $images = is_string($row->images) ? json_decode($row->images, true) : null;
        $first = is_array($images) ? reset($images) : false;
        if (is_array($first) && isset($first['src'])) {
            $model->cover = $first['src'];
        }

        preg_match('/([0-9]+)/', (string) $model->title, $y);
        $model->year = $y[1] ?? 0;

        echo "update {$row->id} year: {$model->year} effect: {$model->update()}" . PHP_EOL;
    }
}

184
app/Command/FooCommand.php Executable file
View File

@ -0,0 +1,184 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\Command\spider\VogueCommand;
use App\FormModel\spider\ReviewModel;
use App\Helpers\AppHelper;
use App\Helpers\TitleHelper;
use App\Model\AppArticle;
use App\Model\AppBrand;
use App\Model\AppSpiderArticle;
use Co\WaitGroup;
use GuzzleHttp\RequestOptions;
use Hyperf\Collection\Collection;
use Hyperf\Command\Command as HyperfCommand;
use Hyperf\Command\Annotation\Command;
use Hyperf\Context\ApplicationContext;
use Hyperf\Coroutine\Coroutine;
use Hyperf\DbConnection\Db;
use Hyperf\Logger\LoggerFactory;
use Laminas\Stdlib\ArrayUtils;
use Psr\Container\ContainerInterface;
use Psr\Log\LoggerInterface;
use Spatie\Crawler\Crawler;
use Swoole\Timer;
use function Hyperf\Coroutine\co;
#[Command]
class FooCommand extends HyperfCommand
{
    /**
     * Titles that carry no usable information on their own; for these the
     * title of the originating spider article is translated instead.
     */
    private const PLACEHOLDER_TITLES = ['春季', ' 春季', '秋季', ' 秋季', '早秋', ' 早秋', '时装', ' 时装'];

    protected LoggerInterface $logger;

    /**
     * Not referenced anywhere in this class.
     */
    protected $reids;

    public function __construct(protected ContainerInterface $container, LoggerFactory $loggerFactory)
    {
        parent::__construct('demo:command');
        $this->logger = $loggerFactory->get('log', 'command');
    }

    public function configure()
    {
        parent::configure();
        $this->setDescription('Hyperf Demo Command');
    }

    /**
     * Re-translates the title of every AppArticle with id > 1.
     *
     * When the current title is one of the placeholder titles or contains a
     * `|`, the title of the linked AppSpiderArticle is fed to
     * TitleHelper::translate(); otherwise the article's own title is. The
     * article is saved only when the helper returns a truthy value.
     *
     * An unused regex => translation map and the commented-out experiments
     * that referenced it were dropped from this method as dead code.
     */
    public function handle()
    {
        $this->line('Hello Hyperf!', 'info');

        foreach (AppArticle::where('id', '>', 1)->cursor() as $item) {
            echo '正在同步' . $item->id . PHP_EOL;

            $currentTitle = (string) $item->title;
            $usesOrigin = in_array($currentTitle, self::PLACEHOLDER_TITLES, true)
                || str_contains($currentTitle, '|');

            $sourceTitle = $item->title;
            if ($usesOrigin) {
                $origin = AppSpiderArticle::find($item->spider_article_id);
                if ($origin === null) {
                    // Previously this dereferenced null and aborted the whole run.
                    $this->logger->warning("article {$item->id}: spider article {$item->spider_article_id} not found, skipped.");
                    continue;
                }
                $sourceTitle = $origin->title;
            }

            $translated = TitleHelper::translate($sourceTitle);
            if (!$translated) {
                continue;
            }

            if ($usesOrigin) {
                var_dump($translated);
            }
            $item->title = $translated;
            $item->save();
        }
    }
}

View File

@ -0,0 +1,42 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\FormModel\spider\ReviewModel;
use App\Model\AppArticle;
use Hyperf\Collection\Collection;
use Hyperf\Command\Annotation\Command;
use Hyperf\Command\Command as HyperfCommand;
use Hyperf\DbConnection\Db;
use Psr\Container\ContainerInterface;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use function Hyperf\Coroutine\co;
#[Command]
class SpiderReviewCommand extends HyperfCommand
{
    /**
     * Not referenced anywhere in this class.
     */
    protected $reids;

    public function __construct(protected ContainerInterface $container)
    {
        parent::__construct('spider:review');
    }

    public function configure()
    {
        parent::configure();
        $this->setDescription('review spider article.');
        $this->addOption('id', 'i', InputOption::VALUE_REQUIRED, '文章id', false);
    }

    /**
     * Passes the article given by --id / -i to ReviewModel::pass().
     *
     * @return int 0 after the review call, 1 when no id was supplied.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $id = $input->getOption('id');

        // The option defaults to false; without this guard that false was
        // handed to ReviewModel::pass() as if it were an article id.
        if ($id === false || $id === null || $id === '') {
            $output->writeln('<error>The --id option is required.</error>');
            return 1;
        }

        (new ReviewModel())->pass($id);

        return 0;
    }
}

26
app/Command/TestClass.php Executable file
View File

@ -0,0 +1,26 @@
<?php
namespace App\Command;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\UriInterface;
use Spatie\Crawler\CrawlObservers\CrawlObserver;
class TestClass extends CrawlObserver
{
    /**
     * Announces the URL that is about to be fetched.
     */
    public function willCrawl(UriInterface $url, ?string $linkText): void
    {
        echo '即将爬取: ', $url, "\n";
    }

    /**
     * Dumps the response object, then prints the URL together with its HTTP status code.
     */
    public function crawled(
        UriInterface $url,
        ResponseInterface $response,
        ?UriInterface $foundOnUrl = null,
        ?string $linkText = null
    ): void {
        var_dump($response);
        echo '已成功爬取: ', $url, ' 状态码: ', $response->getStatusCode(), "\n";
    }

    /**
     * Prints the URL that failed together with the exception message.
     */
    public function crawlFailed(
        UriInterface $url,
        \Throwable $exception,
        ?UriInterface $foundOnUrl = null,
        ?string $linkText = null
    ): void {
        echo '爬取失败: ', $url, ' 错误: ', $exception->getMessage(), "\n";
    }
}

171
app/Command/spider/BaseSpider.php Executable file
View File

@ -0,0 +1,171 @@
<?php
namespace App\Command\spider;
use App\Model\AppArticle;
use App\Model\AppSpiderArticle;
use Hyperf\Command\Command;
use Hyperf\Contract\StdoutLoggerInterface;
use Hyperf\Coroutine\Coroutine;
use Hyperf\Di\Annotation\Inject;
use Laminas\Stdlib\ArrayUtils;
use Swoole\Coroutine\Channel;
use Swoole\Timer;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use function Hyperf\Coroutine\co;
/**
 * Shared plumbing for the spider commands: a curl-based GET helper, a
 * channel-backed limiter for concurrently running coroutines, and storage
 * for the parsed command-line options.
 */
class BaseSpider extends Command
{
    /**
     * Maximum number of coroutines.
     */
    protected int $maxCo = 10;

    /**
     * Holds one token per coroutine slot; created lazily by init().
     */
    protected ?\Swoole\Coroutine\Channel $channel = null;

    protected string $baseUrl = '';

    #[Inject]
    protected ?StdoutLoggerInterface $logger = null;

    /**
     * Ids of the coroutines started through createCoroutine() that have not finished yet (id => 1).
     */
    protected array $coroutineList = [];

    protected const PLATFORM = '';

    private bool $isInit = false;

    /**
     * Id returned by Timer::tick() in init(), false until then.
     */
    protected int|bool $timer = false;

    /**
     * Options captured via setCommandConfigure().
     */
    protected array $commandConfigure = [];

    /**
     * Creates the token channel, fills it, and starts a 30 s timer that dumps
     * the number of tracked coroutines.
     */
    private function init()
    {
        // One extra slot because the outermost parent coroutine takes one too.
        $slots = $this->maxCo + 1;
        $this->channel = new Channel($slots);

        // NOTE(review): this timer is never cleared in this class.
        $this->timer = Timer::tick(1000 * 30, function () {
            var_dump(count($this->coroutineList));
        });

        for ($i = 0; $i < $slots; $i++) {
            $this->channel->push(1);
        }
    }

    public function configure()
    {
        parent::configure();
        $this->addOption('prod', '', InputOption::VALUE_NEGATABLE, '是否关闭devMode.', false);
    }

    /**
     * Returns the PLATFORM constant of the concrete (late-bound) class.
     */
    public static function getPlatform(): string
    {
        return static::PLATFORM;
    }

    /**
     * Returns $baseUrl without trailing slashes.
     */
    public function getBaseUrl(): string
    {
        return rtrim($this->baseUrl, '/');
    }

    /**
     * Returns the first AppSpiderArticle matching $condition, or a fresh,
     * unsaved instance when there is none.
     */
    protected function getArticleModel(array $condition)
    {
        return AppSpiderArticle::query()->where($condition)->first() ?? new AppSpiderArticle();
    }

    /**
     * Performs a GET request with curl (15 s timeout, up to 10 redirects followed).
     *
     * @return array{0: string|false, 1: int} Response body (false when curl
     *                                         failed) and the HTTP status code.
     */
    protected function request(string $url): array
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => '',
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 15,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => 'GET',
        ]);

        $result = curl_exec($ch);
        // Read the status while the handle is still open; the previous code
        // asked for it after curl_close(). The handle is released when $ch
        // goes out of scope, so no explicit close is needed on PHP 8.
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        return [$result, $httpCode];
    }

    /**
     * Puts a slot token back into the channel.
     */
    protected function returnPool()
    {
        return $this->channel->push(1);
    }

    /**
     * Takes a slot token from the channel, suspending while none is available.
     */
    protected function getPool(): bool
    {
        return (bool) $this->channel->pop();
    }

    /**
     * Runs $func in a new coroutine once a slot is free.
     *
     * The slot is handed back and the coroutine is removed from
     * $coroutineList when the coroutine ends.
     */
    protected function createCoroutine(\Closure $func): void
    {
        if ($this->isInit === false) {
            $this->isInit = true;
            $this->init();
        }

        $this->getPool();

        co(function () use ($func) {
            // Register from inside the coroutine: when this was done by the
            // caller after co() returned, a coroutine that finished without
            // ever yielding had already run its cleanup, leaving a stale id.
            $cid = Coroutine::id();
            $this->coroutineList[$cid] = 1;

            \Co\defer(function () use ($cid) {
                unset($this->coroutineList[$cid]);
                $this->returnPool();
            });

            $func();
        });
    }

    /**
     * Echoes a timestamped debug line, but only when the stored `prod`
     * option is exactly false.
     *
     * @param array|string $message Printed through print_r().
     * @param int          $level   Currently unused.
     */
    protected function debugPrint(array|string $message = '', $level = 0)
    {
        if ($this->getCommandConfigure('prod') === false) {
            $printTime = date('H:i:s');
            echo "[spider-debug][$printTime]" . print_r($message, true) . PHP_EOL;
        }
    }

    /**
     * Used for unit tests: invokes the named method with the given arguments.
     *
     * @param string $methodName
     * @param $args
     * @return mixed
     */
    public function testMethod(string $methodName, $args = [])
    {
        return $this->{$methodName}(...$args);
    }

    /**
     * Stores the option array that getCommandConfigure() reads from.
     */
    public function setCommandConfigure($options): void
    {
        $this->commandConfigure = $options;
    }

    /**
     * Returns the whole stored option array when $key is falsy, otherwise
     * the value stored under $key, or $defaultValue when it is missing.
     */
    public function getCommandConfigure($key = null, $defaultValue = null)
    {
        if (!$key) {
            return $this->commandConfigure;
        }

        return $this->commandConfigure[$key] ?? $defaultValue;
    }

    /**
     * Stores the parsed command-line options and returns 0.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setCommandConfigure($input->getOptions());

        return 0;
    }
}

View File

@ -0,0 +1,187 @@
<?php
namespace App\Command\spider;
use Hyperf\Command\Annotation\Command;
use Hyperf\DbConnection\Db;
use Hyperf\Logger\LoggerFactory;
use Psr\Container\ContainerInterface;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use function Swoole\Coroutine\run;
#[Command]
class ElleStreetCommand extends BaseSpider
{
    /**
     * Captures the body of the page's `json-ld` script tag.
     */
    private const JSON_LD_PATTERN = '/<script id="json-ld" type="application\/ld\+json">([\s\S]*?)<\/script>/';

    /**
     * @var string
     */
    protected string $baseUrl = 'https://www.elle.com';

    protected const PLATFORM = 'elle-street';

    public function __construct(protected ContainerInterface $container, LoggerFactory $loggerFactory)
    {
        parent::__construct('spider:elle-street');
    }

    public function configure()
    {
        parent::configure();
        $this->setDescription('elle.com/street elle街拍模块');
        $this->addOption('brandId', 'b', InputOption::VALUE_OPTIONAL, '指定的品牌id', false);
    }

    /**
     * Stores the options, then runs spiderStart() inside a coroutine scheduler.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setCommandConfigure($input->getOptions());

        run(function () {
            $this->spiderStart();
        });

        return 0;
    }

    /**
     * Yields app_brands rows with id > 1 ordered by id, optionally restricted
     * to a single brand id. Not called from anywhere in this class.
     */
    private function _getTask($brand): \Generator
    {
        $query = Db::table('app_brands');
        if ($brand) {
            $query->where(['id' => $brand]);
        }
        $query->where('id', '>', 1)->orderBy('id');

        foreach ($query->cursor() as $row) {
            yield $row;
        }
    }

    /**
     * Lower-cases $name and replaces dots and spaces with dashes.
     * Not called from anywhere in this class.
     */
    private function _getTaskName($name): string
    {
        return strtolower(strtr($name, [
            '.' => '-',
            ' ' => '-',
        ]));
    }

    /**
     * Reads the street-style listing, follows every listed article and dumps
     * the image entries found in the articles' JSON-LD data.
     *
     * Nothing is persisted. The statements that used to follow the early
     * `return` here were unreachable and referenced an undefined `$task`,
     * so they were removed.
     */
    public function spiderStart(): void
    {
        [$listingHtml] = $this->request($this->getBaseUrl() . '/fashion/street-style/');

        $listing = $this->extractJsonLd($listingHtml);
        if ($listing === null) {
            $this->logger->info(self::getPlatform() . " 数据获取失败。");
            return;
        }

        $articles = $listing[0]['itemListElement'] ?? [];
        if (!$articles) {
            $this->logger->info(self::getPlatform() . " 文章数据获取失败。");
            return;
        }

        $saveImages = [];
        foreach ($articles as $article) {
            $articleUrl = $article['url'] ?? null;
            if (!is_string($articleUrl) || $articleUrl === '') {
                continue;
            }

            [$articleHtml] = $this->request($articleUrl);
            $detail = $this->extractJsonLd($articleHtml);

            foreach ($detail['about']['itemListElement'] ?? [] as $image) {
                if (isset($image['item']['image'])) {
                    $saveImages[] = $image['item']['image'];
                }
            }
        }

        var_dump($saveImages);
    }

    /**
     * Decodes the first `json-ld` script tag of a page.
     *
     * @param mixed $html Response body; anything but a string counts as "no tag".
     * @return array|null null when the tag is absent, an empty array when its
     *                    content is not a JSON array/object.
     */
    private function extractJsonLd(mixed $html): ?array
    {
        if (!is_string($html) || preg_match(self::JSON_LD_PATTERN, $html, $match) !== 1) {
            return null;
        }

        $decoded = json_decode($match[1], true);

        return is_array($decoded) ? $decoded : [];
    }

    /**
     * Returns the `designerCollections` list embedded in the page's
     * `window.__PRELOADED_STATE__` JSON, or an empty array.
     * Not called from anywhere in this class.
     */
    private function _getShowsList($url)
    {
        [$body, $httpCode] = $this->request($url);
        if ($httpCode != 200) {
            $this->logger->info('未找到数据.');
            return [];
        }

        preg_match_all('/window.__PRELOADED_STATE__ = ([\s\S]*?);<\/script>/', (string) $body, $matches);
        $val = json_decode($matches[1][0] ?? '', true);

        return $val['transformed']['runwayDesignerContent']['designerCollections'] ?? [];
    }

    /**
     * Stores the `xxl` image URLs of one collection's slideshow page on the
     * spider article identified by brand and title.
     * Not called from anywhere in this class.
     *
     * @param array $info Must provide `hed` (title) and `url` (page path).
     */
    private function _getDetail(int $brandId, array $info)
    {
        $model = $this->getArticleModel(['brand' => $brandId, 'title' => $info['hed']]);
        $model->title = $info['hed'];
        $model->images = json_encode([]);
        $model->platform = self::getPlatform();

        // Fetch the images.
        $requestUrl = $this->getBaseUrl() . $info['url'] . '/slideshow/collection';
        $this->logger->info("正在匹配发布会详情 {$requestUrl}");

        [$result, $httpCode] = $this->request($requestUrl);
        if ($httpCode != 200 || !$result) {
            $this->logger->warning($requestUrl . '请求失败.');
            return;
        }

        preg_match_all('/window\.__PRELOADED_STATE__ = (.*?);</s', $result, $matches);
        $val = json_decode($matches[1][0] ?? '', true);
        $images = $val['transformed']['runwayGalleries']['galleries'][0]['items'] ?? false;
        if ($images === false) {
            $this->logger->warning($requestUrl . '获取图片失败.');
            return;
        }

        $saveUrl = [];
        foreach (is_array($images) ? $images : [] as $img) {
            $src = $img['image']['sources']['xxl']['url'] ?? null;
            if ($src !== null) {
                $saveUrl[] = ['src' => $src];
            }
        }
        $model->images = json_encode($saveUrl);

        $model->save();
        $this->logger->info("end: {$requestUrl}");
    }
}

View File

@ -0,0 +1,103 @@
<?php
namespace App\Command\spider;
use App\Helpers\AppHelper;
use App\Model\AppBrand;
use Hyperf\Command\Annotation\Command;
use Hyperf\Coroutine\Coroutine;
use Hyperf\DbConnection\Db;
use Hyperf\Logger\LoggerFactory;
use Psr\Container\ContainerInterface;
use Swoole\ExitException;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use function Swoole\Coroutine\run;
#[Command]
class FashionSnapCommand extends BaseSpider
{
    protected const PLATFORM = 'fashionsnap';

    protected string $baseUrl = 'https://www.fashionsnap.com';

    public function __construct(protected ContainerInterface $container, LoggerFactory $loggerFactory)
    {
        parent::__construct('spider:fashionsnap');
    }

    public function configure()
    {
        parent::configure();
        $this->setDescription('自动采集fashionsnap.com');
        $this->addOption('brandId', 'b', InputOption::VALUE_OPTIONAL, '指定的品牌id', false);
    }

    /**
     * Yields the brands to collect: the comma-separated ids in $brand when
     * given, otherwise every brand whose spider_origin is 'fashionsnap',
     * ordered by id.
     */
    private function _getTask($brand): \Generator
    {
        $query = Db::table('app_brands');
        if ($brand) {
            $query->whereIn('id', explode(',', $brand));
        } else {
            $query->where('spider_origin', '=', 'fashionsnap')->orderBy('id');
        }

        foreach ($query->cursor() as $row) {
            // NOTE(review): presumably never triggers for cursor rows — confirm.
            if (!$row) {
                throw new ExitException('END.');
            }
            yield $row;
        }
    }

    /**
     * Collects the articles of every selected brand inside a coroutine scheduler.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setCommandConfigure($input->getOptions());
        $brand = $input->getOption('brandId');

        run(function () use ($brand) {
            foreach ($this->_getTask($brand) as $task) {
                $this->syncBrand($task);
            }
        });

        return 0;
    }

    /**
     * Fetches up to 50 articles for one brand and stores them; when at least
     * one article was saved the brand's spider_origin is set to this platform.
     *
     * @param object $task Row of app_brands (uses `id` and `name`).
     */
    private function syncBrand(object $task): void
    {
        // The brand name goes into a query string, so it has to be encoded;
        // names with spaces or '&' used to produce a broken URL.
        $url = $this->getBaseUrl()
            . '/api/algolia/article/?blogIds=4&brandName=' . rawurlencode((string) $task->name) . '&limit=50';

        [$body, $httpCode] = $this->request($url);
        echo $task->name . '--' . $httpCode . PHP_EOL;
        if ($httpCode != 200) {
            return;
        }

        $payload = json_decode((string) $body, true);
        $total = is_array($payload) ? ($payload['totalCount'] ?? 0) : 0;
        // Brands with no hits, or with more than 200 hits, are skipped.
        if ($total == 0 || $total > 200) {
            return;
        }

        $articles = $payload['articles'] ?? [];
        $savedAny = false;
        foreach (is_array($articles) ? $articles : [] as $item) {
            // Remember any success; before, the last article alone decided.
            if ($this->saveArticle($task, $item)) {
                $savedAny = true;
            }
        }

        if ($savedAny) {
            $brandModel = AppBrand::find($task->id);
            if ($brandModel) {
                $brandModel->spider_origin = self::getPlatform();
                $brandModel->save();
            }
        }
    }

    /**
     * Creates or updates the spider article for one API item.
     *
     * @return bool True when the article had a cover image and was saved.
     */
    private function saveArticle(object $task, array $item): bool
    {
        $title = $item['mainCategory']['name'] ?? null;
        if (!is_string($title) || $title === '') {
            return false;
        }

        $model = $this->getArticleModel(['title' => $title, 'platform' => static::PLATFORM, 'brand' => $task->id]);
        $model->title = $title;
        $model->year = AppHelper::getYear($model->title);
        $model->brand = $task->id;
        $model->module = 0;
        $model->platform = self::getPlatform();

        $saveImages = [];
        foreach ($item['mainGalleryImages'] ?? [] as $image) {
            $saveImages[] = [
                'src' => 'https://fashionsnap-assets.com/asset/width=4096' . $image,
            ];
        }
        $model->images = json_encode($saveImages);
        $model->cover = $saveImages[0]['src'] ?? '';
        $model->source_url = 'https://fashionsnap.com' . ($item['permalink'] ?? '');

        // Articles without any image are not stored.
        if (!$model->cover) {
            return false;
        }

        return (bool) $model->save();
    }
}

View File

@ -0,0 +1,117 @@
<?php
namespace App\Command\spider;
use App\Enums\ArticleModuleEnum;
use App\Helpers\AppHelper;
use Hyperf\Command\Annotation\Command;
use Hyperf\Logger\LoggerFactory;
use Psr\Container\ContainerInterface;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\DomCrawler\Crawler;
#[Command]
class TheImpressionStreetCommand extends BaseSpider
{
    protected const PLATFORM = 'theimpression-street';

    /**
     * Number of listing pages to walk. The old inline comment spoke of fifty
     * pages while the loop only ever fetched page 1; the default keeps the
     * one-page behavior.
     */
    protected int $maxPages = 1;

    public function __construct(protected ContainerInterface $container)
    {
        parent::__construct('spider:theimpression-street');
    }

    public function configure()
    {
        parent::configure();
        $this->setDescription('自动采集 https://theimpression.com/street-style');
    }

    /**
     * Collects the articles linked from the street-style landing page banner
     * and from the paged article listing, storing each through getDetail().
     *
     * @return int Always 0, including when a request fails.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        parent::execute($input, $output);

        $url = 'https://theimpression.com/street-style';
        [$res, $httpCode] = $this->request($url);
        if ($httpCode != 200) {
            $this->debugPrint("{$url} 请求失败.");
            return 0;
        }

        // Articles featured in the banner.
        (new Crawler($res))
            ->filter('.parallax .mask-overlay')
            ->each(function (Crawler $node) {
                $href = $node->attr('href');
                $text = trim((string) $node->attr('aria-label'));
                // A node without href used to pass null into getDetail(string $url).
                if (!$href || !$text) {
                    $this->debugPrint("找不到标题或链接.");
                    return;
                }
                $this->debugPrint("标题: {$text}");
                $this->debugPrint("链接: {$href}");
                $this->getDetail($href, $text);
            });

        // Paged article listing.
        for ($page = 1; $page <= $this->maxPages; $page++) {
            $url = "https://theimpression.com/wp-json/codetipi-zeen/v1/block?paged={$page}&type=1&data%5Bargs%5D%5Bcat%5D=1";
            [$res, $httpCode] = $this->request($url);
            if ($httpCode != 200) {
                $this->debugPrint("{$url} 请求失败.");
                return 0;
            }

            // The HTML fragment with the articles is read from index 1 of the decoded JSON.
            $payload = json_decode((string) $res, true);
            $html = is_array($payload) ? ($payload[1] ?? null) : null;
            if (!is_string($html) || $html === '') {
                $this->debugPrint("{$url} 响应解析失败.");
                return 0;
            }

            (new Crawler($html))
                ->filter('article')
                ->each(function (Crawler $node) {
                    $href = $node->filter('.mask-img')->attr('href', '');
                    $title = $node->filter('.title-wrap')->text('');
                    if (!$href || !$title) {
                        $this->debugPrint("找不到标题或链接.");
                        return;
                    }
                    $this->getDetail($href, $title);
                });
        }

        return 0;
    }

    /**
     * Downloads one article page and stores its distinct `figure a img`
     * sources on the spider article identified by title, platform and brand 0.
     * Nothing is saved when the page yields no image.
     *
     * @return int|null 0 when the request failed, otherwise nothing.
     */
    public function getDetail(string $url, $title)
    {
        $model = $this->getArticleModel(['title' => $title, 'platform' => static::getPlatform(), 'brand' => 0]);
        $model->title = $title;
        $model->platform = static::getPlatform();
        $model->module = ArticleModuleEnum::STREET->value;
        $model->year = AppHelper::getYear($title);
        $model->source_url = $url;

        [$res, $httpCode] = $this->request($url);
        if ($httpCode != 200) {
            $this->debugPrint("{$url} 请求失败.");
            return 0;
        }

        // Keyed by src so that repeated images are stored once.
        $images = [];
        (new Crawler($res))
            ->filter('figure a img')
            ->each(function (Crawler $node) use (&$images) {
                $src = $node->attr('src');
                if ($src && !isset($images[$src])) {
                    $this->debugPrint("采集图片: {$src}");
                    $images[$src] = ['src' => $src];
                }
            });

        if ($images) {
            $model->cover = reset($images)['src'];
            $model->images = json_encode(array_values($images));
            $model->save();
        }
    }
}

View File

@ -0,0 +1,189 @@
<?php
namespace App\Command\spider;
use App\Helpers\AppHelper;
use App\Model\AppBrand;
use Hyperf\Command\Annotation\Command;
use Hyperf\Coroutine\Coroutine;
use Hyperf\DbConnection\Db;
use Hyperf\Logger\LoggerFactory;
use Psr\Container\ContainerInterface;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use function Swoole\Coroutine\run;
#[Command]
class VogueCommand extends BaseSpider
{
    /**
     * @var string
     */
    protected string $baseUrl = 'https://www.vogue.com';

    protected const PLATFORM = 'vogue';

    public function __construct()
    {
        parent::__construct('spider:vogue');
        // Lifts the PCRE backtrack limit for the regexes run against whole pages.
        ini_set('pcre.backtrack_limit', '-1');
    }

    public function configure()
    {
        parent::configure();
        $this->setDescription('自动采集vogue.com');
        $this->addOption('brandId', 'b', InputOption::VALUE_OPTIONAL, '指定的品牌id.', false);
        $this->addOption('forceUpdate', 'f', InputOption::VALUE_NEGATABLE, '是否对已经保存的数据进行强制更新.', false);
        $this->addOption('onlyPlatform', 'o', InputOption::VALUE_NEGATABLE, '是否只对当前平台品牌更新.', false);
    }

    /**
     * Starts one pooled coroutine per selected brand, throttled to roughly
     * half of the pool, then sleeps 60 s and exits the process.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setCommandConfigure($input->getOptions());

        run(function () {
            // Cap on brands processed at the same time, so that the pool is
            // never filled entirely by brand coroutines that each still need
            // to spawn child coroutines (which could then not be created).
            $maxBrandExecuteCount = $this->maxCo / 2;
            $currentBrandExecute = 0;

            foreach ($this->_getTask() as $task) {
                $currentBrandExecute++;
                $this->createCoroutine(function () use ($task, &$currentBrandExecute) {
                    try {
                        $this->spiderStart($task);
                    } finally {
                        // Without this a failing brand kept its slot in the
                        // counter forever and the loop below eventually stalled.
                        $currentBrandExecute--;
                    }
                });

                while ($currentBrandExecute > $maxBrandExecuteCount) {
                    Coroutine::sleep(1);
                }
            }

            // NOTE(review): fixed 60 s grace period before the process is
            // forced to exit; detail coroutines still running after that are
            // cut off. Presumably needed because the timer started by the
            // base class is never cleared — confirm.
            Coroutine::sleep(60);
            exit(0);
        });

        return 0;
    }

    /**
     * Yields the brands to collect: the one given by --brandId, otherwise all
     * brands with id > 1 ordered by id (limited to this platform's brands
     * when --onlyPlatform is set).
     */
    private function _getTask(): \Generator
    {
        $query = AppBrand::query();
        $brandId = $this->getCommandConfigure('brandId');
        $onlyPlatform = $this->getCommandConfigure('onlyPlatform');

        if ($brandId) {
            $query->where(['id' => $brandId]);
        } else {
            $query->where('id', '>', 1)
                ->when($onlyPlatform, fn ($q) => $q->where('spider_origin', static::PLATFORM))
                ->orderBy('id');
        }

        foreach ($query->cursor() as $row) {
            yield $row;
        }
    }

    /**
     * Turns a brand name into the slug used in the designer URL: lower case,
     * dots and spaces replaced by dashes, ampersands removed.
     */
    protected function getTaskName($name): string
    {
        return strtolower(strtr($name, [
            '.' => '-',
            ' ' => '-',
            '&' => '',
        ]));
    }

    /**
     * Loads the show list of one brand and starts a pooled coroutine per show.
     *
     * @param object $task Brand record (uses `id` and `name`).
     */
    public function spiderStart($task): void
    {
        $brandName = $this->getTaskName($task->name);
        $url = $this->getBaseUrl() . '/fashion-shows/designer/' . $brandName;
        // Values are passed as arguments: used as the format string itself,
        // a '%' in the name or URL made sprintf() throw.
        $this->logger->info(sprintf('[Command] brandName: %s; spiderUrl: %s', $brandName, $url));

        // Fetch the list of shows.
        foreach ($this->getShowsList($url) as $list) {
            $this->createCoroutine(function () use ($task, $list) {
                $this->getDetail($task->id, $list);
            });
        }
    }

    /**
     * Returns the `designerCollections` list embedded in the page's
     * `window.__PRELOADED_STATE__` JSON, or an empty array when the request
     * fails or the data is missing.
     */
    protected function getShowsList($url)
    {
        [$body, $httpCode] = $this->request($url);
        if ($httpCode != 200) {
            $this->logger->info('未找到数据.');
            return [];
        }

        preg_match_all('/window.__PRELOADED_STATE__ = ([\s\S]*?);<\/script>/', (string) $body, $matches);
        $val = json_decode($matches[1][0] ?? '', true);

        return $val['transformed']['runwayDesignerContent']['designerCollections'] ?? [];
    }

    /**
     * Stores one show: title, year, source URL and the `xxl` image URLs of
     * its slideshow page. An article that already exists is left untouched
     * unless --forceUpdate is set.
     *
     * @param array $info Must provide `hed` (title) and `url` (page path).
     */
    protected function getDetail(int $brandId, array $info)
    {
        $model = $this->getArticleModel(['brand' => $brandId, 'title' => $info['hed']]);

        // Without force update, existing data is not touched.
        if ($model->id && $this->getCommandConfigure('forceUpdate') === false) {
            return;
        }

        $requestUrl = $this->getBaseUrl() . $info['url'] . '/slideshow/collection';

        $model->title = $info['hed'];
        $model->images = json_encode([]);
        $model->platform = self::PLATFORM;
        $model->brand = $brandId;
        $model->module = 0;
        $model->year = AppHelper::getYear($info['hed']);
        $model->source_url = $requestUrl;

        // Fetch the images.
        $this->logger->info("正在匹配发布会详情 {$requestUrl}");
        [$result, $httpCode] = $this->request($requestUrl);
        if ($httpCode != 200 || !$result) {
            $this->logger->warning($requestUrl . '请求失败.');
            return;
        }

        preg_match_all('/window\.__PRELOADED_STATE__ = (.*?);</s', $result, $matches);
        $val = json_decode($matches[1][0] ?? '', true);
        $images = $val['transformed']['runwayGalleries']['galleries'][0]['items'] ?? false;
        if ($images === false) {
            $this->logger->warning($requestUrl . '获取图片失败.');
            return;
        }

        $saveUrl = [];
        foreach (is_array($images) ? $images : [] as $img) {
            $src = $img['image']['sources']['xxl']['url'] ?? null;
            if ($src !== null) {
                $saveUrl[] = ['src' => $src];
            }
        }

        $model->images = json_encode($saveUrl);
        // Empty string when the gallery had no usable image, instead of
        // reading an undefined offset.
        $model->cover = $saveUrl[0]['src'] ?? '';

        $model->save();
        $this->logger->info("end: {$requestUrl}");
    }
}