setDescription('自动采集vogue.com');
        $this->addOption('brandId', 'b', InputOption::VALUE_OPTIONAL, '指定的品牌id.', false);
        $this->addOption('forceUpdate', 'f', InputOption::VALUE_NEGATABLE, '是否对已经保存的数据进行强制更新.', false);
        $this->addOption('onlyPlatform', 'o', InputOption::VALUE_NEGATABLE, '是否只对当前平台品牌更新.', false);
    }

    /**
     * Command entry point: walks the brand tasks and spiders each one in its own coroutine.
     *
     * The number of brands being processed at once is throttled to half of
     * `$this->maxCo`, because every brand coroutine spawns further coroutines
     * for its shows.
     *
     * @param InputInterface  $input  Console input; its options are stored via setCommandConfigure().
     * @param OutputInterface $output Console output (not used here).
     *
     * @return int Always 0.
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->setCommandConfigure($input->getOptions());

        run(function () {
            // Upper bound on brands processed concurrently. Prevents the case where
            // every available coroutine is a brand that has child work, which would
            // leave no room to create the child coroutines.
            $maxBrandExecuteCount = $this->maxCo / 2;
            $currentBrandExecute = 0;

            foreach ($this->_getTask() as $task) {
                $currentBrandExecute++;

                $this->createCoroutine(function () use ($task, &$currentBrandExecute) {
                    try {
                        $this->spiderStart($task);
                    } finally {
                        // Always release the slot, even when the spider throws;
                        // otherwise the throttle loop below could wait forever.
                        $currentBrandExecute--;
                    }
                });

                // Back off while too many brands are in flight.
                while ($currentBrandExecute > $maxBrandExecuteCount) {
                    Coroutine::sleep(1);
                }
            }

            // NOTE(review): fixed 60s pause before terminating the process; presumably a
            // grace period for coroutines that are still running - confirm.
            Coroutine::sleep(60);
            exit(0);
        });

        return 0;
    }

    /**
     * Lazily yields the brand rows that should be spidered.
     *
     * When the `brandId` option is set, only that brand is selected. Otherwise all
     * brands with id > 1 are selected in id order, optionally restricted to rows
     * whose `spider_origin` equals this command's PLATFORM when `onlyPlatform` is set.
     *
     * @return \Generator Yields one row of the AppBrand query cursor at a time.
     */
    private function _getTask(): \Generator
    {
        $query = AppBrand::query();
        $brandId = $this->getCommandConfigure('brandId');
        $onlyPlatform = $this->getCommandConfigure('onlyPlatform');

        if ($brandId) {
            $query->where(['id' => $brandId]);
        } else {
            $query->where('id', '>', 1)
                ->when($onlyPlatform, fn($q) => $q->where('spider_origin', static::PLATFORM))
                ->orderBy('id');
        }

        foreach ($query->cursor() as $row) {
            yield $row;
        }
    }

    /**
     * Converts a brand name into the slug used in the designer URL.
     *
     * Dots and spaces become '-', ampersands are removed, and the result is lower-cased.
     *
     * @param string $name Brand name.
     *
     * @return string The slug.
     */
    protected function getTaskName($name): string
    {
        return strtolower(strtr($name, [
            '.' => '-',
            ' ' => '-',
            '&' => '',
        ]));
    }

    /**
     * Spiders one brand: fetches its show list and starts a coroutine per show.
     *
     * @param object $task Brand row; its `name` and `id` properties are read.
     */
    public function spiderStart($task): void
    {
        $brandName = $this->getTaskName($task->name);
        $url = $this->getBaseUrl() . '/fashion-shows/designer/' . $brandName;

        // Real placeholders are used so that a literal '%' in the brand name or URL
        // is not interpreted as a format specifier.
        $this->logger->info(sprintf('[Command] brandName: %s; spiderUrl: %s', $brandName, $url));

        // Fetch the list of shows for this designer.
        $showsList = $this->getShowsList($url);
        foreach ($showsList as $list) {
            $this->createCoroutine(function () use ($task, $list) {
                $this->getDetail($task->id, $list);
            });
        }
    }

    /**
     * Fetches a designer page and extracts its collection list from the
     * `window.__PRELOADED_STATE__` JSON embedded in the page.
     *
     * @param string $url Designer page URL.
     *
     * @return array The `transformed.runwayDesignerContent.designerCollections` entries,
     *               or an empty array when the request fails or the data is missing.
     */
    protected function getShowsList($url)
    {
        list($request, $httpCode) = $this->request($url);

        if ($httpCode != 200) {
            $this->logger->info('未找到数据.');
            return [];
        }

        // Only the first embedded state object is used.
        if (preg_match('/window\.__PRELOADED_STATE__ = ([\s\S]*?);<\/script>/', (string) $request, $matches) !== 1) {
            return [];
        }

        $val = json_decode($matches[1], true);
        $collections = $val['transformed']['runwayDesignerContent']['designerCollections'] ?? [];

        return is_array($collections) ? $collections : [];
    }

    /**
     * Fetches the slideshow of one show and stores it as an article record.
     *
     * @param int   $brandId Id of the brand the show belongs to.
     * @param array $info    Show entry from the designer page; the `hed` (title) and
     *                       `url` (page path) keys are read.
     */
    protected function getDetail(int $brandId, array $info)
    {
        $model = $this->getArticleModel(['brand' => $brandId, 'title' => $info['hed']]);

        // Unless a forced update was requested, leave already-saved records untouched.
        if ($model->id && $this->getCommandConfigure('forceUpdate') === false) {
            return;
        }

        $model->title = $info['hed'];
        $model->images = json_encode([]);
        $model->platform = self::PLATFORM;
        $model->brand = $brandId;
        $model->module = 0;
        $model->year = AppHelper::getYear($info['hed']);

        // Fetch the images.
        $requestUrl = $this->getBaseUrl() . $info['url'] . '/slideshow/collection';
        $this->logger->info("正在匹配发布会详情 {$requestUrl}");
        $model->source_url = $requestUrl;

        list($result, $httpCode) = $this->request($requestUrl);
        if ($httpCode != 200 || !$result) {
            $this->logger->warning($requestUrl . '请求失败.');
            return;
        }

        // NOTE(review): the tail of this pattern was not readable in the original
        // statement; it is written to mirror the one in getShowsList() - confirm
        // against the actual page markup.
        $images = false;
        if (preg_match('/window\.__PRELOADED_STATE__ = ([\s\S]*?);<\/script>/', $result, $matches) === 1) {
            $val = json_decode($matches[1], true);
            $images = $val['transformed']['runwayGalleries']['galleries'][0]['items'] ?? false;
        }

        if ($images === false) {
            $this->logger->warning($requestUrl . '获取图片失败.');
            return;
        }

        // Start from an empty list so that an empty gallery is stored as "[]".
        $saveUrl = [];
        foreach (is_array($images) ? $images : [] as $img) {
            $src = $img['image']['sources']['xxl']['url'] ?? null;
            if ($src === null) {
                // No xxl rendition for this item; nothing usable to store.
                continue;
            }
            $saveUrl[] = ['src' => $src];
        }

        $model->images = json_encode($saveUrl);
        if ($saveUrl !== []) {
            // The first image doubles as the cover.
            $model->cover = $saveUrl[0]['src'];
        }

        $model->save();
        $this->logger->info("end: {$requestUrl}");
    }
}