update
This commit is contained in:
200
app/Command/baidu/AutoPush.php
Normal file
200
app/Command/baidu/AutoPush.php
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Command\baidu;
|
||||||
|
|
||||||
|
use App\Command\spider\BaseSpider;
|
||||||
|
use App\Model\AppNews;
|
||||||
|
use App\Model\AppWebsiteConfig;
|
||||||
|
use Hyperf\Command\Annotation\Command;
|
||||||
|
use Hyperf\DbConnection\Db;
|
||||||
|
use Hyperf\Logger\LoggerFactory;
|
||||||
|
use Psr\Container\ContainerInterface;
|
||||||
|
use Symfony\Component\Console\Input\InputInterface;
|
||||||
|
use Symfony\Component\Console\Input\InputOption;
|
||||||
|
use Symfony\Component\Console\Output\OutputInterface;
|
||||||
|
use function Swoole\Coroutine\run;
|
||||||
|
|
||||||
|
/**
 * Console command registered under the name `baidu:auto-push`.
 *
 * `execute()` collects the article URLs of every non-deleted website configuration.
 * The class also carries spider helpers (`spiderStart()` and the private `_get*` methods)
 * that scrape JSON embedded in remote pages.
 *
 * NOTE(review): `$this->logger`, `request()`, `getBaseUrl()`, `getPlatform()`,
 * `getArticleModel()` are not defined in this class; they are presumably inherited from
 * BaseSpider — confirm.
 */
#[Command]
class AutoPush extends BaseSpider
{
    /**
     * Baidu link-submission endpoint, including the `site` and `token` query parameters.
     *
     * NOTE(review): the push token is hard-coded in source control. Consider loading it
     * from configuration and rotating the committed value.
     *
     * @var string
     */
    protected string $baseUrl = 'http://data.zz.baidu.com/urls?site=https://www.jiangsucf.com&token=5CtAd8vD3QQazJjh';

    protected const PLATFORM = 'elle-street';

    /**
     * Registers the command name with the parent command.
     *
     * NOTE(review): `$loggerFactory` is accepted but never used in this constructor — confirm
     * whether the logger is wired up elsewhere.
     *
     * @param ContainerInterface $container     stored as a protected promoted property
     * @param LoggerFactory      $loggerFactory currently unused
     */
    public function __construct(protected ContainerInterface $container, LoggerFactory $loggerFactory)
    {
        parent::__construct('baidu:auto-push');
    }

    /**
     * Sets the command description on top of the parent configuration.
     */
    public function configure()
    {
        parent::configure();

        // Regular indexing ("normal inclusion") — usage notes from Baidu:
        // 1. The regular indexing tool lets a site proactively push resources to Baidu Search,
        //    shortening the time crawlers need to discover the site's links. It does not
        //    guarantee indexing or display.
        // 2. API submission and manual submission share one quota; the sitemap submission quota
        //    is not shared with other methods. The actual quota is whatever the site page shows.
        //    Quota does not accumulate and is valid for the current day only.
        // 3. If a link redirects, submit the post-redirect link directly. If the site changes
        //    domain, submit resources under the new domain; for pages migrated to HTTPS, submit
        //    the HTTPS resources.
        $this->setDescription('自动向百度推送');
    }

    /**
     * Builds `https://{app_domain}/news/{id}` for every article of every non-deleted website
     * configuration and dumps the resulting list.
     *
     * NOTE(review): the list is only printed with var_dump(); nothing is sent to `$baseUrl`
     * here, so the actual push still needs to be implemented.
     *
     * @return int always 0
     */
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $sites = AppWebsiteConfig::query()
            ->select(['app_domain', 'id'])
            ->where('is_delete', 0)
            ->get()
            ->toArray();

        $pushArray = [];
        foreach ($sites as $site) {
            // Articles are matched to a site through their `platform` column.
            $articles = AppNews::query()
                ->where('platform', $site['id'])
                ->select(['id'])
                ->get()
                ->toArray();

            foreach ($articles as $article) {
                $pushArray[] = 'https://' . $site['app_domain'] . '/news/' . $article['id'];
            }
        }

        var_dump($pushArray);

        return 0;
    }

    /**
     * Lazily yields rows of `app_brands` with id > 1, ordered by id.
     *
     * Not called from within this class at the moment.
     *
     * @param mixed $brand when truthy, additionally restricts the query to this id
     */
    private function _getTask($brand): \Generator
    {
        $query = Db::table('app_brands');
        if ($brand) {
            $query->where(['id' => $brand]);
        }

        $query->where('id', '>', 1)->orderBy('id');

        foreach ($query->cursor() as $row) {
            yield $row;
        }
    }

    /**
     * Lower-cases a name and replaces dots and spaces with hyphens.
     *
     * Not called from within this class at the moment.
     */
    private function _getTaskName($name): string
    {
        return strtolower(strtr($name, [
            '.' => '-',
            ' ' => '-',
        ]));
    }

    /**
     * Returns the content of the first `<script id="json-ld" type="application/ld+json">`
     * element in the given HTML, or null when the input is not a string or has no such element.
     *
     * @param mixed $html response body as returned by request()
     */
    private function _matchJsonLd($html): ?string
    {
        if (!is_string($html)) {
            return null;
        }

        $pattern = '/<script id="json-ld" type="application\/ld\+json">([\s\S]*?)<\/script>/';

        return preg_match($pattern, $html, $found) === 1 ? $found[1] : null;
    }

    /**
     * Fetches the street-style listing page, follows every listed article and collects the
     * image values found in each article's JSON-LD block, then dumps the collected list.
     *
     * Pages without a JSON-LD block and entries missing the expected keys are skipped.
     *
     * NOTE(review): the collected images are only printed with var_dump(); they are not stored.
     */
    public function spiderStart(): void
    {
        [$listingHtml] = $this->request($this->getBaseUrl() . '/fashion/street-style/');

        $listingJson = $this->_matchJsonLd($listingHtml);
        if ($listingJson === null) {
            $this->logger->info(self::getPlatform() . " 数据获取失败。");
            return;
        }

        $listing = json_decode($listingJson, true);
        $articles = $listing[0]['itemListElement'] ?? [];

        if (!is_array($articles) || !$articles) {
            $this->logger->info(self::getPlatform() . " 文章数据获取失败。");
            return;
        }

        $saveImages = [];
        foreach ($articles as $article) {
            if (!isset($article['url'])) {
                continue;
            }

            [$articleHtml] = $this->request($article['url']);

            $articleJson = $this->_matchJsonLd($articleHtml);
            if ($articleJson === null) {
                continue;
            }

            $detail = json_decode($articleJson, true);
            $images = $detail['about']['itemListElement'] ?? [];

            foreach (is_array($images) ? $images : [] as $image) {
                if (isset($image['item']['image'])) {
                    $saveImages[] = $image['item']['image'];
                }
            }
        }

        var_dump($saveImages);
    }

    /**
     * Requests a page and extracts `transformed.runwayDesignerContent.designerCollections`
     * from the `window.__PRELOADED_STATE__` JSON embedded in it.
     *
     * Not called from within this class at the moment.
     *
     * @param mixed $url page to request
     *
     * @return mixed the extracted value, or [] when the status is not 200, the state script is
     *               missing, or the expected keys are absent
     */
    private function _getShowsList($url)
    {
        [$body, $httpCode] = $this->request($url);

        if ((int) $httpCode !== 200) {
            $this->logger->info('未找到数据.');
            return [];
        }

        $pattern = '/window\.__PRELOADED_STATE__ = ([\s\S]*?);<\/script>/';
        if (!is_string($body) || preg_match($pattern, $body, $found) !== 1) {
            return [];
        }

        $state = json_decode($found[1], true);

        return $state['transformed']['runwayDesignerContent']['designerCollections'] ?? [];
    }

    /**
     * Loads the article model for a brand/title pair, scrapes the image URLs of its
     * slideshow page and saves the model.
     *
     * Nothing is saved when the request fails or when the gallery items cannot be located in
     * the page's preloaded state. Gallery items without an `xxl` image URL are skipped.
     *
     * Not called from within this class at the moment.
     *
     * @param int   $brandId brand identifier passed to getArticleModel()
     * @param array $info    entry providing at least the `hed` (title) and `url` keys
     */
    private function _getDetail(int $brandId, array $info)
    {
        $model = $this->getArticleModel(['brand' => $brandId, 'title' => $info['hed']]);

        $model->title = $info['hed'];
        $model->images = json_encode([]);
        $model->platform = self::getPlatform();

        // Fetch the images.
        $requestUrl = $this->getBaseUrl() . $info['url'] . '/slideshow/collection';
        $this->logger->info("正在匹配发布会详情 {$requestUrl}");

        [$result, $httpCode] = $this->request($requestUrl);

        if ((int) $httpCode !== 200 || !$result) {
            $this->logger->warning($requestUrl . '请求失败.');
            return;
        }

        $state = null;
        if (preg_match('/window\.__PRELOADED_STATE__ = (.*?);</s', $result, $found) === 1) {
            $state = json_decode($found[1], true);
        }

        // A missing state script, invalid JSON and a missing gallery all end up here as false.
        $images = $state['transformed']['runwayGalleries']['galleries'][0]['items'] ?? false;

        if ($images === false) {
            $this->logger->warning($requestUrl . '获取图片失败.');
            return;
        }

        $saveUrl = [];
        foreach (is_array($images) ? $images : [] as $img) {
            $src = $img['image']['sources']['xxl']['url'] ?? null;
            if ($src === null) {
                continue;
            }

            $saveUrl[] = ['src' => $src];
        }

        $model->images = json_encode($saveUrl);
        $model->save();

        $this->logger->info("end: {$requestUrl}");
    }
}
|
Reference in New Issue
Block a user