%PDF- %PDF-
Server IP : 37.220.80.31 / Your IP : 3.137.187.71 Web Server : Apache/2.4.52 (Ubuntu) System : Linux 3051455-guretool.twc1.net 5.15.0-107-generic #117-Ubuntu SMP Fri Apr 26 12:26:49 UTC 2024 x86_64 User : www-root ( 1010) PHP Version : 7.4.33 Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority, MySQL : OFF | cURL : ON | WGET : OFF | Perl : OFF | Python : OFF | Sudo : OFF | Pkexec : OFF Directory : /var/www/www-root/data/www/dev.artlot24.ru/bitrix/modules/main/lib/urlpreview/parser/ |
Upload File : |
<?php

namespace Bitrix\Main\UrlPreview\Parser;

use Bitrix\Main\Text\Encoding;
use Bitrix\Main\UrlPreview\UrlPreview;
use Bitrix\Main\Web\HttpClient;
use Bitrix\Main\UrlPreview\HtmlDocument;
use Bitrix\Main\UrlPreview\Parser;

/**
 * URL preview parser that looks for an oEmbed discovery <link> in an HTML document,
 * downloads the referenced metadata (JSON or XML) and copies the fields that are
 * still empty in the document (title, image, embed html, provider name, width, height).
 */
class Oembed extends Parser
{
	const OEMBED_TYPE_XML = "text/xml+oembed";
	const OEMBED_TYPE_JSON = "application/json+oembed";

	/** @var string Possible values: (json|xml) */
	protected $metadataType;

	/** @var string URL of the oEmbed metadata, taken from the discovery link's href. */
	protected $metadataUrl;

	/** @var string Encoding of the downloaded metadata. */
	protected $metadataEncoding;

	/**
	 * Downloads and parses HTML's document metadata, formatted with oEmbed standard.
	 *
	 * Only fields that are empty in the document are filled; existing values are never overwritten.
	 * Returns silently when no discovery link is found, the download fails or the payload
	 * cannot be parsed.
	 *
	 * @param HtmlDocument $document HTML document.
	 * @param HttpClient|null $httpClient Client to use for the download; a default one is created when omitted.
	 * @return void
	 */
	public function handle(HtmlDocument $document, HttpClient $httpClient = null)
	{
		if (!$this->detectOembedLink($document) || $this->metadataUrl == '')
		{
			return;
		}

		$isHttpClientPassed = true;
		if (!$httpClient)
		{
			$httpClient = $this->initHttpClient();
			$isHttpClientPassed = false;
		}

		$rawMetadata = $this->getRawMetaData($httpClient);

		// if request was served through http - try to switch to https
		if (
			(!$rawMetadata || $httpClient->getStatus() === 403)
			&& mb_strpos($this->metadataUrl, 'http://') === 0
		)
		{
			if (!$isHttpClientPassed)
			{
				// a client created here is replaced by a fresh one for the retry
				$httpClient = $this->initHttpClient();
			}

			// Swap the leading scheme only: the query string of an oEmbed endpoint commonly
			// carries the target page URL, which must not be rewritten.
			$metadataUrl = 'https://' . mb_substr($this->metadataUrl, mb_strlen('http://'));
			$rawMetadata = $httpClient->get($metadataUrl);
		}

		if (!is_string($rawMetadata) || $rawMetadata === '')
		{
			return;
		}

		$parsedMetadata = $this->parseMetadata($rawMetadata);
		if (!is_array($parsedMetadata))
		{
			return;
		}

		if ($this->metadataEncoding <> '' && $document->getEncoding() !== $this->metadataEncoding)
		{
			$parsedMetadata = Encoding::convertEncoding(
				$parsedMetadata,
				$this->metadataEncoding,
				$document->getEncoding()
			);

			if (!is_array($parsedMetadata))
			{
				return;
			}
		}

		$title = $this->getMetadataField($parsedMetadata, 'title');
		if ($document->getTitle() == '' && $title != '')
		{
			$document->setTitle($title);
		}

		$thumbnailUrl = $this->getMetadataField($parsedMetadata, 'thumbnail_url');
		if ($document->getImage() == '' && $thumbnailUrl != '')
		{
			$document->setImage($thumbnailUrl);
		}

		// NOTE(review): "getEmdbed" is spelled exactly as in the original call; it presumably
		// matches the method name declared by HtmlDocument - confirm before renaming.
		$html = $this->getMetadataField($parsedMetadata, 'html');
		if ($document->getEmdbed() == '' && $html != '')
		{
			$document->setEmbed($html);
		}

		// document extra field => oEmbed response field
		$extraFields = [
			'PROVIDER_NAME' => 'provider_name',
			'VIDEO_WIDTH' => 'width',
			'VIDEO_HEIGHT' => 'height',
		];
		foreach ($extraFields as $extraFieldName => $metadataKey)
		{
			$value = $this->getMetadataField($parsedMetadata, $metadataKey);
			if ($document->getExtraField($extraFieldName) == '' && $value != '')
			{
				$document->setExtraField($extraFieldName, $value);
			}
		}
	}

	/**
	 * Searches the document's HTML for a <link ... alternate ...> element whose markup mentions
	 * one of the oEmbed content types and remembers its href and type.
	 *
	 * On success sets $this->metadataType ('json' or 'xml') and $this->metadataUrl.
	 *
	 * @param HtmlDocument $document
	 * @return bool True if an oEmbed link with an href attribute was found.
	 */
	protected function detectOembedLink(HtmlDocument $document)
	{
		preg_match_all('/<link.+?alternate.+?>/', $document->getHtml(), $linkElements);

		foreach ($linkElements[0] as $linkElement)
		{
			$typeJson = (mb_strpos($linkElement, static::OEMBED_TYPE_JSON) !== false);
			$typeXml = (mb_strpos($linkElement, static::OEMBED_TYPE_XML) !== false);

			if (!$typeJson && !$typeXml)
			{
				continue;
			}

			if (preg_match('/href=[\'"](.+?)[\'"]/', $linkElement, $attributes))
			{
				// JSON wins when both content types appear in the same element
				$this->metadataType = ($typeJson ? 'json' : 'xml');
				$this->metadataUrl = htmlspecialcharsback($attributes[1]);

				return true;
			}
		}

		return false;
	}

	/**
	 * Dispatches the raw payload to the parser matching the detected metadata type.
	 *
	 * @param string $rawMetadata
	 * @return array|false Parsed fields, or false when the type is unknown or parsing failed.
	 */
	protected function parseMetadata($rawMetadata)
	{
		switch ($this->metadataType)
		{
			case 'json':
				return $this->parseJsonMetadata($rawMetadata);
			case 'xml':
				return $this->parseXmlMetadata($rawMetadata);
		}

		return false;
	}

	/**
	 * Decodes a JSON oEmbed response and marks the metadata encoding as UTF-8.
	 *
	 * @param string $rawMetadata
	 * @return array|false Decoded fields, or false when the payload is not a JSON object/array.
	 */
	protected function parseJsonMetadata($rawMetadata)
	{
		$parsedMetadata = json_decode($rawMetadata, true);
		$this->metadataEncoding = 'UTF-8';

		// json_decode() yields null for invalid JSON and a scalar for a bare JSON value;
		// neither is usable as a field map.
		if (!is_array($parsedMetadata))
		{
			return false;
		}

		return $parsedMetadata;
	}

	/**
	 * Parses an XML oEmbed response into a map of child element name => content.
	 *
	 * The metadata encoding is read from the XML declaration, defaulting to UTF-8.
	 *
	 * @param string $rawMetadata
	 * @return array|false Parsed fields, or false when the XML cannot be loaded
	 * or contains no <oembed> element.
	 */
	protected function parseXmlMetadata($rawMetadata)
	{
		$xml = new \CDataXML();
		if (!$xml->LoadString($rawMetadata))
		{
			return false;
		}

		//detect xml encoding
		if (preg_match('/<\?xml[^>]+?encoding=[\'"](.+?)[\'"]\?>/', $rawMetadata, $matches))
		{
			$this->metadataEncoding = $matches[1];
		}
		else
		{
			$this->metadataEncoding = 'UTF-8';
		}

		$dom = $xml->GetTree();
		$mainNode = $dom->elementsByName('oembed');
		if (empty($mainNode[0]))
		{
			// well-formed XML, but not an oEmbed response
			return false;
		}

		$result = [];
		$children = $mainNode[0]->children;
		// NOTE(review): guard assumes children may be a non-array for an empty <oembed/> - confirm against CDataXML.
		if (is_array($children))
		{
			foreach ($children as $node)
			{
				$result[$node->name] = $node->content;
			}
		}

		return $result;
	}

	/**
	 * Downloads the metadata from $this->metadataUrl.
	 *
	 * @param HttpClient $httpClient
	 * @return string|false Whatever HttpClient::get() returns for the metadata URL.
	 */
	protected function getRawMetaData(HttpClient $httpClient)
	{
		return $httpClient->get($this->metadataUrl);
	}

	/**
	 * Creates the default HTTP client: 5 second connect and stream timeouts,
	 * the UrlPreview user agent, and the private-IP option set to false.
	 *
	 * @return HttpClient
	 */
	protected function initHttpClient(): HttpClient
	{
		$httpClient = new HttpClient();
		$httpClient->setTimeout(5);
		$httpClient->setStreamTimeout(5);
		$httpClient->setHeader('User-Agent', UrlPreview::USER_AGENT, true);
		$httpClient->setPrivateIp(false);

		return $httpClient;
	}

	/**
	 * Returns a scalar field of the parsed metadata, or an empty string when the field
	 * is missing, null or not scalar (the response comes from a remote, untrusted server).
	 *
	 * @param array $metadata Parsed oEmbed fields.
	 * @param string $key Field name.
	 * @return string|int|float|bool
	 */
	private function getMetadataField(array $metadata, $key)
	{
		$value = $metadata[$key] ?? '';

		return is_scalar($value) ? $value : '';
	}
}