2018-09-01 01:50:29 +02:00
|
|
|
<?php
|
|
|
|
/**
|
|
|
|
* Parses post markup in order to use specified spee.ch url for assets
|
|
|
|
*
|
|
|
|
* @package LBRYPress
|
|
|
|
*/
|
|
|
|
|
|
|
|
class LBRY_Speech_Parser
|
|
|
|
{
|
2018-10-29 18:10:43 +01:00
|
|
|
/**
|
2018-10-31 19:29:14 +01:00
|
|
|
* Replace img srcset attributes with Spee.ch urls
|
|
|
|
* Check https://developer.wordpress.org/reference/functions/wp_calculate_image_srcset/ hook for details
|
2018-10-29 18:10:43 +01:00
|
|
|
*/
|
2018-10-31 19:17:42 +01:00
|
|
|
public function replace_image_srcset($sources, $size_array, $image_src, $image_meta, $attachment_id)
|
2018-10-24 20:49:46 +02:00
|
|
|
{
|
2018-10-28 19:21:22 +01:00
|
|
|
$new_sources = $sources;
|
|
|
|
$sizes = $image_meta['sizes'];
|
2018-11-09 01:29:08 +01:00
|
|
|
$base_image = pathinfo($image_src)['basename'];
|
2018-10-28 19:21:22 +01:00
|
|
|
|
|
|
|
foreach ($sources as $width => $source) {
|
2018-11-09 01:29:08 +01:00
|
|
|
// Check to see if it is using base image first
|
|
|
|
if ($image_src == $source['url'] && key_exists('speech_asset_url', $image_meta)) {
|
|
|
|
$new_sources[$width]['url'] = $image_meta['speech_asset_url'];
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, find the corresponding size
|
2018-10-31 07:26:19 +01:00
|
|
|
$speech_url = $this->find_speech_url_by_width($sizes, $width);
|
2018-10-28 19:21:22 +01:00
|
|
|
|
|
|
|
if ($speech_url) {
|
|
|
|
$new_sources[$width]['url'] = $speech_url;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return $new_sources;
|
2018-10-24 20:49:46 +02:00
|
|
|
}
|
|
|
|
|
2018-10-31 19:17:42 +01:00
|
|
|
/**
|
2018-10-31 19:29:14 +01:00
|
|
|
* Replaces the output of the 'wp_get_attachment_image_src' function
|
|
|
|
* Check https://developer.wordpress.org/reference/functions/wp_get_attachment_image_src/ for details
|
|
|
|
*/
|
|
|
|
public function replace_attachment_image_src($image, $attachment_id, $size)
|
|
|
|
{
|
|
|
|
// $time_start = microtime(true);
|
|
|
|
if (!$image) {
|
|
|
|
return $image;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Die if we don't have a speech_asset_url
|
|
|
|
$image_meta = wp_get_attachment_metadata($attachment_id);
|
|
|
|
if (!LBRY()->speech->is_published($image_meta)) {
|
|
|
|
return $image;
|
|
|
|
}
|
|
|
|
|
|
|
|
$new_image = $image;
|
|
|
|
$sizes = $image_meta['sizes'];
|
|
|
|
|
|
|
|
// If we have a given size, then use that immediately
|
|
|
|
if (is_string($size)) {
|
|
|
|
switch ($size) {
|
|
|
|
case 'full':
|
|
|
|
$new_image[0] = $image_meta['speech_asset_url'];
|
|
|
|
break;
|
|
|
|
case 'post-thumbnail':
|
|
|
|
if (LBRY()->speech->is_published($sizes['thumbnail'])) {
|
|
|
|
$new_image[0] = $sizes['thumbnail']['speech_asset_url'];
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if (key_exists($size, $sizes) && LBRY()->speech->is_published($sizes[$size])) {
|
|
|
|
$new_image[0] = $sizes[$size]['speech_asset_url'];
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// $time_end = microtime(true);
|
|
|
|
// $time = ($time_end - $time_start) * 1000;
|
|
|
|
// error_log("attachment image source in $time milliseconds");
|
|
|
|
return $new_image;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, we can find it by the url provided
|
|
|
|
$speech_url = $this->find_speech_url_by_file_url($image_meta, $image[0]);
|
|
|
|
if ($speech_url) {
|
|
|
|
$new_image[0] = $speech_url;
|
|
|
|
}
|
|
|
|
|
|
|
|
return $new_image;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Scrape content for urls that have a speech equivalent and replace
|
|
|
|
* @param string $content The content to scrape
|
|
|
|
* @return string The content with Spee.ch urls replaced
|
2018-10-31 19:17:42 +01:00
|
|
|
*/
|
2018-10-31 19:29:14 +01:00
|
|
|
// COMBAK: Need to make this a bit faster. Sitting between 100 and 300ms currently
|
2018-10-29 18:10:43 +01:00
|
|
|
public function replace_urls_with_speech($content)
|
|
|
|
{
|
2018-10-31 19:29:14 +01:00
|
|
|
// $time_start = microtime(true);
|
2018-10-31 19:17:42 +01:00
|
|
|
$new_content = $content;
|
|
|
|
|
|
|
|
$assets = array();
|
|
|
|
|
|
|
|
// Get Images
|
|
|
|
$images = $this->scrape_images($content);
|
|
|
|
foreach ($images as $image) {
|
|
|
|
if ($src = $this->get_src_from_image_tag($image)) {
|
|
|
|
$assets[] = $src;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get Videos
|
|
|
|
$videos = $this->scrape_videos($content);
|
|
|
|
foreach ($videos as $video) {
|
|
|
|
if ($src = $this->get_src_from_video_tag($video)) {
|
|
|
|
$assets[] = $src;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Replace with Speech Urls
|
|
|
|
foreach ($assets as $asset) {
|
|
|
|
$id = $this->rigid_attachment_url_to_postid($asset);
|
|
|
|
$meta = wp_get_attachment_metadata($id);
|
|
|
|
|
|
|
|
if ($meta && LBRY()->speech->is_published($meta) && $speech_url = $this->find_speech_url_by_file_url($meta, $asset)) {
|
|
|
|
$new_content = str_replace($asset, $speech_url, $new_content);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-31 19:29:14 +01:00
|
|
|
// $time_end = microtime(true);
|
|
|
|
// $time = ($time_end - $time_start) * 1000;
|
|
|
|
// error_log("replace content urls in $time milliseconds");
|
2018-10-31 19:17:42 +01:00
|
|
|
return $new_content;
|
2018-10-29 18:10:43 +01:00
|
|
|
}
|
|
|
|
|
2018-10-31 19:17:42 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Scrapes all image tags from content
|
|
|
|
* @param string $content The content to scrape
|
|
|
|
* @return array Array of image tags, empty if none found
|
|
|
|
*/
|
|
|
|
public function scrape_images($content)
|
|
|
|
{
|
2018-11-09 01:29:08 +01:00
|
|
|
// Return empty array if no images
|
|
|
|
if (!$content) {
|
|
|
|
return array();
|
|
|
|
}
|
2018-10-31 19:17:42 +01:00
|
|
|
// Find all images
|
|
|
|
preg_match_all('/<img [^>]+>/', $content, $images);
|
|
|
|
|
|
|
|
// Check to make sure we have results
|
|
|
|
$images = empty($images[0]) ? array() : $images[0];
|
|
|
|
|
|
|
|
return array_unique($images);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Scrapes all video shortcodes from content
|
|
|
|
* @param string $content The content to scrape
|
|
|
|
* @return array Array of video tags, empty if none found
|
|
|
|
*/
|
|
|
|
public function scrape_videos($content)
|
|
|
|
{
|
2018-11-09 01:29:08 +01:00
|
|
|
// Return empty array if no videos
|
|
|
|
if (!$content) {
|
|
|
|
return array();
|
|
|
|
}
|
|
|
|
|
2018-10-31 19:17:42 +01:00
|
|
|
// Only MP4 videos for now
|
|
|
|
preg_match_all('/\[video.*mp4=".*".*\]/', $content, $videos);
|
|
|
|
$videos = empty($videos[0]) ? array() : $videos[0];
|
|
|
|
|
|
|
|
return array_unique($videos);
|
2018-10-31 07:26:19 +01:00
|
|
|
}
|
|
|
|
|
2018-10-29 18:10:43 +01:00
|
|
|
/**
|
2018-10-31 19:17:42 +01:00
|
|
|
* Retrives an attachment ID via an html tag
|
|
|
|
* @param string $type Can be 'image' or 'mp4'
|
|
|
|
* @param string $tag The Tag / shortcode to scrub for an ID
|
|
|
|
* @return int|bool An ID if one is found, false otherwise
|
|
|
|
*/
|
|
|
|
public function get_attachment_id_from_tag($type, $tag)
|
|
|
|
{
|
|
|
|
$attachment_id = false;
|
|
|
|
switch ($type) {
|
|
|
|
case 'image':
|
|
|
|
// Looks for wp image class first, if not, pull id from source
|
|
|
|
if (preg_match('/wp-image-([0-9]+)/i', $tag, $class_id)) {
|
|
|
|
$attachment_id = absint($class_id[1]);
|
|
|
|
} elseif ($src = $this->get_src_from_image_tag($tag)) {
|
|
|
|
$attachment_id = $this->rigid_attachment_url_to_postid($src);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'mp4':
|
|
|
|
if ($src = $this->get_src_from_video_tag($tag)) {
|
|
|
|
$attachment_id = $this->rigid_attachment_url_to_postid($src);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return $attachment_id;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Pulls src from image tag
|
|
|
|
* @param string $image HTML Image tag
|
|
|
|
* @return string The source if found and is a local source, otherwise false
|
|
|
|
*/
|
|
|
|
private function get_src_from_image_tag($image)
|
|
|
|
{
|
|
|
|
if (preg_match('/src="((?:https?:)?\/\/[^"]+)"/', $image, $src) && $src[1]) {
|
|
|
|
if ($this->is_local($src[1])) {
|
|
|
|
return $src[1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Pulls src from video tag
|
|
|
|
* @param string $video The video shortcode
|
|
|
|
* @return string The source if found and is a local source, otherwise false
|
|
|
|
*/
|
|
|
|
private function get_src_from_video_tag($video)
|
|
|
|
{
|
|
|
|
if (preg_match('/mp4="((?:https?:)?\/\/[^"]+)"/', $video, $src) && $src[1]) {
|
|
|
|
if ($this->is_local($src[1])) {
|
|
|
|
return $src[1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Retrieves a speech_asset_url based sizes meta provided
|
|
|
|
* @param array $sizes Image sizes meta array
|
|
|
|
* @param string $width The width to look for
|
|
|
|
* @return string An asset url if found, false if not
|
2018-10-29 18:10:43 +01:00
|
|
|
*/
|
2018-10-31 07:26:19 +01:00
|
|
|
private function find_speech_url_by_width($sizes, $width)
|
2018-09-01 01:50:29 +02:00
|
|
|
{
|
2018-10-28 19:21:22 +01:00
|
|
|
foreach ($sizes as $key => $size) {
|
|
|
|
if ($size['width'] == $width && key_exists('speech_asset_url', $size)) {
|
|
|
|
return $size['speech_asset_url'];
|
|
|
|
}
|
|
|
|
}
|
2018-09-14 02:34:11 +02:00
|
|
|
|
2018-10-28 19:21:22 +01:00
|
|
|
return false;
|
|
|
|
}
|
2018-09-14 02:34:11 +02:00
|
|
|
|
2018-10-31 19:17:42 +01:00
|
|
|
/**
|
|
|
|
* Retrieves a speech_asset_url based on a passed url
|
|
|
|
* @param array $meta The attachment meta data for the asset
|
|
|
|
* @param string $url The URL of the asset being exchanged
|
|
|
|
* @return string The URL of the Speech Asset
|
|
|
|
*/
|
|
|
|
private function find_speech_url_by_file_url($meta, $url)
|
2018-10-31 07:26:19 +01:00
|
|
|
{
|
2018-10-31 19:17:42 +01:00
|
|
|
// See if this looks like video meta
|
|
|
|
if (!key_exists('file', $meta) && key_exists('mime_type', $meta) && $meta['mime_type'] == 'video/mp4') {
|
|
|
|
return $meta['speech_asset_url'];
|
|
|
|
}
|
|
|
|
|
|
|
|
$pathinfo = pathinfo($url);
|
|
|
|
$basename = $pathinfo['basename'];
|
|
|
|
|
|
|
|
// Check main file or if $meta is just a url (video) first
|
|
|
|
if (key_exists('file', $meta) && $basename == wp_basename($meta['file'])) {
|
|
|
|
return $meta['speech_asset_url'];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check to see if we have a meta option here
|
|
|
|
if (key_exists('sizes', $meta)) {
|
|
|
|
// Then check sizes
|
|
|
|
foreach ($meta['sizes'] as $size => $meta) {
|
|
|
|
if ($basename == $meta['file'] && key_exists('speech_asset_url', $meta)) {
|
|
|
|
return $meta['speech_asset_url'];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Couldn't make a match
|
|
|
|
return false;
|
2018-10-31 07:26:19 +01:00
|
|
|
}
|
|
|
|
|
2018-10-31 19:17:42 +01:00
|
|
|
/**
|
|
|
|
* Checks to see if a url is local to this installation
|
|
|
|
* @param string $url
|
|
|
|
* @return boolean
|
|
|
|
*/
|
|
|
|
private function is_local($url)
|
2018-10-28 19:21:22 +01:00
|
|
|
{
|
2018-10-31 19:17:42 +01:00
|
|
|
if (strpos($url, home_url()) !== false) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks for image crop sizes and filters out query params
|
|
|
|
* Courtesy of this post: http://bordoni.me/get-attachment-id-by-image-url/
|
|
|
|
* @param string $url The url of the attachment you want an ID for
|
|
|
|
* @return int The found post_id
|
|
|
|
*/
|
|
|
|
private function rigid_attachment_url_to_postid($url)
|
|
|
|
{
|
|
|
|
$scrubbed_url = strtok($url, '?'); // Clean up query params first
|
|
|
|
$post_id = attachment_url_to_postid($scrubbed_url);
|
|
|
|
|
|
|
|
if (! $post_id) {
|
|
|
|
$dir = wp_upload_dir();
|
|
|
|
$path = $scrubbed_url;
|
|
|
|
|
|
|
|
if (0 === strpos($path, $dir['baseurl'] . '/')) {
|
|
|
|
$path = substr($path, strlen($dir['baseurl'] . '/'));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (preg_match('/^(.*)(\-\d*x\d*)(\.\w{1,})/i', $path, $matches)) {
|
|
|
|
$url = $dir['baseurl'] . '/' . $matches[1] . $matches[3];
|
|
|
|
$post_id = attachment_url_to_postid($url);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (int) $post_id;
|
2018-09-01 01:50:29 +02:00
|
|
|
}
|
|
|
|
}
|