meng

File: //proc/self/root/proc/self/cwd/nueva/modules/roja45quotationsprofree/vendor/Html2Text/Html2Text.php
<?php

/*
 * Copyright (c) 2005-2007 Jon Abernathy <jon@chuggnutt.com>
 *
 * This script is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * The GNU General Public License can be found at
 * http://www.gnu.org/copyleft/gpl.html.
 *
 * This script is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

/**
 * Converts an HTML string into formatted plain text.
 *
 * The conversion is regex driven: blockquotes and PRE blocks are handled
 * first, then a list of search/replace patterns and callback patterns is
 * applied, remaining tags are stripped, entities are decoded and finally
 * the text is (optionally) word-wrapped to the configured width.
 *
 * Typical usage:
 *
 *     $converter = new Html2Text($html, array('width' => 0));
 *     $plain = $converter->getText();
 */
class Html2Text
{
	/**
	 * Character encoding used for entity handling and for the mb_* functions.
	 */
	const ENCODING = 'UTF-8';

	/**
	 * Flags passed to html_entity_decode() and htmlspecialchars().
	 *
	 * Set in the constructor: ENT_COMPAT, plus ENT_HTML5 on PHP >= 5.4.
	 *
	 * @type int
	 */
	protected $htmlFuncFlags;

	/**
	 * Contains the HTML content to convert.
	 *
	 * @type string
	 */
	protected $html;

	/**
	 * Contains the converted, formatted text.
	 *
	 * @type string
	 */
	protected $text;

	/**
	 * List of preg* regular expression patterns to search for,
	 * used in conjunction with $replace.
	 *
	 * @type array
	 * @see $replace
	 */
	protected $search = array(
		"/\r/",                                           // Non-legal carriage return
		"/[\n\t]+/",                                      // Newlines and tabs
		'/<head\b[^>]*>.*?<\/head>/i',                    // <head>
		'/<script\b[^>]*>.*?<\/script>/i',                // <script>s -- which strip_tags supposedly has problems with
		'/<style\b[^>]*>.*?<\/style>/i',                  // <style>s -- which strip_tags supposedly has problems with
		'/<i\b[^>]*>(.*?)<\/i>/i',                        // <i>
		'/<em\b[^>]*>(.*?)<\/em>/i',                      // <em>
		'/(<ul\b[^>]*>|<\/ul>)/i',                        // <ul> and </ul>
		'/(<ol\b[^>]*>|<\/ol>)/i',                        // <ol> and </ol>
		'/(<dl\b[^>]*>|<\/dl>)/i',                        // <dl> and </dl>
		'/<li\b[^>]*>(.*?)<\/li>/i',                      // <li> and </li>
		'/<dd\b[^>]*>(.*?)<\/dd>/i',                      // <dd> and </dd>
		'/<dt\b[^>]*>(.*?)<\/dt>/i',                      // <dt> and </dt>
		'/<li\b[^>]*>/i',                                 // <li>
		'/<hr\b[^>]*>/i',                                 // <hr>
		'/<div\b[^>]*>/i',                                // <div>
		'/(<table\b[^>]*>|<\/table>)/i',                  // <table> and </table>
		'/(<tr\b[^>]*>|<\/tr>)/i',                        // <tr> and </tr>
		'/<td\b[^>]*>(.*?)<\/td>/i',                      // <td> and </td>
		'/<span class="_html2text_ignore">.+?<\/span>/i', // <span class="_html2text_ignore">...</span>
		'/<(img)\b[^>]*alt=\"([^>"]+)\"[^>]*>/i',         // <img> with alt tag
	);

	/**
	 * List of pattern replacements corresponding to patterns searched.
	 *
	 * @type array
	 * @see $search
	 */
	protected $replace = array(
		'',                              // Non-legal carriage return
		' ',                             // Newlines and tabs
		'',                              // <head>
		'',                              // <script>s -- which strip_tags supposedly has problems with
		'',                              // <style>s -- which strip_tags supposedly has problems with
		'_\\1_',                         // <i>
		'_\\1_',                         // <em>
		"\n\n",                          // <ul> and </ul>
		"\n\n",                          // <ol> and </ol>
		"\n\n",                          // <dl> and </dl>
		"\t* \\1\n",                     // <li> and </li>
		" \\1\n",                        // <dd> and </dd>
		"\t* \\1",                       // <dt> and </dt>
		"\n\t* ",                        // <li>
		"\n-------------------------\n", // <hr>
		"<div>\n",                       // <div>
		"\n\n",                          // <table> and </table>
		"\n",                            // <tr> and </tr>
		"\t\t\\1\n",                     // <td> and </td>
		"",                              // <span class="_html2text_ignore">...</span>
		'[\\2]',                         // <img> with alt tag
	);

	/**
	 * List of preg* regular expression patterns to search for,
	 * used in conjunction with $entReplace.
	 *
	 * @type array
	 * @see $entReplace
	 */
	protected $entSearch = array(
		'/&#153;/i',                                     // TM symbol in win-1252
		'/&#151;/i',                                     // m-dash in win-1252
		'/&(amp|#38);/i',                                // Ampersand: see converter()
		'/[ ]{2,}/',                                     // Runs of spaces, post-handling
	);

	/**
	 * List of pattern replacements corresponding to patterns searched.
	 *
	 * @type array
	 * @see $entSearch
	 */
	protected $entReplace = array(
		'™',         // TM symbol
		'—',         // m-dash
		'|+|amp|+|', // Ampersand: see converter()
		' ',         // Runs of spaces, post-handling
	);

	/**
	 * List of preg* regular expression patterns to search for
	 * and replace using callback function.
	 *
	 * The first capture group of every pattern is the tag name, which
	 * pregCallback() uses to decide how to render the match.
	 *
	 * @type array
	 * @see pregCallback()
	 */
	protected $callbackSearch = array(
		'/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i',           // h1 - h6
		'/[ ]*<(p)( [^>]*)?>(.*?)<\/p>[ ]*/si',                  // <p> with surrounding whitespace.
		'/<(br)[^>]*>[ ]*/i',                                    // <br> with leading whitespace after the newline.
		'/<(b)( [^>]*)?>(.*?)<\/b>/i',                           // <b>
		'/<(strong)( [^>]*)?>(.*?)<\/strong>/i',                 // <strong>
		'/<(th)( [^>]*)?>(.*?)<\/th>/i',                         // <th> and </th>
		'/<(a) [^>]*href=("|\')([^"\']+)\2([^>]*)>(.*?)<\/a>/i'  // <a href="">
	);

	/**
	 * List of preg* regular expression patterns to search for in PRE body,
	 * used in conjunction with $preReplace.
	 *
	 * @type array
	 * @see $preReplace
	 */
	protected $preSearch = array(
		"/\n/",
		"/\t/",
		'/ /',
		'/<pre[^>]*>/',
		'/<\/pre>/'
	);

	/**
	 * List of pattern replacements corresponding to patterns searched for PRE body.
	 *
	 * @type array
	 * @see $preSearch
	 */
	protected $preReplace = array(
		'<br>',
		'&nbsp;&nbsp;&nbsp;&nbsp;',
		'&nbsp;',
		'',
		'',
	);

	/**
	 * Temporary workspace used during PRE processing.
	 *
	 * @type string
	 */
	protected $preContent = '';

	/**
	 * Contains the base URL that relative links should resolve to.
	 *
	 * @type string
	 */
	protected $baseurl = '';

	/**
	 * Indicates whether content in the $html variable has been converted yet.
	 *
	 * @type boolean
	 * @see $html, $text
	 */
	protected $converted = false;

	/**
	 * Contains URL addresses from links to be rendered in plain text.
	 *
	 * @type array
	 * @see buildlinkList()
	 */
	protected $linkList = array();

	/**
	 * Various configuration options (able to be set in the constructor)
	 *
	 * @type array
	 */
	protected $options = array(
		'do_links' => 'inline', // 'none'
		// 'inline' (show links inline)
		// 'nextline' (show links on the next line)
		// 'table' (if a table of link URLs should be listed after the text.
		// 'bbcode' (show links as bbcode)

		'width' => 70,          //  Maximum width of the formatted text, in columns.
		//  Set this value to 0 (or less) to ignore word wrapping
		//  and not constrain text to a fixed-width column.
	);

	/**
	 * Handles the old three-argument constructor signature
	 * ($html, $fromFile, $options).
	 *
	 * @param string $html     Source HTML
	 * @param bool   $fromFile No longer supported; a truthy value makes set_html() throw
	 * @param array  $options  Set configuration options
	 *
	 * @throws \InvalidArgumentException When $fromFile is truthy
	 */
	private function legacyConstruct($html = '', $fromFile = false, array $options = array())
	{
		$this->set_html($html, $fromFile);
		$this->options = array_merge($this->options, $options);
	}

	/**
	 * Accepts either ($html, array $options) or, for backwards compatibility,
	 * the legacy ($html, $fromFile, array $options) argument list. The legacy
	 * form is detected by the second argument not being an array.
	 *
	 * @param string $html    Source HTML
	 * @param array  $options Set configuration options
	 *
	 * @throws \InvalidArgumentException When the legacy $fromFile argument is truthy
	 */
	public function __construct($html = '', $options = array())
	{
		// Must be initialised before the legacy branch below. Previously the
		// legacy path returned early and left the flags as null, which PHP
		// treats as 0 (ENT_NOQUOTES): "&quot;" was then never decoded and got
		// removed by the unknown-entity clean-up in converter().
		$this->htmlFuncFlags = (PHP_VERSION_ID < 50400)
			? ENT_COMPAT
			: ENT_COMPAT | ENT_HTML5;

		// for backwards compatibility
		if (!is_array($options)) {
			call_user_func_array(array($this, 'legacyConstruct'), func_get_args());

			return;
		}

		$this->html = $html;
		$this->options = array_merge($this->options, $options);
	}

	/**
	 * Set the source HTML
	 *
	 * Also marks the instance as not converted, so the next getText() call
	 * runs the conversion again.
	 *
	 * @param string $html HTML source content
	 */
	public function setHtml($html)
	{
		$this->html = $html;
		$this->converted = false;
	}

	/**
	 * @deprecated Use setHtml() instead.
	 *
	 * @param string $html      HTML source content
	 * @param bool   $from_file No longer supported; must be falsy
	 *
	 * @throws \InvalidArgumentException When $from_file is truthy
	 */
	public function set_html($html, $from_file = false)
	{
		if ($from_file) {
			throw new \InvalidArgumentException("Argument from_file no longer supported");
		}

		return $this->setHtml($html);
	}

	/**
	 * Returns the text, converted from HTML.
	 *
	 * The conversion is performed lazily on the first call and its result
	 * is reused until new HTML is set.
	 *
	 * @return string
	 */
	public function getText()
	{
		if (!$this->converted) {
			$this->convert();
		}

		return $this->text;
	}

	/**
	 * @deprecated Use getText() instead.
	 *
	 * @return string
	 */
	public function get_text()
	{
		return $this->getText();
	}

	/**
	 * Prints the converted text.
	 *
	 * @deprecated Print the result of getText() instead.
	 */
	public function print_text()
	{
		print $this->getText();
	}

	/**
	 * Alias of print_text().
	 *
	 * @deprecated Print the result of getText() instead.
	 */
	public function p()
	{
		return $this->print_text();
	}

	/**
	 * Sets a base URL to handle relative links.
	 *
	 * @param string $baseurl
	 */
	public function setBaseUrl($baseurl)
	{
		$this->baseurl = $baseurl;
	}

	/**
	 * @deprecated Use setBaseUrl() instead.
	 *
	 * @param string $baseurl
	 */
	public function set_base_url($baseurl)
	{
		return $this->setBaseUrl($baseurl);
	}

	/**
	 * Runs the conversion with the mbstring internal encoding temporarily
	 * switched to self::ENCODING, restoring the previous encoding afterwards.
	 */
	protected function convert()
	{
		$origEncoding = mb_internal_encoding();
		mb_internal_encoding(self::ENCODING);

		$this->doConvert();

		mb_internal_encoding($origEncoding);
	}

	/**
	 * Converts $html into $text and, when links were collected by the
	 * 'table' link method, appends the numbered "Links:" list.
	 */
	protected function doConvert()
	{
		$this->linkList = array();

		$text = trim($this->html);

		$this->converter($text);

		if ($this->linkList) {
			$text .= "\n\nLinks:\n------\n";
			foreach ($this->linkList as $i => $url) {
				$text .= '[' . ($i + 1) . '] ' . $url . "\n";
			}
		}

		$this->text = $text;

		$this->converted = true;
	}

	/**
	 * Converts an HTML fragment to plain text in place.
	 *
	 * Also called recursively by convertBlockquotes() for the body of each
	 * top-level blockquote.
	 *
	 * @param string $text HTML on input, plain text on output (by reference)
	 */
	protected function converter(&$text)
	{
		$this->convertBlockquotes($text);
		$this->convertPre($text);
		$text = preg_replace($this->search, $this->replace, $text);
		$text = preg_replace_callback($this->callbackSearch, array($this, 'pregCallback'), $text);
		$text = strip_tags($text);
		$text = preg_replace($this->entSearch, $this->entReplace, $text);
		$text = html_entity_decode($text, $this->htmlFuncFlags, self::ENCODING);

		// Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
		$text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);

		// Convert "|+|amp|+|" into "&", need to be done after handling of unknown entities
		// This properly handles situation of "&amp;quot;" in input string
		$text = str_replace('|+|amp|+|', '&', $text);

		// Normalise empty lines
		$text = preg_replace("/\n\s+\n/", "\n\n", $text);
		$text = preg_replace("/[\n]{3,}/", "\n\n", $text);

		// remove leading empty lines (can be produced by eg. P tag on the beginning)
		$text = ltrim($text, "\n");

		if ($this->options['width'] > 0) {
			$text = wordwrap($text, $this->options['width']);
		}
	}

	/**
	 * Helper function called by preg_replace() on link replacement.
	 *
	 * Maintains an internal list of links to be displayed at the end of the
	 * text, with numeric indices to the original point in the text they
	 * appeared. Also makes an effort at identifying and handling absolute
	 * and relative links.
	 *
	 * @param  string      $link         URL of the link
	 * @param  string      $display      Part of the text to associate number with
	 * @param  string|null $linkOverride Link method to use instead of the 'do_links' option
	 * @return string
	 */
	protected function buildlinkList($link, $display, $linkOverride = null)
	{
		$linkMethod = ($linkOverride) ? $linkOverride : $this->options['do_links'];
		if ($linkMethod == 'none') {
			return $display;
		}

		// Ignored link types
		if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
			return $display;
		}

		if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) {
			// Link carries its own scheme: use it untouched.
			$url = $link;
		} else {
			// Relative link: prefix the base URL, inserting a slash if needed.
			$url = $this->baseurl;
			if (mb_substr($link, 0, 1) != '/') {
				$url .= '/';
			}
			$url .= $link;
		}

		if ($linkMethod == 'table') {
			// Strict comparison so that two different URLs are never merged
			// into one entry by PHP's loose string comparison rules.
			if (($index = array_search($url, $this->linkList, true)) === false) {
				$index = count($this->linkList);
				$this->linkList[] = $url;
			}

			return $display . ' [' . ($index + 1) . ']';
		} elseif ($linkMethod == 'nextline') {
			return $display . "\n[" . $url . ']';
		} elseif ($linkMethod == 'bbcode') {
			return sprintf('[url=%s]%s[/url]', $url, $display);
		} else { // link_method defaults to inline
			return $display . ' [' . $url . ']';
		}
	}

	/**
	 * Helper function for PRE body conversion.
	 *
	 * Replaces each PRE element, one at a time, with a DIV whose content has
	 * newlines, tabs and spaces turned into <br> / &nbsp; markup, so that
	 * the later whitespace handling in converter() keeps the layout.
	 *
	 * @param string $text HTML content (modified by reference)
	 */
	protected function convertPre(&$text)
	{
		// get the content of PRE element
		while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
			// Replace br tags with newlines to prevent the search-and-replace callback from killing whitespace
			$this->preContent = preg_replace('/(<br\b[^>]*>)/i', "\n", $matches[1]);

			// Run our defined tags search-and-replace with callback
			$this->preContent = preg_replace_callback(
				$this->callbackSearch,
				array($this, 'pregCallback'),
				$this->preContent
			);

			// convert the content
			$this->preContent = sprintf(
				'<div><br>%s<br></div>',
				preg_replace($this->preSearch, $this->preReplace, $this->preContent)
			);

			// replace the content (use callback because content can contain $0 variable)
			$text = preg_replace_callback(
				'/<pre[^>]*>.*<\/pre>/ismU',
				array($this, 'pregPreCallback'),
				$text,
				1
			);

			// free memory
			$this->preContent = '';
		}
	}

	/**
	 * Helper function for BLOCKQUOTE body conversion.
	 *
	 * Each top-level blockquote is converted to text with converter() (at a
	 * width reduced by two columns), every line is prefixed with "> ", and
	 * the result is put back into $text wrapped in a PRE element.
	 *
	 * @param string $text HTML content (modified by reference)
	 */
	protected function convertBlockquotes(&$text)
	{
		if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
			$originalText = $text;
			$start = 0;
			$taglen = 0;
			$level = 0;
			// Difference in length between the original text and $text after
			// the replacements made so far; offsets refer to the original.
			$diff = 0;
			foreach ($matches[0] as $m) {
				// PREG_OFFSET_CAPTURE yields byte offsets; convert to a character offset
				$m[1] = mb_strlen(substr($originalText, 0, $m[1]));
				if ($m[0][0] == '<' && $m[0][1] == '/') {
					$level--;
					if ($level < 0) {
						$level = 0; // malformed HTML: go to next blockquote
					} elseif ($level > 0) {
						// skip inner blockquote
					} else {
						$end = $m[1];
						$len = $end - $taglen - $start;
						// Get blockquote content
						$body = mb_substr($text, $start + $taglen - $diff, $len);

						// Set text width
						$pWidth = $this->options['width'];
						if ($this->options['width'] > 0) $this->options['width'] -= 2;
						// Convert blockquote content
						$body = trim($body);
						$this->converter($body);
						// Add citation markers and create PRE block
						$body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
						$body = '<pre>' . htmlspecialchars($body, $this->htmlFuncFlags, self::ENCODING) . '</pre>';
						// Re-set text width
						$this->options['width'] = $pWidth;
						// Replace content
						$text = mb_substr($text, 0, $start - $diff)
							. $body
							. mb_substr($text, $end + mb_strlen($m[0]) - $diff);

						$diff += $len + $taglen + mb_strlen($m[0]) - mb_strlen($body);
						unset($body);
					}
				} else {
					if ($level == 0) {
						// Remember where the outermost blockquote starts
						$start = $m[1];
						$taglen = mb_strlen($m[0]);
					}
					$level++;
				}
			}
		}
	}

	/**
	 * Callback function for preg_replace_callback use.
	 *
	 * Dispatches on the tag name captured as the first group of the
	 * $callbackSearch patterns.
	 *
	 * @param  array  $matches PREG matches
	 * @return string
	 */
	protected function pregCallback($matches)
	{
		switch (mb_strtolower($matches[1])) {
			case 'p':
				// Replace newlines with spaces.
				$para = str_replace("\n", " ", $matches[3]);

				// Trim trailing and leading whitespace within the tag.
				$para = trim($para);

				// Add trailing newlines for this para.
				return "\n" . $para . "\n";
			case 'br':
				return "\n";
			case 'b':
			case 'strong':
				return $this->toupper($matches[3]);
			case 'th':
				return $this->toupper("\t\t" . $matches[3] . "\n");
			case 'h':
				return $this->toupper("\n\n" . $matches[3] . "\n\n");
			case 'a':
				// override the link method
				$linkOverride = null;
				if (preg_match('/_html2text_link_(\w+)/', $matches[4], $linkOverrideMatch)) {
					$linkOverride = $linkOverrideMatch[1];
				}
				// Remove spaces in URL (#1487805)
				$url = str_replace(' ', '', $matches[3]);

				return $this->buildlinkList($url, $matches[5], $linkOverride);
		}

		return '';
	}

	/**
	 * Callback function for preg_replace_callback use in PRE content handler.
	 *
	 * Ignores the match and returns the content prepared by convertPre().
	 *
	 * @param  array  $matches PREG matches
	 * @return string
	 */
	protected function pregPreCallback(/** @noinspection PhpUnusedParameterInspection */ $matches)
	{
		return $this->preContent;
	}

	/**
	 * Strtoupper function with HTML tags and entities handling.
	 *
	 * @param  string $str Text to convert
	 * @return string Converted text
	 */
	protected function toupper($str)
	{
		// string can contain HTML tags
		$chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

		// convert toupper only the text between HTML tags
		foreach ($chunks as $i => $chunk) {
			if ($chunk[0] != '<') {
				$chunks[$i] = $this->strtoupper($chunk);
			}
		}

		return implode($chunks);
	}

	/**
	 * Strtoupper multibyte wrapper function with HTML entities handling.
	 *
	 * Entities are decoded before upper-casing so that entity names are not
	 * upper-cased, then special characters are escaped again.
	 *
	 * @param  string $str Text to convert
	 * @return string Converted text
	 */
	protected function strtoupper($str)
	{
		$str = html_entity_decode($str, $this->htmlFuncFlags, self::ENCODING);
		$str = mb_strtoupper($str);
		$str = htmlspecialchars($str, $this->htmlFuncFlags, self::ENCODING);

		return $str;
	}
}