powered by nequal

Changeset 1468 -- 2009-12-20 06:58:04

Comment
[Package Release] Diggin_Http_Response_CharactorEncoding

Diffs

Diggin_Http_Response_CharactorEncoding/tags/release-0.1.0-20091220065804/Diggin/Http/Response/CharactorEncoding.php

@@ -0,0 +1,194 @@
+<?php
+
+/**
+ * Original code borrowed from HTMLScraping
+ *
+ * @see http://www.rcdtokyo.com/etc/htmlscraping/
+ *
+ * ---------------------------------------------------------------------
+ * HTMLScraping class
+ * ---------------------------------------------------------------------
+ * PHP versions 5 (5.1.3 and later)
+ * ---------------------------------------------------------------------
+ * LICENSE: This source file is subject to the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * either version 2.1 of the License, or any later version
+ * that is available through the world-wide-web at the following URI:
+ * http://www.gnu.org/licenses/lgpl.html
+ * If you did not have a copy of the GNU Lesser General Public License
+ * and are unable to obtain it through the web, please write to
+ * the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ---------------------------------------------------------------------
+ */
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * @category   Diggin
+ * @package    Diggin_Http
+ * @subpackage Response
+ */
+class Diggin_Http_Response_CharactorEncoding
+{
+    const DETECT_ORDER = 'ASCII, JIS, UTF-8, EUC-JP, SJIS';
+
+    /**
+     * @var string $_detectOrder
+     */
+    private static $_detectOrder = 'ASCII, JIS, UTF-8, EUC-JP, SJIS';
+
+    /**
+     * Replace the encoding candidate list handed to mb_detect_order()
+     * by detect(). Passing boolean false restores the built-in default
+     * (self::DETECT_ORDER).
+     *
+     * @param string|false $order comma-separated encoding names, or false to reset
+     * @return void
+     */
+    public static function setDetectOrder($order)
+    {
+        $reset = ($order === false);
+        self::$_detectOrder = $reset ? self::DETECT_ORDER : $order;
+    }
+
+    /**
+     * Return the encoding candidate list currently used by detect().
+     *
+     * @return string value last given to setDetectOrder(), or the default list
+     */
+    public static function getDetectOrder()
+    {
+        return self::$_detectOrder;
+    }
+
+    /**
+     * Convert character encoding of a string or an array of strings.
+     * mbstring is used when loaded; otherwise each value goes through iconv.
+     *
+     * @param string|array $vars value(s) to convert
+     * @param string $encodingfrom source encoding name
+     * @param string $encodingto target encoding name
+     * @return string|array converted value(s), same shape as $vars
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception when iconv fails
+     */
+    public static function mbconvert($vars, $encodingfrom, $encodingto = 'UTF-8')
+    {
+        if (extension_loaded('mbstring')) {
+            @mb_convert_variables($encodingto, $encodingfrom, $vars);
+            return $vars;
+        }
+        // Collect iconv results (they used to be discarded) and unwrap a lone string.
+        $converted = array();
+        foreach ((array) $vars as $key => $value) {
+            if (false === $converted[$key] = @iconv($encodingfrom, $encodingto, $value)) {
+                require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+                throw new Diggin_Http_Response_CharactorEncoding_Exception('Failed converting character encoding.');
+            }
+        }
+
+        return is_string($vars) ? $converted[0] : $converted;
+    }
+
+    /**
+     * Build the converting wrapper for a Zend_Http_Response, detecting its encoding first.
+     *
+     * @param Object $response
+     * @param string $encodingto
+     * @return Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception for any other object type
+     */
+    public static function createWrapper($response, $encodingto = 'UTF-8')
+    {
+        if (!($response instanceof Zend_Http_Response)) {
+            require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+            throw new Diggin_Http_Response_CharactorEncoding_Exception('Unknown Object Type..');
+        }
+        $encodingfrom = self::detect($response->getBody(), $response->getHeader('content-type'));
+        require_once 'Diggin/Http/Response/CharactorEncoding/Wrapper/Zf.php';
+        return Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, $encodingfrom, $encodingto);
+    }
+
+    /**
+     * Detect response's character code name: Content-Type charset first,
+     * then <meta http-equiv="content-type">, then mbstring auto-detection.
+     *
+     * @param string $responseBody
+     * @param string $contentType
+     * @return string|false encoding name, false when nothing could be determined
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception
+     */
+    public static function detect($responseBody, $contentType = null)
+    {
+        $encoding = false;
+        if (isset($contentType)) {
+            $encoding = self::_getCharsetFromCType($contentType);
+        }
+        if (!$encoding and preg_match_all('/<meta\b[^>]*?>/si', $responseBody, $matches)) {
+            foreach ($matches[0] as $value) {
+                if (strtolower(self::_getAttribute('http-equiv', $value)) == 'content-type'
+                    and false !== $encoding = self::_getAttribute('content', $value)) {
+                    $encoding = self::_getCharsetFromCType($encoding);
+                    break;
+                }
+            }
+        }
+
+        if (extension_loaded('mbstring') and !$encoding) {
+            $detectOrder = mb_detect_order();
+            mb_detect_order(self::getDetectOrder());
+            $detected = mb_detect_encoding($responseBody);
+            mb_detect_order($detectOrder); // restore once, before anything can throw
+            // A failed detection (false) must not be passed on to mb_preferred_mime_name().
+            if (false === $detected || false === ($encoding = mb_preferred_mime_name($detected))) {
+                require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+                throw new Diggin_Http_Response_CharactorEncoding_Exception('Failed detecting character encoding.');
+            }
+        }
+
+        return $encoding;
+    }
+
+    /**
+     * Extract the charset parameter from a Content-Type value. It is found
+     * by name, so it may follow other parameters and may be quoted.
+     *
+     * @param  string  $string Content-Type header or meta content value
+     * @return string|false normalised charset name, or false when absent/unknown
+     */
+    protected static function _getCharsetFromCType($string)
+    {
+        if (!preg_match('/;\s*charset\s*=\s*[\'"]?([^\'";\s]+)/i', $string, $m)) {
+            return false;
+        }
+        $charset = $m[1];
+        if (preg_match('/^UTF-?8$/i', $charset)) {
+            return 'UTF-8';
+        }
+        if (!function_exists('mb_preferred_mime_name')) {
+            return $charset;
+        }
+        try {
+            return @mb_preferred_mime_name($charset);
+        } catch (ValueError $e) { // PHP 8 throws for unknown names instead of returning false
+            return false;
+        }
+    }
+
+    /**
+     * Get the value of one attribute from a single tag's markup.
+     *
+     * @param string $name attribute name (matched literally, case-insensitively)
+     * @param string $string tag markup, e.g. one <meta ...> element
+     * @return string|false value without surrounding quotes, false if absent
+     */
+    protected static function _getAttribute($name, $string)
+    {
+        // Quote $name so regex metacharacters in it cannot alter the pattern.
+        $search = "'[\s\'\"]\b".preg_quote($name, "'")."\b\s*=\s*([^\s\'\">]+|\'[^\']+\'|\"[^\"]+\")'si";
+        if (!preg_match($search, $string, $matches)) {
+            return false;
+        }
+        return preg_replace('/^\s*[\'\"](.+)[\'\"]\s*$/s', '$1', $matches[1]);
+    }
+}

Diggin_Http_Response_CharactorEncoding/tags/release-0.1.0-20091220065804/Diggin/Http/Response/CharactorEncoding/Wrapper/Zf.php

@@ -0,0 +1,105 @@
+<?php
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * @category   Diggin
+ * @package    Diggin_Http
+ * @subpackage Response_CharactorEncoding
+ */
+
+/** Zend_Http_Response */
+require_once 'Zend/Http/Response.php';
+/** Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface */
+require_once 'Diggin/Http/Response/CharactorEncoding/Wrapper/WrapperInterface.php';
+
+class Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+    extends Zend_Http_Response implements Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+{
+    /**
+     * @var string encoding name the body is converted from
+     */
+    private $_encodingFrom;
+
+    /**
+     * @var string encoding name the body is converted to
+     */
+    private $_encodingTo;
+
+    /**
+     * Copy a response into a wrapper that converts its body in getBody().
+     *
+     * @param Zend_Http_Response $response
+     * @param string $encoding_from
+     * @param string $encoding_to
+     * @return Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+     */
+    public static function createWrapper($response, $encoding_from, $encoding_to = 'UTF-8')
+    {
+        $status  = $response->getStatus();
+        $headers = $response->getHeaders();
+        $rawBody = $response->getRawBody();
+        $version = $response->getVersion();
+        $message = $response->getMessage();
+
+        $wrapper = new self($status, $headers, $rawBody, $version, $message);
+        $wrapper->setEncodingFrom($encoding_from);
+        $wrapper->setEncodingTo($encoding_to);
+        return $wrapper;
+    }
+
+    /**
+     * Return parent::getBody() passed through mbconvert() with the stored encodings.
+     *
+     * @return string
+     */
+    public function getBody()
+    {
+        require_once 'Diggin/Http/Response/CharactorEncoding.php';
+
+        $decoded = parent::getBody();
+        return Diggin_Http_Response_CharactorEncoding::mbconvert(
+            $decoded, $this->getEncodingFrom(), $this->getEncodingTo());
+    }
+
+
+    /**
+     * Store the encoding name the body is converted from.
+     *
+     * @param string $encoding_from
+     */
+    final public function setEncodingFrom($encoding_from)
+    {
+        $this->_encodingFrom = $encoding_from;
+    }
+
+    /**
+     * Read back the encoding name the body is converted from.
+     *
+     * @return string
+     */
+    final public function getEncodingFrom()
+    {
+        return $this->_encodingFrom;
+    }
+
+    /**
+     * Store the encoding name the body is converted to.
+     *
+     * @param string $encoding_to
+     */
+    final public function setEncodingTo($encoding_to)
+    {
+        $this->_encodingTo = $encoding_to;
+    }
+
+    /**
+     * Read back the encoding name the body is converted to.
+     *
+     * @return string
+     */
+    final public function getEncodingTo()
+    {
+        return $this->_encodingTo;
+    }
+}

Diggin_Http_Response_CharactorEncoding/tags/release-0.1.0-20091220065804/Diggin/Http/Response/CharactorEncoding/Wrapper/WrapperInterface.php

@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * @category   Diggin
+ * @package    Diggin_Http
+ * @subpackage Response_CharactorEncoding
+ */
+interface Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+{
+    /**
+     * Create a wrapper instance around a response object.
+     *
+     * @param Zend_Http_Response $response response to wrap
+     * @param string $encoding_from encoding name of the original body
+     * @param string $encoding_to encoding name to convert to (default 'UTF-8')
+     * @return Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+     */
+    public static function createWrapper($response, $encoding_from, $encoding_to = 'UTF-8');
+}

Diggin_Http_Response_CharactorEncoding/tags/release-0.1.0-20091220065804/Diggin/Http/Response/CharactorEncoding/Exception.php

@@ -0,0 +1,32 @@
+<?php
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license.
+ * http://diggin.musicrider.com/LICENSE
+ *
+ * @category   Diggin
+ * @package    Diggin_Http
+ * @subpackage Response_CharactorEncoding
+ * @copyright  2006-2009 sasezaki (http://diggin.musicrider.com)
+ * @license    http://diggin.musicrider.com/LICENSE     New BSD License
+ */
+
+// Is the parent exception file readable? (borrowed from Zend_Loader::isReadable)
+$fh = @fopen('Diggin/Http/Response/Exception.php', 'r', true);
+if ($fh) {
+    @fclose($fh);
+    // @see Diggin_Http_Response_Exception
+    require_once 'Diggin/Http/Response/Exception.php';
+    class Diggin_Http_Response_CharactorEncoding_Exception extends Diggin_Http_Response_Exception
+    {}
+} else {
+    // Parent file not found on the include path: declare the whole chain here.
+    class Diggin_Exception extends Exception{}
+    class Diggin_Http_Exception extends Diggin_Exception{}
+    class Diggin_Http_Response_Exception extends Diggin_Http_Exception{}
+    class Diggin_Http_Response_CharactorEncoding_Exception extends Diggin_Http_Response_Exception{}
+}
+