powered by nequal
Home » CSV_Iterator » Timeline » 1058

Changeset 1058 -- 2009-08-04 18:29:14

Comment
[Add Tag:Release] CSV_Iterator

Diffs

CSV_Iterator/tags/0.1.0-beta/tests/src/CSV/CSV_IteratorTest.php

@@ -0,0 +1,164 @@
+<?php
+// Prepend the candidate source directories to the include path so Iterator.php can be required.
+ini_set("include_path", "../src/CSV".PATH_SEPARATOR."../../../src/CSV".PATH_SEPARATOR.ini_get("include_path"));
+require_once 'PHPUnit/Framework.php';
+
+require_once 'Iterator.php';
+
+/**
+ * Test class for CSV_Iterator.
+ * Generated by PHPUnit on 2009-07-31 at 18:25:12.
+ */
+class CSV_IteratorTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * Opens a file from the fixtures directory with CSV_Iterator.
+     *
+     * @param string $file      fixture file name
+     * @param string $encoding  encoding of the fixture
+     * @param string $delimiter field delimiter
+     * @param string $enclosure field enclosure
+     * @return CSV_Iterator
+     */
+    private function openFixture($file, $encoding = 'utf-8', $delimiter = ',', $enclosure = '"')
+    {
+        $path = realpath(dirname(__FILE__) . '/../../fixtures/' . $file);
+        return new CSV_Iterator($path, $encoding, $delimiter, $enclosure);
+    }
+
+    /**
+     * rewind() puts the iterator back on the first data row.
+     *
+     * @dataProvider constructData
+     */
+    public function testRewind($file, $encoding, $delimiter, $enclosure, $expected)
+    {
+        $iterator = $this->openFixture($file, $encoding, $delimiter, $enclosure);
+        $iterator->rewind();
+        $this->assertEquals(reset($expected), $iterator->current());
+    }
+
+    /**
+     * A freshly constructed iterator already exposes the first data row.
+     *
+     * @dataProvider constructData
+     */
+    public function testCurrent($file, $encoding, $delimiter, $enclosure, $expected)
+    {
+        $iterator = $this->openFixture($file, $encoding, $delimiter, $enclosure);
+        $this->assertEquals(reset($expected), $iterator->current());
+    }
+
+    /**
+     * setRowLength() accepts null or an integer and rejects any other type.
+     */
+    public function testSetRowLength()
+    {
+        $iterator = $this->openFixture('withHeader.csv');
+        $iterator->setRowLength(null);
+        $iterator->setRowLength(1024);
+        try {
+            $iterator->setRowLength('1024');
+        } catch (UnexpectedValueException $e) {
+            $this->assertTrue(strpos($e->getMessage(), 'integer or null') !== false);
+            return;
+        }
+        $this->fail('A non-integer row length must raise UnexpectedValueException.');
+    }
+
+    /**
+     * Iterating with foreach yields every expected row, in order.
+     *
+     * @dataProvider constructData
+     */
+    public function testForeach($file, $encoding, $delimiter, $enclosure, $expected)
+    {
+        $rows = 0;
+        reset($expected);
+        foreach ($this->openFixture($file, $encoding, $delimiter, $enclosure) as $key => $row) {
+            $this->assertEquals(current($expected), $row, 'at ' . $key);
+            next($expected);
+            ++$rows;
+        }
+        $this->assertEquals(count($expected), $rows, 'does not loop all.');
+    }
+
+    /**
+     * Data provider: one set per fixture file, giving the file name, its encoding,
+     * the delimiter, the enclosure and the rows the iterator is expected to yield.
+     *
+     * @return array
+     */
+    public function constructData()
+    {
+        return array(
+            array('withHeader.csv', 'utf-8', ',', '"', array(
+                    array('header1'=>'1-1', 'header 2'=>'1-2', 'header3'=>'1-3'),
+                    array('header1'=>'2 1', 'header 2'=>'2 2', 'header3'=>'2 3'),
+                    array('header1'=>'3
+1', 'header 2'=>'
+3
+
+2
+', 'header3'=>'
+3 3'),
+                )),
+            array('withHeader.ja.utf-8.csv', 'utf-8', ',', '"', array(
+                    array("あいうえお"=>'一の一',"漢字ヘッダ"=>'一の弐',"漢字 ヘッダ"=>'壱の参'),
+                    array("あいうえお"=>'弐 壱',"漢字ヘッダ"=>'弐 弐',"漢字 ヘッダ"=>'弐 参'),
+                )),
+             array('withHeader.ja.shift_jis.csv', 'shift_jis', ',', '"', array(
+                    array("あいうえお"=>'一の一',"漢字ヘッダ"=>'一の弐',"漢字 ヘッダ"=>'壱の参'),
+                    array("あいうえお"=>'弐 壱',"漢字ヘッダ"=>'弐 弐',"漢字 ヘッダ"=>'弐 参'),
+                )),
+             array('enclosureWithHeader.ja.shift_jis.csv', 'shift_jis', ',', '@', array(
+                    array("あいうえお"=>'一の一',"漢字ヘッダ"=>'一の弐',"漢字 ヘッダ"=>'ァゼソゾタダチボポマミ'),
+                    array("あいうえお"=>'弐 壱',"漢字ヘッダ"=>'弐 弐',"漢字 ヘッダ"=>'弐 参'),
+                )),
+             array('withHeader.ja.iso-2022-jp.csv', 'iso-2022-jp', ',', '"', array(
+                    array("あいうえお"=>'一の一',"漢字ヘッダ"=>'一の弐',"漢字 ヘッダ"=>'壱の参'),
+                    array("あいうえお"=>'弐 壱',"漢字ヘッダ"=>'弐 弐',"漢字 ヘッダ"=>'弐 参'),
+                )),
+             array('escapeWithHeader.csv', 'utf-8', ',', '"', array(
+                    array('"a'=>'c"c','b"'=>'"'),
+                )),
+             array('withHeader.utf-16.csv', 'utf-16', ',', '"', array(
+                    array('Т'=>'夢','逢'=>'琢'),
+                )),
+             array('withHeader.utf-16le.csv', 'utf-16', ',', '"', array(
+                    array('Т'=>'夢','逢'=>'琢
+あ'),
+                )),
+             array('withHeader.utf-16be.csv', 'utf-16', ',', '"', array(
+                    array('Т'=>'夢','逢'=>'琢
+琢'),
+                )),
+            array('withHeaderWithoutEOLatEOF.csv', 'ascii', ',', '"', array(
+                    array('header1'=>'value1', 'header2'=>'value2'),
+                )),
+       );
+    }
+
+    /**
+     * Recursively converts the keys and string values of an array to another encoding.
+     *
+     * @param array       $array         array to convert
+     * @param string      $to_encoding   target encoding
+     * @param string|null $from_encoding source encoding, passed through to mb_convert_encoding()
+     * @return array
+     */
+    protected static function convert_array_encoding(array $array, $to_encoding, $from_encoding = null)
+    {
+        $converted = array();
+        foreach ($array as $key => $value) {
+            if (is_array($value)) {
+                $value = self::convert_array_encoding($value, $to_encoding, $from_encoding);
+            } elseif (is_string($value)) {
+                $value = mb_convert_encoding($value, $to_encoding, $from_encoding);
+            }
+            $converted[mb_convert_encoding($key, $to_encoding, $from_encoding)] = $value;
+        }
+        return $converted;
+    }
+}
+?>
属性に変更があったパス: CSV_Iterator/tags/0.1.0-beta/tests/src/CSV/CSV_IteratorTest.php
___________________________________________________________________
名前: svn:keywords
+ id
名前: svn:eol-style
+ native

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/escapeWithHeader.csv

@@ -0,0 +1,2 @@
+"""a","b"""
+"c""c",""""

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.utf-16.csv

表示できません: バイナリ形式としてマークされたファイルです。
svn:mime-type = application/octet-stream
属性に変更があったパス: CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.utf-16.csv
___________________________________________________________________
名前: svn:mime-type
+ application/octet-stream

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.utf-16le.csv

表示できません: バイナリ形式としてマークされたファイルです。
svn:mime-type = application/octet-stream
属性に変更があったパス: CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.utf-16le.csv
___________________________________________________________________
名前: svn:mime-type
+ application/octet-stream

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeaderWithoutEOLatEOF.csv

@@ -0,0 +1,2 @@
+header1,header2
+value1,value2
\ ファイルの末尾に改行がありません

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.utf-16be.csv

表示できません: バイナリ形式としてマークされたファイルです。
svn:mime-type = application/octet-stream
属性に変更があったパス: CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.utf-16be.csv
___________________________________________________________________
名前: svn:mime-type
+ application/octet-stream

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/enclosureWithHeader.ja.shift_jis.csv

@@ -0,0 +1,3 @@
+@あいうえお@,漢字ヘッダ,@漢字 ヘッダ@
+一の一,@一の弐@,@ァゼソゾタダチボポマミ@
+@弐 壱@,@弐 弐@,@弐 参@

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.ja.shift_jis.csv

@@ -0,0 +1,3 @@
+"あいうえお",漢字ヘッダ,"漢字 ヘッダ"
+一の一,"一の弐","壱の参"
+"弐 壱","弐 弐","弐 参"

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.ja.utf-8.csv

@@ -0,0 +1,3 @@
+"あいうえお",漢字ヘッダ,"漢字 ヘッダ"
+一の一,"一の弐","壱の参"
+"弐 壱","弐 弐","弐 参"

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.ja.iso-2022-jp.csv

表示できません: バイナリ形式としてマークされたファイルです。
svn:mime-type = application/octet-stream
属性に変更があったパス: CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.ja.iso-2022-jp.csv
___________________________________________________________________
名前: svn:mime-type
+ application/octet-stream

CSV_Iterator/tags/0.1.0-beta/tests/fixtures/withHeader.csv

@@ -0,0 +1,10 @@
+header1,"header 2",header3
+1-1,1-2,1-3
+"2 1","2 2","2 3"
+"3
+1","
+3
+
+2
+","
+3 3"

CSV_Iterator/tags/0.1.0-beta/src/CSV/Iterator.php

@@ -0,0 +1,276 @@
+<?php
+/**
+ * Iterates over a CSV file and yields each data row as an array keyed by the header fields.
+ *
+ * Every line is converted to UTF-8 before it is parsed. Physical lines are expected to end
+ * with CRLF, encoded as UTF-16 code units for UTF-16 input (see _setEncoding()).
+ */
+class CSV_Iterator implements Iterator
+{
+    /** @var resource handle of the CSV file */
+    private $filePointer;
+    /** @var string field delimiter */
+    private $delimiter;
+    /** @var string field enclosure */
+    private $enclosure;
+    /** @var string encoding used to decode the file */
+    private $encoding;
+    /** @var string byte sequence that terminates a physical line */
+    private $eol;
+    /** @var int zero-based index of the current data row */
+    private $rowCounter;
+    /** @var array|null current row keyed by header field, null when exhausted */
+    private $currentRow;
+    /** @var int|null maximum number of bytes read per physical line, null for no limit */
+    private $rowLength;
+    /** @var array header fields used as keys of every row */
+    private $header;
+    /** @var int file offset of the first data row */
+    private $offset;
+    /** @var string|null encoding of the returned values, null to keep UTF-8 */
+    private $outputEncoding;
+
+    /**
+     * Opens the file, selects the line terminator and positions the iterator on the first row.
+     *
+     * @param string $file CSV file to iterate
+     * @param string $encoding File encoding
+     * @param string $delimiter The field delimiter. Default is ','.
+     * @param string $enclosure The field enclosure. Default is '"'.
+     * @param array $header An array containing header fields. The first line is used as header if empty or not supplied.
+     * @param integer $rowlength The maximum length of each line. Default is NULL, which means unlimited.
+     * @param string $outputEncoding Encoding of the returned values. NULL or 'UTF-8' leaves them in UTF-8.
+     * @throws RuntimeException if the file cannot be opened or a row does not match the header
+     * @throws UnexpectedValueException if $rowlength is neither an integer nor NULL
+     */
+    public function __construct($file, $encoding = 'utf-8', $delimiter = ',', $enclosure = '"', array $header = array(), $rowlength = null, $outputEncoding = null)
+    {
+        // XXX: it is unclear how the source encoding of these two arguments should be specified,
+        // so the choice is left to PHP; passing anything but ASCII characters will probably fail.
+        $this->delimiter = mb_convert_encoding($delimiter, 'utf-8');
+        $this->enclosure = mb_convert_encoding($enclosure, 'utf-8');
+
+        $this->setRowLength($rowlength);
+        $handle = fopen($file, 'rb');
+        if (!is_resource($handle)) {
+            throw new RuntimeException('Unable to open CSV file: ' . $file);
+        }
+        $this->filePointer = $handle;
+        $this->_setEncoding($encoding);
+        $this->setOutputEncoding($outputEncoding);
+
+        $this->header = empty($header) ? $this->readRow() : $header;
+        $this->offset = ftell($this->filePointer);
+        $this->rewind();
+    }
+
+    /**
+     * Moves back to the first data row.
+     */
+    #[\ReturnTypeWillChange]
+    public function rewind()
+    {
+        fseek($this->filePointer, $this->offset);
+        $this->rowCounter = -1;
+        $this->next();
+    }
+
+    /**
+     * Reads the next row and makes it the current one.
+     *
+     * @throws RuntimeException if the row has a different number of columns than the header
+     */
+    #[\ReturnTypeWillChange]
+    public function next()
+    {
+        ++$this->rowCounter;
+        $data = $this->readRow();
+        if ($data !== null && count($data) !== count($this->header)) {
+            throw new RuntimeException('CSV parse error. Number of columns is not equal to number of header at row #'.  $this->rowCounter .'.');
+        }
+        $this->currentRow = $data ? array_combine($this->header, $data) : null;
+    }
+
+    /**
+     * @return array|null the current row keyed by header field, or null past the last row
+     */
+    #[\ReturnTypeWillChange]
+    public function current()
+    {
+        return $this->currentRow;
+    }
+
+    /**
+     * @return int zero-based index of the current data row
+     */
+    #[\ReturnTypeWillChange]
+    public function key()
+    {
+        return $this->rowCounter;
+    }
+
+    /**
+     * @return bool whether a current row is available
+     */
+    #[\ReturnTypeWillChange]
+    public function valid()
+    {
+        return is_array($this->currentRow);
+    }
+
+    /**
+     * Reads one row of CSV.
+     *
+     * This is a port of the original code written by yossy.
+     *
+     * @author yossy
+     * @author MugeSo
+     *
+     * @see http://yossy.iimp.jp/wp/?p=56
+     * @return array|null the fields of the row, or null when there is nothing left to read
+     */
+    private function readRow()
+    {
+        $d = preg_quote($this->delimiter, '/');
+        $e = preg_quote($this->enclosure, '/');
+
+        // An enclosed field may contain line breaks, so keep appending physical lines until
+        // the enclosures are balanced. Each line is converted to UTF-8 first so that the
+        // multibyte text can be handled safely by the regular expressions below.
+        $line = '';
+        while (!feof($this->filePointer)) {
+            $line .= mb_convert_encoding(self::getLine($this->filePointer, $this->rowLength, $this->eol), 'utf-8', $this->encoding);
+            if (preg_match_all('/' . $e . '/u', $line, $ignored) % 2 === 0) {
+                break;
+            }
+        }
+        // Compare with '' rather than empty(): a last line consisting of "0" is still a row.
+        if ($line === '') {
+            return null;
+        }
+
+        // Trim the surrounding whitespace and line terminator, but never the delimiter itself
+        // (it may be a tab or a space), then append the raw delimiter so that every field,
+        // including the last one, is followed by one.
+        $blank = str_replace($this->delimiter, '', " \t\n\r\0\x0B");
+        $csv_line = trim($line, $blank) . $this->delimiter;
+        $csv_pattern = '/(' . $e . '[^' . $e . ']*(?:' . $e . $e . '[^' . $e . ']*)*' . $e . '|[^' . $d . ']*)' . $d . '/u';
+        preg_match_all($csv_pattern, $csv_line, $csv_matches);
+        $columns = isset($csv_matches[1]) ? $csv_matches[1] : array();
+
+        $csv_data = array();
+        foreach ($columns as $column) {
+            // Unwrap an enclosed field, then collapse doubled enclosures into a single one.
+            $column = preg_replace('/^' . $e . '(.*)' . $e . '$/us', '$1', $column);
+            $column = str_replace($this->enclosure . $this->enclosure, $this->enclosure, $column);
+            if ($this->outputEncoding) {
+                $column = mb_convert_encoding($column, $this->outputEncoding, 'utf-8');
+            }
+            $csv_data[] = $column;
+        }
+
+        return $csv_data;
+    }
+
+    /**
+     * Sets the maximum number of bytes read for one physical line.
+     *
+     * Not normally needed.
+     *
+     * @param int|null $length maximum length, or null for no limit
+     * @throws UnexpectedValueException if $length is neither an integer nor null
+     */
+    public function setRowLength($length)
+    {
+        if (!is_int($length) && $length !== null) {
+            throw new UnexpectedValueException('argument #1 should be integer or null.');
+        }
+        $this->rowLength = $length;
+    }
+
+    /**
+     * Stores the input encoding and selects the matching line terminator.
+     *
+     * For 'UTF-16' the byte order mark decides the byte order and is skipped; when there is
+     * none the file is treated as big endian.
+     *
+     * @param string $encoding encoding of the file
+     */
+    private function _setEncoding($encoding)
+    {
+        rewind($this->filePointer);
+        $crlfLittleEndian = "\x0d\x00\x0a\x00";
+        $crlfBigEndian = "\x00\x0d\x00\x0a";
+        $this->eol = "\x0d\x0a";
+        switch (strtoupper($encoding)) {
+            case 'UTF-16':
+                $bom = fread($this->filePointer, 2);
+                if ($bom === "\xff\xfe") {
+                    $encoding = 'UTF-16LE';
+                    $this->eol = $crlfLittleEndian;
+                } else {
+                    if ($bom === "\xfe\xff") {
+                        $encoding = 'UTF-16BE';
+                    } else {
+                        // The first two bytes are not a byte order mark, so go back to the
+                        // start of the file; the content is treated as big endian.
+                        rewind($this->filePointer);
+                    }
+                    $this->eol = $crlfBigEndian;
+                }
+                break;
+            case 'UTF-16LE':
+                $this->eol = $crlfLittleEndian;
+                break;
+            case 'UTF-16BE':
+                $this->eol = $crlfBigEndian;
+                break;
+        }
+        $this->encoding = $encoding;
+    }
+
+    /**
+     * Sets the encoding of the values returned by the iterator.
+     *
+     * @param string|null $encoding target encoding; null or 'UTF-8' keeps the values in UTF-8
+     */
+    public function setOutputEncoding($encoding)
+    {
+        if ($encoding !== null && strtoupper($encoding) === 'UTF-8') {
+            $encoding = null;
+        }
+        $this->outputEncoding = $encoding;
+    }
+
+    /**
+     * Reads one line from a stream, up to and including the terminator.
+     *
+     * Written because stream_get_line() has a bug.
+     *
+     * @see http://bugs.php.net/bug.php?id=49148
+     * @param resource $fp file pointer
+     * @param int $buf_size maximum size in bytes; a falsy value means no limit
+     * @param string $eol line terminator
+     * @return string the characters of one line
+     */
+    static public function getLine($fp, $buf_size, $eol)
+    {
+        $line = '';
+        $eol_len = strlen($eol);
+        $eol_last = $eol_len > 0 ? $eol[$eol_len - 1] : null;
+        if (!$buf_size) {
+            $buf_size = PHP_INT_MAX;
+        }
+        // Check the size limit before reading so that no byte is consumed and then dropped.
+        while (strlen($line) < $buf_size && ($c = fgetc($fp)) !== false) {
+            $line .= $c;
+            // Compare the tail of the line with the terminator, so that a terminator that
+            // follows a partial match (e.g. "\r\r\n") is still recognised.
+            if ($c === $eol_last && substr($line, -$eol_len) === $eol) {
+                break;
+            }
+        }
+        return $line;
+    }
+}
+?>
属性に変更があったパス: CSV_Iterator/tags/0.1.0-beta/src/CSV/Iterator.php
___________________________________________________________________
名前: svn:keywords
+ id
名前: svn:eol-style
+ native
属性に変更があったパス: CSV_Iterator/tags/0.1.0-beta
___________________________________________________________________
名前: svn:ignore
+ nbproject