AccesoDatosXML.php 8.7 KB
<?php
namespace clases\accesoDatos;

/**
 * Reads data from an XML file.
 *
 * The file is consumed in fixed-size chunks and each call to obtenerFila()
 * returns the raw markup of the next element found after the root node
 * (or after the configured custom root / custom child node), so the whole
 * document never has to be held in memory.
 */
class AccesoDatosXML implements iAccesoDatos {
    /** Number of leading elements to skip before the first one is returned. */
    private $posicionActual;
    /** Open stream the XML is read from; null once it has been closed. */
    private $handle;
    /** Size of the XML in bytes (from filesize() or supplied by the caller). */
    public $totalBytes;
    /** Bytes read so far; clamped to $totalBytes once the end is reached. */
    public $readBytes = 0;
    /** Count of elements skipped so far while advancing to $posicionActual. */
    private $nodeIndex = 0;
    /** Buffered XML text that has been read but not yet consumed. */
    private $chunk = "";
    /** Maximum number of bytes requested from the stream per read. */
    private $chunkSize;
    /** Offset inside $chunk from which element parsing starts. */
    private $readFromChunkPos = 0;
    /** Name of the root node once it has been located; unset until then. */
    private $rootNode;
    /** Optional root node name supplied by the caller. */
    private $customRootNode;
    /** Optional child node name at which parsing should start. */
    private $customChildNode;
    /**
     * True while the buffer may still hold unreturned elements, so the next
     * call parses the buffer first instead of reading another chunk.
     */
    private $procesando = false;


    /**
     * Opens (or adopts) the XML stream and stores the parsing options.
     *
     * @param mixed $mixed Path to XML file OR file handle
     * @param int $posicionActual Number of elements to skip before the first returned one
     * @param int $chunkSize Bytes to read per cycle (Optional, default is 32 KiB)
     * @param string $customRootNode Specific root node to use (Optional)
     * @param int $totalBytes Xml file size - Required if supplied file handle
     * @param string $customChildNode Child node at which parsing starts (Optional)
     * @throws \Exception When the file cannot be opened, when a handle is given
     *                    without $totalBytes, or when $mixed is neither a path
     *                    nor a handle.
     */
    public function __construct($mixed, $posicionActual = 0, $chunkSize = 32768, $customRootNode = null, $totalBytes = null, $customChildNode = null) {
        if (is_string($mixed)) {
            $handle = fopen($mixed, "r");
            if ($handle === false) {
                throw new \Exception("Unable to open XML file: " . $mixed);
            }
            $this->handle = $handle;
            $this->totalBytes = isset($totalBytes) ? $totalBytes : filesize($mixed);
        } else if (is_resource($mixed)) {
            if (!isset($totalBytes)) {
                // Fully qualified: an unqualified "Exception" would resolve
                // inside the current namespace and fail to load.
                throw new \Exception("totalBytes parameter required when supplying a file handle.");
            }
            $this->handle = $mixed;
            $this->totalBytes = $totalBytes;
        } else {
            throw new \Exception("Expected a path to an XML file or an open file handle.");
        }

        $this->chunkSize = $chunkSize;
        $this->customRootNode = $customRootNode;
        $this->customChildNode = $customChildNode;

        $this->posicionActual = $posicionActual;
    }

    /**
     * Gets the total read bytes so far
     */
    public function getReadBytes() {
        return $this->readBytes;
    }

    /**
     * Gets the total file size of the xml
     */
    public function getTotalBytes() {
        return $this->totalBytes;
    }

    /**
     * Returns the markup of the next element in the stream.
     *
     * Elements before the configured start position are consumed and
     * discarded. When no further element can be extracted the stream is
     * closed and null is returned.
     *
     * @return string|null Trimmed markup of the element (including its
     *                     children), or null when the data is exhausted.
     */
    public function obtenerFila() {
        $fila = null;

        $continue = true;
        while ($continue) {
            if (!$this->procesando) {
                $continue = $this->readNextChunk();
                if (!isset($this->rootNode) && !$this->findRootNode()) {
                    // Start position not in the buffer yet - read more and retry.
                    continue;
                }
            }

            while (true) {
                $fromChunkPos = (string) substr($this->chunk, $this->readFromChunkPos);
                if (!preg_match('/<([^>]+)>/', $fromChunkPos, $matches)) {
                    // No complete tag buffered; more data is needed.
                    $this->procesando = false;
                    break;
                }

                $element = $matches[1];
                $endTag = "</" . $this->extractElementName($element) . ">";

                $endTagPos = false;
                if (substr($element, -1) === "/") {
                    // Self-closing element: it ends at its own trailing "/>".
                    $endTag = "/>";
                    $endTagPos = strlen($element) - 1;

                    $iPos = strpos($fromChunkPos, "<");
                    if ($iPos !== false) {
                        $endTagPos += $iPos + 1;
                    }
                }

                if ($endTagPos === false) {
                    $endTagPos = strpos($fromChunkPos, $endTag);
                }
                if ($endTagPos === false) {
                    // Closing tag not buffered yet; more data is needed.
                    $this->procesando = false;
                    break;
                }

                $endTagEndPos = $endTagPos + strlen($endTag);
                $candidate = trim(substr($fromChunkPos, 0, $endTagEndPos));

                // Drop everything up to and including the consumed element,
                // using the same offset that delimited the returned markup.
                $this->chunk = (string) substr($fromChunkPos, $endTagEndPos);
                $this->readFromChunkPos = 0;

                if ($this->nodeIndex >= $this->posicionActual) {
                    $fila = $candidate;
                    $this->procesando = true;
                    break 2;
                }

                // Still before the start position: discard and keep going.
                $this->nodeIndex++;
            }
        }

        if ($fila === null) {
            $this->cerrarArchivo();
        }

        return $fila;
    }

    /**
     * Locates the point in the buffer where element parsing should begin
     * and records it in $readFromChunkPos together with $rootNode.
     *
     * @return bool True when the start position was found; false when more
     *              data has to be read first.
     */
    private function findRootNode() {
        if (isset($this->customRootNode)) {
            $rootPos = strpos($this->chunk, "<{$this->customRootNode}");
            if ($rootPos === false) {
                // Clear chunk to save memory, it doesn't contain the root anyway
                $this->readFromChunkPos = 0;
                $this->chunk = "";
                return false;
            }

            // Skip past the end of the opening tag (it may carry attributes).
            $closerPos = strpos($this->chunk, ">", $rootPos);
            if ($closerPos === false) {
                // Opening tag is cut off at the chunk boundary; keep the buffer.
                return false;
            }
            $startPos = $closerPos + 1;

            if (isset($this->customChildNode)) {
                $childPos = strpos($this->chunk, "<{$this->customChildNode}", $startPos);
                if ($childPos === false) {
                    // Didn't find it - read a larger chunk and do everything again
                    return false;
                }
                $startPos = $childPos;
            }

            $this->rootNode = $this->customRootNode;
            $this->readFromChunkPos = $startPos;
            return true;
        }

        // $$-- Valiton change: changed pattern. XML1.0 standard allows almost all
        //                      Unicode characters even Chinese and Cyrillic.
        //                      see:
        //                      http://en.wikipedia.org/wiki/XML#International_use
        preg_match('/<([^>\?]+)>/', $this->chunk, $matches);
        //  --$$
        if (isset($matches[1])) {
            $this->rootNode = $matches[1];
            $this->readFromChunkPos = strpos($this->chunk, $matches[0]) + strlen($matches[0]);
            return true;
        }

        // Clear chunk to save memory, it doesn't contain the root anyway
        $this->readFromChunkPos = 0;
        $this->chunk = "";
        return false;
    }

    /**
     * Extracts the element name from the inside of a tag, i.e. the text up
     * to the first space, CR, LF or tab. The whole text is returned when it
     * contains none of those or starts with one.
     *
     * @param string $element Tag content without the angle brackets
     * @return string
     */
    private function extractElementName($element) {
        $length = strcspn($element, " \r\n\t");
        return $length > 0 ? substr($element, 0, $length) : $element;
    }

    /**
     * Appends the next chunk of the stream to the buffer and updates the
     * read-byte counter with the number of bytes actually received.
     *
     * @return bool True when more data may follow; false when the end of the
     *              data was reached or the stream is no longer open.
     */
    private function readNextChunk() {
        if (!is_resource($this->handle)) {
            return false;
        }

        $data = fread($this->handle, $this->chunkSize);
        if ($data === false || $data === "") {
            $this->readBytes = $this->totalBytes;
            return false;
        }

        $this->chunk .= $data;
        // fread() may deliver fewer bytes than requested, so count what arrived.
        $this->readBytes += strlen($data);
        if ($this->readBytes >= $this->totalBytes || feof($this->handle)) {
            $this->readBytes = $this->totalBytes;
            return false;
        }

        return true;
    }

    /**
     * Closes the file. Safe to call more than once.
     */
    public function cerrarArchivo() {
        if (is_resource($this->handle)) {
            fclose($this->handle);
        }
        $this->handle = null;
    }
}