PHPWord: Reading .docx files with PHP

PHPWord provides \PhpOffice\PhpWord\IOFactory::createReader. The reader's 'load' method returns the contents of the document as an object:

// Build a reader for the Word2007 (.docx) format, then load the document into an object.
$reader = \PhpOffice\PhpWord\IOFactory::createReader('Word2007');
$phpWord = $reader->load('with_table_macros.docx');

However, this object is too complex if the intention is just to get the text contents for search. The complete code would look like this:

<?php
//https://phpword.readthedocs.io/en/latest/writersreaders.html
//https://stackoverflow.com/questions/10646445/read-word-document-in-php
require_once 'vendor/autoload.php';
use PhpOffice\PhpWord\TemplateProcessor;

$templatesDocsFolder = "docs/templates/";
$templatesDocName = "with_table_macros.docx";
$templatesDocPath = $templatesDocsFolder.$templatesDocName;
$phpWord = \PhpOffice\PhpWord\IOFactory::createReader('Word2007')->load($templatesDocPath);

/**
 * Collect the text held by a PHPWord element, descending into nested elements.
 *
 * Every child is visited, not just the first one: a single sentence is often
 * split across several child elements, so reading only index 0 loses text.
 *
 * @param object $element Any element returned by getElements() / getCells().
 * @return string The concatenated text, or '' when the element carries none.
 */
function collectElementText($element)
{
    // Tables expose rows and cells rather than a flat element list.
    if ($element instanceof \PhpOffice\PhpWord\Element\Table) {
        $cellTexts = array();
        foreach ($element->getRows() as $row) {
            foreach ($row->getCells() as $cell) {
                $cellTexts[] = collectElementText($cell);
            }
        }
        return implode(' ', $cellTexts);
    }

    // Anything that exposes getElements() is treated as a container of children.
    if (method_exists($element, 'getElements')) {
        $parts = array();
        foreach ($element->getElements() as $child) {
            $parts[] = collectElementText($child);
        }
        // Children of a cell are kept apart with a space; children of any
        // other container are joined as-is so split words are not broken up.
        $glue = ($element instanceof \PhpOffice\PhpWord\Element\Cell) ? ' ' : '';
        return implode($glue, $parts);
    }

    if (method_exists($element, 'getText')) {
        $value = $element->getText();
        // NOTE(review): getText() is assumed not to return a string for every
        // element type, so objects and arrays are handled defensively here.
        if (is_object($value)) {
            return collectElementText($value);
        }
        if (is_array($value)) {
            return implode('', array_filter($value, 'is_scalar'));
        }
        return (string) $value;
    }

    // Elements without text produce no output.
    return '';
}

foreach ($phpWord->getSections() as $section) {
    foreach ($section->getElements() as $element) {
        if ($element instanceof \PhpOffice\PhpWord\Element\Table) {
            echo '<p>table contents start</p>';
            foreach ($element->getRows() as $row) {
                foreach ($row->getCells() as $cell) {
                    // Document text is escaped because it is written into HTML output.
                    echo htmlspecialchars(collectElementText($cell), ENT_QUOTES, 'UTF-8') . '<br>';
                }
                echo '<br>';
            }
            echo '<p>table ended </p>';
            continue;
        }

        $text = collectElementText($element);
        if ($text !== '') {
            echo htmlspecialchars($text, ENT_QUOTES, 'UTF-8') . '<br>';
        }
    }
}
?>

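So a simpler way can be adopted using PHP's zip functions (e.g. 'zip_read'): a .docx file is a ZIP archive, and the document text is stored in its "word/document.xml" entry.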

<?php
//https://phpword.readthedocs.io/en/latest/writersreaders.html
//https://stackoverflow.com/questions/10646445/read-word-document-in-php
require_once 'vendor/autoload.php';
use PhpOffice\PhpWord\TemplateProcessor;
/**
 * Extract the plain text of a .docx file.
 *
 * A .docx file is a ZIP archive; the text is taken from its
 * "word/document.xml" entry. ZipArchive is used instead of
 * zip_open()/zip_read(), which are deprecated as of PHP 8.0.
 *
 * @param string $filename Path to the .docx file.
 * @return string|false The text with table cells separated by a space and
 *                      paragraphs terminated by "\r\n"; '' when the archive
 *                      has no word/document.xml entry; false when the file
 *                      is missing or cannot be opened as a ZIP archive.
 */
function read_docx($filename){

    if (!$filename || !is_file($filename)) {
        return false;
    }

    $zip = new ZipArchive();
    if ($zip->open($filename) !== true) {
        return false;
    }

    // Fetch the one entry we need directly instead of scanning every entry.
    $content = $zip->getFromName('word/document.xml');
    $zip->close();

    if ($content === false) {
        return '';
    }

    // A paragraph that closes one table cell right before the next cell opens
    // becomes a single space, so the cells of a row stay on one line.
    $content = str_replace('</w:p></w:tc><w:tc>', ' ', $content);
    // Every other paragraph end becomes a line break. Matching on </w:p> alone
    // (rather than </w:r></w:p>) also covers paragraphs whose last child is
    // not a run, which would otherwise be glued to the following paragraph.
    $content = str_replace('</w:p>', "\r\n", $content);

    return strip_tags($content);
}
// read_docx() returns false when the file is missing or cannot be opened,
// so check for that instead of silently printing an empty macro list.
$doc_contents = read_docx("WPC-A4.docx");
if ($doc_contents === false) {
    echo "<p>Unable to read WPC-A4.docx</p>";
} else {
    echo $doc_contents;
    // Collect every __macro__ placeholder (no whitespace between the double
    // underscores) and list each distinct one once.
    preg_match_all("/(__([^\s]+)__)/sU", $doc_contents, $matches);
    echo "<pre>".print_r(array_values(array_unique($matches[1])),true)."</pre>";
}
?>