#!/usr/bin/env bash
#
# Generate a Sphinx xmlpipe2 stream on stdout from a set of PmWiki page
# stores. As a side effect, (re)write $IDXPMWIKI, a PHP fragment mapping each
# generated document id to the page file it came from.
#
# Usage (to be followed by a run of "indexer pmwikis"):
#   ./sphinx_sources > pmwiki.sphinxxml \
#     && iconv -f ISO-8859-1 -t utf-8 pmwiki.sphinxxml > pmwiki.sphinxxml.utf-8 \
#     && mv pmwiki.sphinxxml.utf-8 pmwiki.sphinxxml
#   Use /home/fabien/pmwiki.sphinxxml as the file to be indexed.
#
# See http://sphinxsearch.com/docs/manual-2.0.1.html#xmlpipe2

# -e / pipefail are deliberately not enabled: grep exits 1 whenever a page has
# no matching line, which is a normal case here. Critical steps are checked
# explicitly instead.
set -u

# NOTE(review): the markup inside the string literals below had been stripped
# (everything between '<' and '>' was missing). XMLHEADER / XMLFOOTER are
# restored to the docset wrapper the xmlpipe2 format requires; the element
# lists of XMLSCHEMA and KILLLIST could not be recovered and are left exactly
# as found -- TODO restore their <sphinx:schema> / <sphinx:killlist> content.
XMLHEADER='<?xml version="1.0" encoding="utf-8"?>\n<sphinx:docset>'
XMLFOOTER='</sphinx:docset>'
XMLSCHEMA='\n\n\n\n\n\n\n\n\n\n\n\n\n'
# shellcheck disable=SC2034 # defined but never emitted by this script
KILLLIST='\n'

# Root directories of the wikis to index; pages are read from <root>/wiki.d.
WIKIS=(
  "/path/to/wikis/devpim"
  "/path/to/wikis/wiki"
  "/path/to/wikis/mirrors/agiwiki"
  "/path/to/wikis/mirrors/fabien"
  "/path/to/wikis/mirrors/pim"
  "/path/to/wikis/mirrors/saint-maur"
  "/path/to/wikis/mirrors/wiki"
)

# NOTE(review): ISO-8859-1 is a charset name, not a locale name -- confirm the
# tools below (tr's character classes in particular) behave as intended.
LANG=ISO-8859-1 # assumed, should be using the charset of each file instead

# PHP file receiving the '"<doc id>" => "<page file>",' entries.
IDXPMWIKI=/path/to/wikis/devpim/pub/sphinx_pmwikis_doc_ids.php

# First line of $IDXPMWIKI.
# NOTE(review): the original prologue text was lost; a PHP array opener is
# inferred from the '"id" => "path",' entries and the commented-out ');'
# closer at the end of this script. The variable name is a placeholder --
# confirm against the PHP code that includes this file.
IDXHEADER='<?php $docids = array('

# sed program applied to every page line before it is emitted.
SED_SCRIPT=~/bin/pmwiki-to-sphinxxml

#######################################
# Print an error message to stderr and abort.
# Arguments: $* - message
#######################################
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

#######################################
# Emit one xmlpipe2 document for a PmWiki page file.
# Globals:   SED_SCRIPT (read)
# Arguments: $1 - document id, $2 - path of the page file
# Outputs:   the document element on stdout
#######################################
emit_document() {
  local doc_id=$1
  local page=$2

  printf '<sphinx:document id="%s">\n' "$doc_id"

  # Page attributes: every line except the ones filtered out here.
  grep -v \
    -e '^passwd' -e '^updatedto' -e '^text' -e '^diff:' -e '^csum:' \
    -e '^author:' -e '^host:' -e '^newline=' -e '^title=' \
    -- "$page" \
    | sed -f "$SED_SCRIPT"

  # Page body. The final tr removes every control character, newline
  # included, so the closing tag below follows the text on the same line.
  # NOTE(review): tr -s '[:print:]' squeezes runs of identical printable
  # characters ("aa" -> "a"); kept as found -- confirm this is intended.
  grep -e '^text=' -- "$page" \
    | tr -s '[:print:]' \
    | sed -f "$SED_SCRIPT" \
    | tr -d '[:cntrl:]'

  printf '%s\n' '</sphinx:document>'
}

#######################################
# Write the id map prologue, then stream header, one document per page, and
# footer.
# Globals:   WIKIS, IDXPMWIKI, IDXHEADER, SED_SCRIPT, XMLHEADER, XMLSCHEMA,
#            XMLFOOTER (all read)
# Outputs:   xmlpipe2 stream on stdout; warnings on stderr
# Returns:   exits 1 if the sed script or the id map cannot be used
#######################################
main() {
  local wiki wiki_dir page name doc_id
  local -i counter=0

  [[ -r $SED_SCRIPT ]] || die "sed script not readable: $SED_SCRIPT"
  printf '%b\n' "$IDXHEADER" > "$IDXPMWIKI" \
    || die "cannot write id map: $IDXPMWIKI"

  printf '%b\n%b\n\n' "$XMLHEADER" "$XMLSCHEMA"

  shopt -s nullglob # an empty wiki.d yields no iterations
  for wiki in "${WIKIS[@]}"; do
    wiki_dir=$wiki/wiki.d
    if [[ ! -d $wiki_dir ]]; then
      printf 'warning: skipping missing page store %s\n' "$wiki_dir" >&2
      continue
    fi

    for page in "$wiki_dir"/*; do
      name=${page##*/}
      case $name in
        PmWiki.* | *RecentChanges | totalcounter.stat | *,del-*) continue ;;
      esac
      [[ -f $page ]] || continue

      # Sequential ids are unique within a run. A "1" is prepended to avoid
      # type confusion; the 1 + 9 digits shape is kept because %s%N and
      # md5sum values are too large.
      counter=$((counter + 1))
      printf -v doc_id '1%09d' "$counter"

      # This is done manually since the data are not in the MySQL db to be
      # queried.
      printf '"%s" => "%s",\n' "$doc_id" "$page" >> "$IDXPMWIKI"

      emit_document "$doc_id" "$page"
    done
  done

  printf '%b\n' "$XMLFOOTER"
  #echo ');'>> $IDXPMWIKI;
}

main "$@"