使用 Zend_Search_Lucene 实现中文分词的最简单方法：
// Install the UTF-8 analyzer as Zend_Search_Lucene's default analyzer.
Zend_Search_Lucene_Analysis_Analyzer::setDefault(
    new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8()
);
Zend_Search_Lucene 默认的分析器是针对英文分词的，所以需要用上面这行代码把默认分析器换成 UTF-8 分析器。完整实例如下：
// Build (or extend) the search index with one article taken from $row.

// Make the bundled Zend Framework classes loadable. The directory is
// prepended to the current include path instead of replacing it, so that
// anything already reachable through the include path stays reachable.
set_include_path(NW_ZEND_LUCENE_DIR . PATH_SEPARATOR . get_include_path());
require_once 'Zend/Search/Lucene.php';

// Switch the default analyzer to the UTF-8 one before any text is indexed.
Zend_Search_Lucene_Analysis_Analyzer::setDefault(
    new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8()
);

// The second argument is true only when the index directory does not exist
// yet (presumably the "create a new index" flag; confirm against the
// Zend_Search_Lucene constructor documentation).
$index = new Zend_Search_Lucene($this->index_dir, !is_dir($this->index_dir));

// Index the description as plain text, without its HTML/PHP tags.
$content = strip_tags($row['description']);

$doc = new Zend_Search_Lucene_Document();

// Identification data for the article.
// NOTE(review): UnIndexed fields are expected to be stored with the document
// but not searchable; confirm against the field-type documentation.
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('aid', $row['article_id']));
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('tbl_name', $row['table_name']));
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('db_name', $row['db_name']));
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('link', $row['link']));

// Text fields, passed with an explicit 'utf-8' encoding.
// NOTE(review): Text is expected to be indexed and stored, UnStored indexed
// only (the body cannot be read back from a hit); confirm.
$doc->addField(Zend_Search_Lucene_Field::Text('tags', $row['tags'], 'utf-8'));
$doc->addField(Zend_Search_Lucene_Field::Text('title', $row['title'], 'utf-8'));
$doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $content, 'utf-8'));

$index->addDocument($doc);
$index->commit();
// Search the index for $word and print every hit with its stored fields.

// The second argument is true only when the index directory does not exist.
$indexDirMissing = !is_dir($this->index_dir);
$index = new Zend_Search_Lucene($this->index_dir, $indexDirMissing);

echo "doc count: ", $index->count(), "\r\n";

// Parse the user query as UTF-8, then fetch the hits ordered by the 'aid'
// field, compared numerically, in descending order.
$parsedQuery = Zend_Search_Lucene_Search_QueryParser::parse($word, 'utf-8');
$hits = $index->find($parsedQuery, 'aid', SORT_NUMERIC, SORT_DESC);

$hitCount = count($hits);
echo "Search for \"$word\" returned ", $hitCount, " hits.\n\n";

// The 80-column rule printed before each hit is the same every time.
$separator = str_repeat('-', 80) . "\n";

foreach ($hits as $hit) {
    echo $separator;
    echo 'ID: ', $hit->id, "\n";
    echo 'Score: ', sprintf('%.2f', $hit->score), "\n\n";

    // Print each field name reported by the hit's document with its value.
    $fieldNames = $hit->getDocument()->getFieldNames();
    foreach ($fieldNames as $fieldName) {
        echo "$fieldName: \n";
        echo ' ', trim($hit->$fieldName), "\n";
    }
}