/usr/local/sphinx/etc/sphinx-blog-xmlpipe2.conf
and include the fields and attributes definition in the source
block:
# Data source "blog": documents are read from an xmlpipe2 XML stream.
# The schema (fields and attributes) is declared here instead of in the
# XML stream itself.
source blog
{
type = xmlpipe2
# Command executed by the indexer; its standard output is the XML stream.
xmlpipe_command = /usr/bin/php /home/abbas/sphinx/makeindex.php
# Full-text fields
xmlpipe_field = title
xmlpipe_field = content
# Attributes: unsigned integer, UNIX timestamp and multi-valued (MVA)
xmlpipe_attr_uint = author_id
xmlpipe_attr_timestamp = publish_date
xmlpipe_attr_multi = category_id
}
# Index "posts", built from the "blog" source declared in this file.
index posts
{
source = blog
# Location (path and base file name) of the index files
path = /usr/local/sphinx/var/data/blog-xmlpipe2
# Attribute (docinfo) storage mode
docinfo = extern
# Documents are treated as UTF-8 encoded text
charset_type = utf-8
}
# Settings for the indexer program
indexer
{
# Memory limit for an indexing run
mem_limit = 32M
}
makeindex.php
script and remove the sphinx:schema element along with all its sub-elements:
<?php
/**
 * makeindex.php - prints all blog posts as a Sphinx xmlpipe2 document set.
 *
 * The XML is built in memory and written to standard output in one go at the
 * very end, so nothing is printed if an error occurs part-way through.
 * No sphinx:schema element is written: fields and attributes are declared in
 * the Sphinx configuration file instead.
 *
 * Errors are reported on STDERR and the script exits with status 1, because
 * standard output is reserved for the XML stream.
 */

// Database connection credentials
$dsn  = 'mysql:dbname=myblog;host=localhost';
$user = 'root';
$pass = '';

// Instantiate the PDO (PHP 5 specific) class
try {
    $dbh = new PDO($dsn, $user, $pass);
    // Make failed queries throw instead of returning false
    $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
} catch (PDOException $e) {
    // Do not echo: anything on standard output would end up in the XML stream
    fwrite(STDERR, 'Connection failed: ' . $e->getMessage() . PHP_EOL);
    exit(1);
}

try {
    // We will use PHP's inbuilt XMLWriter to create the xml structure
    $xmlwriter = new XMLWriter();
    $xmlwriter->openMemory();
    $xmlwriter->setIndent(true);
    $xmlwriter->startDocument('1.0', 'UTF-8');

    // Start the parent docset element
    $xmlwriter->startElement('sphinx:docset');

    // Query to get all posts from the database
    $sql = "SELECT id, title, content, author_id,
            UNIX_TIMESTAMP(publish_date) AS publish_date
            FROM posts";
    $posts = $dbh->query($sql);

    // Prepared once and re-executed for every post with a bound post id
    $catstmt = $dbh->prepare(
        'SELECT category_id FROM posts_categories WHERE post_id = ?'
    );

    // Run a loop and put the post data in XML
    foreach ($posts as $post) {
        // Start the element for holding the actual document (post)
        $xmlwriter->startElement('sphinx:document');

        // Add the id attribute
        $xmlwriter->writeAttribute('id', (string) $post['id']);

        // Set value for the title field
        $xmlwriter->startElement('title');
        $xmlwriter->text((string) $post['title']);
        $xmlwriter->endElement(); // end title

        // Set value for the content field
        $xmlwriter->startElement('content');
        $xmlwriter->text((string) $post['content']);
        $xmlwriter->endElement(); // end content

        // Set value for the author_id attribute
        $xmlwriter->startElement('author_id');
        $xmlwriter->text((string) $post['author_id']);
        $xmlwriter->endElement(); // end attribute

        // Set value for the publish_date attribute
        $xmlwriter->startElement('publish_date');
        $xmlwriter->text((string) $post['publish_date']);
        $xmlwriter->endElement(); // end attribute

        // Find all categories associated with this post
        $catstmt->execute(array($post['id']));
        $categories = $catstmt->fetchAll(PDO::FETCH_COLUMN);

        // Set value for the category_id attribute
        // Multiple category ids should be comma separated
        $xmlwriter->startElement('category_id');
        $xmlwriter->text(implode(',', $categories));
        $xmlwriter->endElement(); // end attribute

        $xmlwriter->endElement(); // end document
    }

    $xmlwriter->endElement(); // end docset
} catch (PDOException $e) {
    fwrite(STDERR, 'Query failed: ' . $e->getMessage() . PHP_EOL);
    exit(1);
}

// Output the xml
print $xmlwriter->flush();
// The closing tag is kept because non-PHP text follows this script.
?>
indexer:
$ /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx-blog-xmlpipe2.conf --all
We added the schema definition, that is, the declaration of the fields and attributes that go into the index, to the configuration file itself.
To define a field we used the xmlpipe_field
option and to define an attribute we used the xmlpipe_attr_*
option. The following are some of the attribute options that can be used:
xmlpipe_attr_uint:
For unsigned integers. Syntax matches that of sql_attr_uint.
xmlpipe_attr_bool:
For Boolean attributes. Syntax matches that of sql_attr_bool.
xmlpipe_attr_timestamp:
For UNIX timestamp attributes. Syntax matches that of sql_attr_timestamp.
xmlpipe_attr_str2ordinal:
For string ordinal attributes. Syntax matches that of sql_attr_str2ordinal.
xmlpipe_attr_float:
For floating point attributes. Syntax matches that of sql_attr_float.
xmlpipe_attr_multi:
For Multi Valued Attributes (MVA).
We then removed the <sphinx:schema> element from the XML stream by modifying our makeindex.php script.
No other change was required in the XML stream.