목차
Zend_Search_Lucene 는 완전히 PHP 5 로 작성된 범용적인 텍스트 검색 엔진입니다. 인덱스를 파일 시스템상에 보존하기 때문에 데이터베이스 서버를 필요로 하지 않고, 대부분의 PHP 웹 사이트상에서 동작시킬 수 있습니다. Zend_Search_Lucene 는 이하의 기능을 서포트하고 있습니다.
중요도에 의한 검색 - 가장 잘 일치하는 결과가 가장 먼저 반환됩니다
다양하고 강력한 검색 방식: 프레이즈 검색, 와일드카드 검색, fuzzy reference, 범위 지정 검색 등 [6]
지정한 필드 (예: 타이틀, 저자, 내용) 에 의한 검색
Zend_Search_Lucene 는 Apache Lucene 프로젝트로부터 파생된 것입니다. 현재 서포트하고 있는 것은 Lucene 버전 2.2 입니다 [7]. Lucene 에 대한 자세한 내용은 http://lucene.apache.org/java/docs/ (http://lucene.apache.org/java/2_2_0/) 를 참조해 주십시오.
![]() |
|
이전의 Zend_Search_Lucene 구현은 Lucene 1.9 인덱스 포맷에 대응하고 있습니다. 현시점에서는, 이러한 버전으로 작성된 인덱스는 Zend_Search_Lucene 를 업그레이드하면 자동으로 Lucene 2.1 포맷으로 업그레이드되게 되어 있습니다. 일단 업그레이드된 인덱스는 구버전의 Zend_Search_Lucene 로는 읽어들일 수 없습니다. |
Zend_Search_Lucene 는 문서 단위로 인덱스를 작성합니다. 문서는 이름이 붙은 필드들로 구성되며, 검색 대상의 컨텐츠는 필드 안에 포함됩니다.
문서를 나타내는 것이 Zend_Search_Lucene_Document 오브젝트입니다. 이 오브젝트 안에는 필드를 나타내는 Zend_Search_Lucene_Field 오브젝트가 포함됩니다.
모든 종류의 정보가 인덱스화될 수 있다는 점에 주의합시다. 어플리케이션 고유의 정보나 메타데이터를 문서의 필드에 저장해 두고, 나중에 검색 결과의 문서로부터 취득할 수 있습니다.
인덱서를 제어하는 것은 여러분이 작성하는 어플리케이션의 역할입니다. 즉, 어플리케이션으로부터 액세스 가능한 모든 데이터가 인덱스화될 수 있다는 것입니다. 예를 들면 파일 시스템, 데이터베이스, HTML 폼 등을 생각할 수 있습니다.
Zend_Search_Lucene_Field
클래스에는,
다양한 성질의 필드를 작성하기 위한 정적 메소드가 정의되어 있습니다.
<?php
$doc = new Zend_Search_Lucene_Document();
// 필드는 토큰화되지 않습니다만, 인덱스화되어 보존됩니다.
// 보존된 필드는 인덱스로부터 취득할 수 있습니다.
$doc->addField(Zend_Search_Lucene_Field::Keyword('doctype',
'autogenerated'));
// 필드는 토큰화도 인덱스화도 되지 않습니다만, 인덱스에 보존됩니다.
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('created',
time()));
// 바이너리 문자열 필드는 토큰화도 인덱스화도 되지 않습니다.
// 그러나 인덱스에는 보존됩니다.
$doc->addField(Zend_Search_Lucene_Field::Binary('icon',
$iconData));
// 필드가 토큰화·인덱스화되어 인덱스에 보존됩니다.
$doc->addField(Zend_Search_Lucene_Field::Text('annotation',
'Document annotation text'));
// 필드는 토큰화되어 인덱스화됩니다만, 인덱스에는 보존되지 않습니다.
$doc->addField(Zend_Search_Lucene_Field::UnStored('contents',
'My document content'));
이러한 각 메소드 (Zend_Search_Lucene_Field::Binary()
메소드는 제외) 는, 옵션 파라미터
$encoding
를 가지고 있습니다.
이것으로 입력 데이터의 인코딩을 지정합니다.
인코딩은 문서에 따라 다를 수 있고, 동일 문서 내에서도 필드에 따라 다를 수 있습니다.
<?php
$doc = new Zend_Search_Lucene_Document();
$doc->addField(Zend_Search_Lucene_Field::Text('title', $title, 'iso-8859-1'));
$doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $contents, 'utf-8'));
인코딩 파라미터를 생략했을 경우에는 현재의 로케일이 처리 시에 사용됩니다. 예를 들어 다음과 같이 됩니다.
<?php
setlocale(LC_ALL, 'de_DE.iso-8859-1');
...
$doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $contents));
필드는 인덱스에 저장되거나 인덱스로부터 반환될 때 항상 UTF-8 인코딩이 됩니다. UTF-8 로의 변환은 자동으로 행해집니다.
텍스트 해석기 (이하를 참조해 주십시오) 는 텍스트를 그 외의 인코딩으로 변환하기도 합니다. 실제로 디폴트 해석기는 텍스트를 'ASCII//TRANSLIT' 인코딩으로 변환합니다. 여기서 주의가 필요한 것은, 이러한 변환이 현재의 로케일에 의존해 행해진다는 점입니다.
필드의 이름은 addField()
메소드로 자유롭게 붙일 수 있습니다.
Java Lucene 는 검색 시에 디폴트로 사용되는 필드명으로서 "contents" 를 사용합니다. Zend_Search_Lucene 는 디폴트로 모든 필드를 검색합니다. 그러나 이 동작을 변경할 수도 있습니다. 자세한 것은 "디폴트의 검색 필드" 의 장을 참조해 주십시오.
Keyword
필드는, 보존되어 인덱스화됩니다.
즉, 검색했을 때에 원래의 내용이 그대로 반환된다는 것입니다.
토큰화는 행해지지 않습니다 (여러 개의 단어로 분할되지 않습니다).
열거형의 데이터베이스 필드는, 통상은 Zend_Search_Lucene 의
Keyword 필드로 잘 변환될 것입니다.
UnIndexed
필드는 검색 대상이 되지 않습니다만,
검색 결과로는 반환됩니다. 이 필드에 설정하는 값으로는,
데이터베이스의 타임스탬프, 기본 키, 파일 시스템의 경로
및 그 외의 외부 식별자 등이 있습니다.
Binary
필드는, 토큰화도 인덱스화도 행해지지 않습니다.
그러나, 검색 결과로 취득할 수 있도록 보존됩니다.
화상 아이콘과 같은 바이너리 데이터를 바이너리 문자열로
encode 한 것 등에 대해 사용합니다.
Text
필드는, 보존되고 인덱스화되며
토큰화도 됩니다. 검색 항목으로 사용하면서,
검색 결과로도 취득하고 싶은 항목,
예를 들면 타이틀 등을 보존하는 데 적합합니다.
UnStored
필드는 토큰화되어 인덱스화됩니다.
그러나 인덱스에는 보존되지 않습니다. 대량의 텍스트 등에 적합합니다.
데이터를 보존해 버리면 디스크상의 인덱스 사이즈가 커져 버리므로,
검색은 하고 싶지만 결과로서 그것을 표시할 필요가 없는 경우 등에는,
이 필드를 사용합시다. Zend_Search_Lucene 인덱스를
RDB와 조합해 사용하는 경우 등에는
UnStored 필드를 실용적으로 사용할 수 있습니다.
큰 데이터 필드의 내용을 검색용으로 UnStored 필드에 인덱스화해 두고,
결과를 데이터베이스로부터 취득하기 위해서 별도의 ID 필드를 사용합니다.
표 32.1. Zend_Search_Lucene_Field 의 형태
필드형 | 보존 | 인덱스화 | 토큰화 | 바이너리 |
---|---|---|---|---|
Keyword | Yes | Yes | No | No |
UnIndexed | Yes | No | No | No |
Binary | Yes | No | No | Yes |
Text | Yes | Yes | Yes | No |
UnStored | No | Yes | Yes | No |
Zend_Search_Lucene 에는 HTML 을 파싱하는 기능도 있습니다. 다음과 같이 하여 HTML 파일이나 문자열로부터 문서를 직접 작성할 수 있습니다.
<?php
$doc = Zend_Search_Lucene_Document_Html::loadHTMLFile($filename);
$index->addDocument($doc);
...
$doc = Zend_Search_Lucene_Document_Html::loadHTML($htmlString);
$index->addDocument($doc);
Zend_Search_Lucene_Document_Html
클래스는,
DOMDocument::loadHTML()
및
DOMDocument::loadHTMLFile()
메소드를 이용해 소스 HTML
을 파싱하고 있습니다. 즉, 대상이 되는 HTML 은 정형식(well-formed)일 필요는 없고,
또 XHTML 일 필요도 없습니다. 한편, 헤더의 "meta http-equiv"
태그로 인코딩을 제대로 설정해 둘 필요가 있습니다.
Zend_Search_Lucene_Document_Html
클래스는,
문서의 타이틀, 본문 그리고 헤더의 meta 태그의 내용을 인식합니다.
'title' 필드에는 /html/head/title 의 값이 들어갑니다. 이것은 인덱스에 보존되고 토큰화되어 검색의 대상이 됩니다.
'body' 필드에는 body 의 내용이 들어갑니다. 스크립트나 코멘트, 그리고 태그의 속성은 포함되지 않습니다.
Zend_Search_Lucene_Document_Html
클래스의
loadHTML()
및 loadHTMLFile()
메소드에는,
옵션인 두 번째 인수도 있습니다. 이것을 true 로 설정하면,
body 의 내용도 인덱스에 저장되어 인덱스로부터 취득할 수 있게 됩니다.
body 는 디폴트로는 토큰화와 인덱스화만 되고 보존되지는 않습니다.
문서 헤더의 meta 태그의 내용을 기초로, 추가 필드를 작성합니다. 필드의 이름은 'name' 속성으로부터 취득합니다. 그리고 'content' 속성의 내용이 그 값이 됩니다. 이것은 토큰화, 인덱스화된 다음 보존됩니다. 즉, 문서는 meta 태그의 내용을 기초로 해서 (예를 들어 키워드에 의해서) 검색할 수 있게 되는 것입니다.
파싱된 문서에, 사용자가 다른 필드를 추가할 수 있습니다.
<?php
$doc = Zend_Search_Lucene_Document_Html::loadHTML($htmlString);
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('created',
time()));
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('updated',
time()));
$doc->addField(Zend_Search_Lucene_Field::Text('annotation',
'Document annotation text'));
$index->addDocument($doc);
작성된 문서에는 링크는 포함되지 않습니다. 그러나,
Zend_Search_Lucene_Document_Html::getLinks()
및
Zend_Search_Lucene_Document_Html::getHeaderLinks()
메소드로 링크를 취득할 수도 있습니다.
<?php
$doc = Zend_Search_Lucene_Document_Html::loadHTML($htmlString);
$linksArray = $doc->getLinks();
$headerLinksArray = $doc->getHeaderLinks();
[6] 현재 서포트하고 있는 것은, 단어 검색 및 복수 단어 검색, 그리고 프레이즈 검색, 논리식 및 서브 쿼리입니다.
[7] Lucene 2.1 인덱스 포맷 (이것은 Lucene 2.2 에서도 이용되고 있습니다) 에 대응하는 구현은 현재의 "trunk" 브랜치에 존재합니다. 이 버전은 SVN 리포지터리(repository) 혹은 최신 nightly snapshot 으로부터 취득할 수 있습니다.
Lucene 2.1 인덱스 포맷의 서포트는 ZF 1.1.0 에 포함될 예정입니다. 현재의 릴리스판 (ZF V1.0.2) 은 Lucene 1.9-2.0 에 대응하고 있습니다.