关于中文搜索时使用*通配符不起作用的问题
|
cjx186
2008-04-24
本人想实现一个智能提示功能,索引文件已经建立。查询时发现一个问题:查英文时使用通配符*或者?是可以的;查中文时加?查不到,加*能查到,但结果并不符合通配符规则,而是模糊查询,不加通配符时也是模糊查询。
package com.aladdin.alagis.suggest;
/**
 * Smart-suggest (auto-complete) servlet.
 *
 * @author chenjianxiang
 * @since 2008-4-23
 */
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import com.aladdin.alagis.util.ConfigFilePath;
import com.aladdin.util.Escape;
/**
 * Servlet that answers auto-complete ("suggest") requests.
 *
 * <p>Reads the request parameter {@code key}, runs it as a prefix query
 * ({@code key + "*"}) against the {@code word} field of the Lucene index located
 * via {@code ConfigFilePath.getConfigPath("index")}, and writes a
 * {@code UBC('...')} call whose argument is an HTML fragment listing up to
 * {@link #MAX_SUGGESTIONS} matching words. When nothing matches (or the key is
 * missing, blank or unparsable) only the hidden marker {@code DIV} is returned.
 *
 * <p>NOTE(review): {@code key} and the indexed words are inserted into HTML that
 * itself sits inside a single-quoted JavaScript string without any escaping.
 * A value containing {@code '}, {@code "} or {@code <} can break the response or
 * inject script. The correct escaping depends on how the client-side
 * {@code UBC} function consumes the fragment, so it is flagged here rather
 * than changed.
 *
 * <p>NOTE(review): the query parser does not pass wildcard/prefix terms through
 * the analyzer. If the {@code word} field was indexed with
 * {@code StandardAnalyzer} (which presumably splits CJK text into single
 * characters), a multi-character Chinese prefix will not behave like a real
 * prefix match. Indexing the field untokenized and querying it with a prefix
 * query is the usual remedy; confirm against the indexing code.
 */
public class Suggest extends javax.servlet.http.HttpServlet implements
        javax.servlet.Servlet {
    static final long serialVersionUID = 1L;

    /** Upper bound on the number of suggestion rows rendered per response. */
    private static final int MAX_SUGGESTIONS = 15;

    /** Opening part of the response; <code>{key}</code> is replaced by the search key. */
    private static final String sTemplateStart = "UBC('<DIV id=\"{key}\" style=\"DISPLAY: none\"></DIV><DIV style=\"Z-INDEX: 12\"><TABLE cellSpacing=\"1\" cellPadding=\"0\" width=\"100%\" align=\"center\" bgColor=\"#979797\" border=\"0\"><TBODY><TR><TD vAlign=\"top\"><TABLE cellSpacing=\"0\" cellPadding=\"0\" width=\"100%\" align=\"center\" border=\"0\"><TBODY>";

    /** One suggestion row; <code>{svalue}</code> is replaced by the suggested word. */
    private static final String sTemplateContent = "<TR svalue=\"{svalue}\" onSelect=\"this.txtBox.value=\\'{svalue}\\'\"><TD class=\"remindtt75\" align=\"left\" bgColor=\"#ffffff\">{svalue}</TD><TD class=\"remindtt752\" align=\"right\" bgColor=\"#ffffff\"></TD></TR>";

    /** Closing part of the response, including the "turn off suggestions" link. */
    private static final String sTemplateEnd = "</TBODY></TABLE><TABLE cellSpacing=\"0\" cellPadding=\"0\" width=\"100%\" align=\"center\" border=\"0\" onSelect=\"this.txtBox.value=\\'\\'\"><TBODY><TR><TD bgColor=\"#dddddd\" colSpan=\"2\" height=\"1\"></TD></TR><TR><TD class=\"jstxhuitiaoyou\" align=\"right\" bgColor=\"#ecf0ef\" height=\"17\"><A class=\"jstxlan\" onclick=\"TurnOffSuggest();\">关闭提示功能</A> </TD></TR></TBODY></TABLE></TD></TR></TBODY></TABLE></DIV>')";

    /** Response used when there is nothing to suggest. */
    private static final String sTemplateUnFind = "UBC('<DIV id=\"{key}\" style=\"DISPLAY: none\"></DIV>')";

    public Suggest() {
        super();
    }

    /**
     * Handles GET exactly like POST.
     *
     * @param request  the incoming request
     * @param response the response to write the suggestion markup to
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if the index cannot be read or the response cannot be written
     */
    protected void doGet(HttpServletRequest request,
            HttpServletResponse response) throws ServletException, IOException {
        this.doPost(request, response);
    }

    /**
     * Looks up suggestions for the {@code key} request parameter and writes them
     * to the response as UTF-8 text.
     *
     * @param request  the incoming request; its {@code key} parameter holds the escaped prefix
     * @param response the response to write the suggestion markup to
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if the index cannot be opened, searched or closed,
     *                          or the response cannot be written
     */
    protected void doPost(HttpServletRequest request,
            HttpServletResponse response) throws ServletException, IOException {
        response.setCharacterEncoding("UTF-8");
        response.setContentType("text/html; charset=utf-8");
        PrintWriter out = response.getWriter();

        // A missing parameter must not reach the unescape helper or turn into the query "null*".
        String rawKey = request.getParameter("key");
        String key = (rawKey == null) ? "" : Escape.unescape(rawKey);
        if (key == null) {
            key = "";
        }

        // A blank key would yield the bare query "*", which is not a useful prefix search.
        if (key.trim().length() == 0) {
            writeNotFound(out, key);
            return;
        }

        // Parse before opening the index so bad input never costs an index open.
        Query query;
        try {
            QueryParser parser = new QueryParser("word", new StandardAnalyzer());
            query = parser.parse(key + "*");
        } catch (ParseException e) {
            // Previously the exception was printed and a null query was searched.
            log("Suggest: cannot parse key \"" + key + "\"", e);
            writeNotFound(out, key);
            return;
        }

        IndexSearcher searcher = new IndexSearcher(ConfigFilePath.getConfigPath("index"));
        try {
            Hits hits = searcher.search(query);
            int length = Math.min(hits.length(), MAX_SUGGESTIONS);
            if (length < 1) {
                writeNotFound(out, key);
                return;
            }

            StringBuffer rows = new StringBuffer();
            for (int i = 0; i < length; i++) {
                String word = hits.doc(i).get("word");
                if (word == null) {
                    // Document without a stored "word" value: nothing to show.
                    continue;
                }
                // Literal replace: '$' and '\' in the word must not be read as group references.
                rows.append(sTemplateContent.replace("{svalue}", word));
            }

            out.print(sTemplateStart.replace("{key}", key)
                    + rows.toString()
                    + sTemplateEnd.replace("{key}", key));
            out.flush();
        } finally {
            // Hits are read lazily from the searcher, so close only after rendering.
            searcher.close();
        }
    }

    /** Writes the "no suggestions" response for the given key and flushes it. */
    private void writeNotFound(PrintWriter out, String key) {
        out.print(sTemplateUnFind.replace("{key}", key));
        out.flush();
    }
}
|
|
|
licco1
2008-05-04
这个得看分词器的语法了。你用的是StandardAnalyzer,我在《Lucene in Action》里看到,标准分词器分析非字母文字的效果是用terrible来形容的。可以看看里面那个cc语法文件。
|

