From aaf62ed933a9d34b7ce15430ef2565b92bcee22a Mon Sep 17 00:00:00 2001 From: Jian Wu Date: Fri, 22 Apr 2016 16:42:30 -0400 Subject: [PATCH] allow searching non-ascii characters by encoding queries in UTF-8 --- .../edu/psu/citeseerx/web/SearchController.java | 44 ++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/java/edu/psu/citeseerx/web/SearchController.java b/src/java/edu/psu/citeseerx/web/SearchController.java index 53dd569..3586a30 100644 --- a/src/java/edu/psu/citeseerx/web/SearchController.java +++ b/src/java/edu/psu/citeseerx/web/SearchController.java @@ -212,7 +212,7 @@ public void setSystemBaseURL(String systemBaseURL) { * All parameters will have a valid value to be used by the other methods. */ private Map collectQueryParam( - HttpServletRequest request) { + HttpServletRequest request) throws UnsupportedEncodingException { Map queryParameters = new HashMap(); String value = null; @@ -221,6 +221,7 @@ public void setSystemBaseURL(String systemBaseURL) { QUERY_PARAMETER, null); if (quest_organic != null) { queststr = Jsoup.clean(quest_organic, Whitelist.none()); + } queryParameters.put(QUERY_PARAMETER, queststr); @@ -267,6 +268,9 @@ private String buildSolrQuery(String queryType, if (queryType.equals(DOCUMENT_QUERY)) { + // encode query string, neglecting chars before the first ":" + query = encodeQuery(query); + // Searching for authors within documents if (queryParameters.get(QUERY_TYPE).equals(AUTHOR_QUERY) && !queryParameters.get(UAUTH).equals(UAUTHSET)) { @@ -482,13 +486,23 @@ private ModelAndView doGeneralSearch(Map queryParameters) { int code = e.getStatusCode(); if (code == 400) { errMsg = "Invalid query type. " + - "Please check your syntax."; + "Please check your syntax." + + " Query: " + solrQuery + + "Error: "; + for (StackTraceElement s : e.getStackTrace()) { + errMsg += s.toString()+"\n"; + } + } else { errMsg = "

Error processing query.

" + "

The most likely cause of this condition " + "is a malformed query. Please check your query " + "syntax and, if the problem persists, " + - "contact an admin for assistance.

"; + "contact an admin for assistance.

" + + "Query: " + solrQuery + "\n"; + for (StackTraceElement s : e.getStackTrace()) { + errMsg += s.toString()+"\n"; + } } System.err.println("Query: " + solrQuery); e.printStackTrace(); @@ -869,6 +883,30 @@ else if( t.equals(ALGORITHM_QUERY)) { } //- handleRequest + /* only encode the text part of the query, e.g., if the query is + author: lee giles, encode "lee giles" only + */ + private static String encodeQuery(String q) { + String encodedString = null; + if (q.contains(":")) { + try { + String[] qs = q.split(":",2); + qs[1] = URLEncoder.encode(qs[1],"UTF-8"); + encodedString = qs[0]+":"+qs[1]; + } catch (UnsupportedEncodingException e) { + return null; + } + } else { + try { + encodedString = URLEncoder.encode(q,"UTF-8"); + } catch (UnsupportedEncodingException e) { + return null; + } + } + return encodedString; + + } //- encodeQuery + private static String normalizeQuery(String q) { q = q.replaceAll("author\\:", "authorNorms:"); return q;