import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helper class for sanitizing path / URL strings and for detecting or removing
 * Chinese (CJK) characters.
 *
 * <p>All methods are static and keep no mutable state.
 */
public class CleanPathUtil {

    /** Matches a single ideograph in the range U+4E00..U+9FA5; compiled once and reused. */
    private static final Pattern HAN_PATTERN = Pattern.compile("[\u4e00-\u9fa5]");

    /** Non-alphanumeric characters that {@link #cleanString(String)} keeps unchanged. */
    private static final String ALLOWED_SYMBOLS = "/\\:.-_%&=?";

    /** Characters that {@link #validateLegalString(String)} reports as illegal. */
    private static final String ILLEGAL_CHARS = "`~!#%^&*=+\\|{};:'\",<>/?○●★☆☉♀♂※¤╬の〆";

    /** Value returned by {@link #validateLegalString(String)} when nothing illegal is found. */
    private static final char LEGAL_MARKER = 't';

    /**
     * Replaces every character that is not on the allow-list with a single space.
     *
     * <p>A character is kept when {@link #isRightChar(char)} accepts it or when it is one of
     * {@code / \ : . - _ % & = ?}. The result always has the same length as the input.
     * Note that {@code .}, {@code /} and {@code \} are kept, so sequences such as
     * {@code ../} pass through unchanged.
     *
     * @param path the string to clean, may be {@code null}
     * @return the cleaned string, or {@code null} when {@code path} is {@code null}
     */
    public static String cleanString(String path) {
        if (path == null) {
            return null;
        }
        StringBuilder cleaned = new StringBuilder(path.length());
        for (int i = 0; i < path.length(); i++) {
            cleaned.append(cleanChar(path.charAt(i)));
        }
        return cleaned.toString();
    }

    /**
     * Maps one character to itself when it is allowed, otherwise to a space.
     *
     * @param aChar the character to check
     * @return {@code aChar} when allowed, {@code ' '} otherwise
     */
    private static char cleanChar(char aChar) {
        if (isRightChar(aChar) || ALLOWED_SYMBOLS.indexOf(aChar) >= 0) {
            return aChar;
        }
        return ' ';
    }

    /**
     * Tells whether the string contains at least one ideograph in the range
     * U+4E00..U+9FA5. Chinese punctuation is not detected by this check.
     *
     * @param str the string to inspect, may be {@code null}
     * @return {@code true} when such an ideograph is present; {@code false} otherwise,
     *         including for {@code null}
     */
    public static boolean isContainChinese(String str) {
        if (str == null) {
            return false;
        }
        Matcher matcher = HAN_PATTERN.matcher(str);
        return matcher.find();
    }

    /**
     * Removes Chinese characters from the string.
     *
     * <p>Filtering only happens when {@link #isContainChinese(String)} finds an ideograph;
     * in that case every character accepted by {@link #isChinese(char)} (ideographs and
     * the punctuation blocks listed there) is dropped. A string without any ideograph is
     * returned unchanged, even if it contains such punctuation.
     *
     * @param str the string to filter, may be {@code null}
     * @return the filtered string; the input itself when nothing had to be filtered
     */
    public static String filterChinese(String str) {
        if (!isContainChinese(str)) {
            return str;
        }
        StringBuilder kept = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (!isChinese(current)) {
                kept.append(current);
            }
        }
        return kept.toString();
    }

    /**
     * Checks whether the character needs more than one byte when encoded as UTF-8.
     *
     * <p>This is a coarse heuristic: it is {@code true} for every such character, not only
     * for Chinese ones. Use {@link #isChinese(char)} for a Unicode-block based check.
     *
     * @param c the character to check
     * @return {@code true} when the UTF-8 encoding of {@code c} is longer than one byte
     */
    public static boolean isChineseChar(char c) {
        return String.valueOf(c).getBytes(StandardCharsets.UTF_8).length > 1;
    }

    /**
     * Looks for the first character of {@code content} that belongs to the illegal set
     * (ASCII punctuation such as {@code # % & ; : < > ?} and a few symbol characters).
     *
     * @param content the text to validate, may be {@code null}
     * @return {@code 't'} when no illegal character is found (also for {@code null} or empty
     *         input); otherwise the first illegal character encountered
     */
    public static char validateLegalString(String content) {
        if (content == null) {
            return LEGAL_MARKER;
        }
        for (int i = 0; i < content.length(); i++) {
            char current = content.charAt(i);
            if (ILLEGAL_CHARS.indexOf(current) >= 0) {
                return current;
            }
        }
        return LEGAL_MARKER;
    }

    /**
     * Checks whether the character is accepted by {@link #isChinese(char)} or
     * {@link #isWord(char)}.
     *
     * @param c the character to check
     * @return {@code true} when either check accepts the character
     */
    public static boolean isRightChar(char c) {
        return isChinese(c) || isWord(c);
    }

    /**
     * Checks whether the character is one of a-z, A-Z, 0-9 or underscore.
     *
     * <p>Implemented with plain range comparisons so that no regex or temporary string is
     * created for each character.
     *
     * @param c the character to check
     * @return {@code true} when the character is an ASCII letter, ASCII digit or {@code _}
     */
    public static boolean isWord(char c) {
        return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9')
                || c == '_';
    }

    /**
     * Checks whether the character lies in one of the Unicode blocks this class treats as
     * Chinese: the CJK unified / compatibility / extension-A ideograph blocks, plus
     * General Punctuation, CJK Symbols and Punctuation, and Halfwidth and Fullwidth Forms.
     *
     * @param c the character to check
     * @return {@code true} when the character belongs to one of those blocks
     */
    public static boolean isChinese(char c) {
        Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Checks whether every character of the string is accepted by {@link #isChinese(char)}.
     *
     * @param name the string to check, may be {@code null}
     * @return {@code true} when all characters pass (an empty string passes trivially);
     *         {@code false} when any character fails or {@code name} is {@code null}
     */
    public static boolean checkNameChese(String name) {
        if (name == null) {
            return false;
        }
        for (int i = 0; i < name.length(); i++) {
            if (!isChinese(name.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Manual smoke test: cleans a sample URL and prints the result.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String path = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=%E6%B1%89%E5%AD%97%20%E8%B7%AF%E5%BE%84%E8%BF%87%E6%BB%A4&fenlei=256&oq=%25E8%25B7%25AF%25E5%25BE%2584%25E8%25BF%2587%25E6%25BB%25A4&rsv_pq=b04d9420000bb9bc&rsv_t=9b21H1kF8kegeLOXb7XmsQuwLpD0XMpZ6vyocQ2HhUVNfysvg5TI6p0s2Xk&rqlang=cn&rsv_enter=0&rsv_dl=tb&rsv_btype=t&inputT=202&rsv_sug3=60&rsv_sug1=26&rsv_sug7=100&rsv_sug2=0&rsv_sug4=25744&rsv_sug=1";
        String cleanString = CleanPathUtil.cleanString(path);
        System.out.println(cleanString);
    }
}