001 package org.maltparser.core.helper; 002 003 004 import java.io.BufferedInputStream; 005 import java.io.BufferedOutputStream; 006 import java.io.File; 007 import java.io.FileInputStream; 008 import java.io.FileNotFoundException; 009 import java.io.FileOutputStream; 010 import java.io.IOException; 011 import java.io.InputStream; 012 import java.net.MalformedURLException; 013 import java.net.URL; 014 015 import org.apache.log4j.Logger; 016 import org.maltparser.core.config.ConfigurationException; 017 import org.maltparser.core.exception.MaltChainedException; 018 import org.maltparser.core.plugin.Plugin; 019 import org.maltparser.core.plugin.PluginLoader; 020 021 /** 022 * 023 * 024 * @author Johan Hall 025 */ 026 public class Util { 027 private static final int BUFFER = 4096; 028 private static final char AMP_CHAR = '&'; 029 private static final char LT_CHAR = '<'; 030 private static final char GT_CHAR = '>'; 031 private static final char QUOT_CHAR = '"'; 032 private static final char APOS_CHAR = '\''; 033 034 public static String xmlEscape(String str) { 035 boolean needEscape = false; 036 char c; 037 for (int i = 0; i < str.length(); i++) { 038 c = str.charAt(i); 039 if (c == AMP_CHAR || c == LT_CHAR || c == GT_CHAR || c == QUOT_CHAR || c == APOS_CHAR) { 040 needEscape = true; 041 break; 042 } 043 } 044 if (!needEscape) { 045 return str; 046 } 047 final StringBuilder sb = new StringBuilder(); 048 for (int i = 0; i < str.length(); i++) { 049 c = str.charAt(i); 050 if (str.charAt(i) == AMP_CHAR) { 051 sb.append("&"); 052 } else if ( str.charAt(i) == LT_CHAR) { 053 sb.append("<"); 054 } else if (str.charAt(i) == GT_CHAR) { 055 sb.append(">"); 056 } else if (str.charAt(i) == QUOT_CHAR) { 057 sb.append("""); 058 } else if (str.charAt(i) == APOS_CHAR) { 059 sb.append("'"); 060 } else { 061 sb.append(c); 062 } 063 } 064 return sb.toString(); 065 } 066 067 /** 068 * Search for a file according the following priority: 069 * <ol> 070 * <li>The local file system 071 * <li>Specified as an URL (starting with http:, file:, ftp: or jar: 072 * <li>MaltParser distribution file (malt.jar) 073 * <li>MaltParser plugins 074 * </ol> 075 * 076 * If the file string is found, an URL object is returned, otherwise <b>null</b> 077 * 078 * @param fileString the file string to convert into an URL. 079 * @return an URL object, if the file string is found, otherwise <b>null</b> 080 * @throws MaltChainedException 081 */ 082 public static URL findURL(String fileString) throws MaltChainedException { 083 File specFile = new File(fileString); 084 085 try { 086 if (specFile.exists()) { 087 // found the file in the file system 088 return new URL("file:///"+specFile.getAbsolutePath()); 089 } else if (fileString.startsWith("http:") || fileString.startsWith("file:") || fileString.startsWith("ftp:") || fileString.startsWith("jar:")) { 090 // the input string is an URL string starting with http, file, ftp or jar 091 return new URL(fileString); 092 } else { 093 return findURLinJars(fileString); 094 } 095 } catch (MalformedURLException e) { 096 throw new MaltChainedException("Malformed URL: "+fileString, e); 097 } 098 } 099 100 public static URL findURLinJars(String fileString) throws MaltChainedException { 101 try { 102 // search in malt.jar and its plugins 103 if (Thread.currentThread().getClass().getResource(fileString) != null) { 104 // found the input string in the malt.jar file 105 return Thread.currentThread().getClass().getResource(fileString); 106 } else { 107 for (Plugin plugin : PluginLoader.instance()) { 108 URL url = null; 109 if (!fileString.startsWith("/")) { 110 url = new URL("jar:"+plugin.getUrl() + "!/" + fileString); 111 } else { 112 url = new URL("jar:"+plugin.getUrl() + "!" + fileString); 113 } 114 115 try { 116 InputStream is = url.openStream(); 117 is.close(); 118 } catch (IOException e) { 119 continue; 120 } 121 // found the input string in one of the plugins 122 return url; 123 } 124 // could not convert the input string into an URL 125 return null; 126 } 127 } catch (MalformedURLException e) { 128 throw new MaltChainedException("Malformed URL: "+fileString, e); 129 } 130 } 131 132 public static int simpleTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) { 133 logger.info("."); 134 int tic = inTic + 1; 135 if (tic >= nTicxRow) { 136 ticInfo(logger, startTime, subject); 137 tic = 0; 138 } 139 return tic; 140 } 141 142 public static void startTicer(Logger logger, long startTime, int nTicxRow, int subject) { 143 logger.info("."); 144 for (int i = 1; i <= nTicxRow; i++) { 145 logger.info(" "); 146 } 147 ticInfo(logger, startTime, subject); 148 } 149 150 public static void endTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) { 151 for (int i = inTic; i <= nTicxRow; i++) { 152 logger.info(" "); 153 } 154 ticInfo(logger, startTime, subject); 155 } 156 157 private static void ticInfo(Logger logger, long startTime, int subject) { 158 logger.info("\t"); 159 int a = 1000000; 160 if (subject != 0) { 161 while (subject/a == 0) { 162 logger.info(" "); 163 a /= 10; 164 } 165 } else { 166 logger.info(" "); 167 } 168 logger.info(subject); 169 logger.info("\t"); 170 long time = (System.currentTimeMillis()-startTime)/1000; 171 a = 1000000; 172 if (time != 0) { 173 while (time/a == 0 ) { 174 logger.info(" "); 175 a /= 10; 176 } 177 logger.info(time); 178 logger.info("s"); 179 } else { 180 logger.info(" 0s"); 181 } 182 logger.info("\t"); 183 long memory = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1000000; 184 a = 1000000; 185 if (memory != 0) { 186 while (memory/a == 0 ) { 187 logger.info(" "); 188 a /= 10; 189 } 190 logger.info(memory); 191 logger.info("MB\n"); 192 } else { 193 logger.info(" 0MB\n"); 194 } 195 } 196 197 public static void copyfile(String source, String destination) throws MaltChainedException { 198 try { 199 byte[] readBuffer = new byte[BUFFER]; 200 BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source)); 201 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); 202 int n = 0; 203 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { 204 bos.write(readBuffer, 0, n); 205 } 206 bos.flush(); 207 bos.close(); 208 bis.close(); 209 } catch (FileNotFoundException e) { 210 throw new MaltChainedException("The destination file '"+destination+"' cannot be created when coping the file. ", e); 211 } catch (IOException e) { 212 throw new MaltChainedException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e); 213 } 214 } 215 216 }