001 package org.maltparser.core.syntaxgraph.reader; 002 003 import java.io.File; 004 import java.util.HashMap; 005 006 import org.maltparser.core.config.ConfigurationDir; 007 import org.maltparser.core.exception.MaltChainedException; 008 import org.maltparser.core.flow.FlowChartInstance; 009 import org.maltparser.core.flow.item.ChartItem; 010 import org.maltparser.core.flow.spec.ChartItemSpecification; 011 import org.maltparser.core.helper.Util; 012 import org.maltparser.core.io.dataformat.DataFormatException; 013 import org.maltparser.core.io.dataformat.DataFormatInstance; 014 import org.maltparser.core.io.dataformat.DataFormatManager; 015 import org.maltparser.core.options.OptionManager; 016 import org.maltparser.core.symbol.SymbolTableHandler; 017 import org.maltparser.core.syntaxgraph.TokenStructure; 018 019 public class ReadChartItem extends ChartItem { 020 private String idName; 021 private String inputFormatName; 022 private String inputFileName; 023 private String inputCharSet; 024 private String readerOptions; 025 private int iterations; 026 private Class<? extends SyntaxGraphReader> graphReaderClass; 027 028 private String nullValueStrategy; 029 private String rootLabels; 030 031 private SyntaxGraphReader reader; 032 private String targetName; 033 private String optiongroupName; 034 private DataFormatInstance inputDataFormatInstance; 035 private TokenStructure cachedGraph = null; 036 037 public ReadChartItem() { super(); } 038 039 public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException { 040 super.initialize(flowChartinstance, chartItemSpecification); 041 042 for (String key : chartItemSpecification.getChartItemAttributes().keySet()) { 043 if (key.equals("id")) { 044 idName = chartItemSpecification.getChartItemAttributes().get(key); 045 } else if (key.equals("target")) { 046 targetName = chartItemSpecification.getChartItemAttributes().get(key); 047 } else if (key.equals("optiongroup")) { 048 optiongroupName = chartItemSpecification.getChartItemAttributes().get(key); 049 } 050 } 051 052 if (idName == null) { 053 idName = getChartElement("read").getAttributes().get("id").getDefaultValue(); 054 } else if (targetName == null) { 055 targetName = getChartElement("read").getAttributes().get("target").getDefaultValue(); 056 } else if (optiongroupName == null) { 057 optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue(); 058 } 059 060 setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString()); 061 setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString()); 062 setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString()); 063 setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString()); 064 if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations") != null) { 065 setIterations((Integer)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations")); 066 } else { 067 setIterations(1); 068 } 069 setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader")); 070 071 setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString()); 072 setRootLabels(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "graph", "root_label").toString()); 073 074 075 initInput(getNullValueStrategy(), getRootLabels()); 076 initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations); 077 } 078 079 public int preprocess(int signal) throws MaltChainedException { 080 return signal; 081 } 082 083 public int process(int signal) throws MaltChainedException { 084 if (cachedGraph == null) { 085 cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName); 086 } 087 int prevIterationCounter = reader.getIterationCounter(); 088 boolean moreInput = reader.readSentence(cachedGraph); 089 // System.out.println(cachedGraph); 090 // System.exit(1); 091 if (!moreInput) { 092 return ChartItem.TERMINATE; 093 } else if (prevIterationCounter < reader.getIterationCounter()) { 094 return ChartItem.NEWITERATION; 095 } 096 return ChartItem.CONTINUE; 097 // return continueNextSentence && moreInput; 098 } 099 100 public int postprocess(int signal) throws MaltChainedException { 101 return signal; 102 } 103 104 public void terminate() throws MaltChainedException { 105 if (reader != null) { 106 reader.close(); 107 reader = null; 108 } 109 cachedGraph = null; 110 inputDataFormatInstance = null; 111 } 112 113 public String getInputFormatName() { 114 if (inputFormatName == null) { 115 return "/appdata/dataformat/conllx.xml"; 116 } 117 return inputFormatName; 118 } 119 120 public void setInputFormatName(String inputFormatName) { 121 this.inputFormatName = inputFormatName; 122 } 123 124 public String getInputFileName() { 125 if (inputFileName == null) { 126 return "/dev/stdin"; 127 } 128 return inputFileName; 129 } 130 131 public void setInputFileName(String inputFileName) { 132 this.inputFileName = inputFileName; 133 } 134 135 public String getInputCharSet() { 136 if (inputCharSet == null) { 137 return "UTF-8"; 138 } 139 return inputCharSet; 140 } 141 142 public void setInputCharSet(String inputCharSet) { 143 this.inputCharSet = inputCharSet; 144 } 145 146 public String getReaderOptions() { 147 if (readerOptions == null) { 148 return ""; 149 } 150 return readerOptions; 151 } 152 153 public void setReaderOptions(String readerOptions) { 154 this.readerOptions = readerOptions; 155 } 156 157 158 public int getIterations() { 159 return iterations; 160 } 161 162 public void setIterations(int iterations) { 163 this.iterations = iterations; 164 } 165 166 public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() { 167 return graphReaderClass; 168 } 169 170 public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException { 171 try { 172 if (graphReaderClass != null) { 173 this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class); 174 } 175 } catch (ClassCastException e) { 176 throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e); 177 } 178 } 179 180 public String getNullValueStrategy() { 181 if (nullValueStrategy == null) { 182 return "one"; 183 } 184 return nullValueStrategy; 185 } 186 187 public void setNullValueStrategy(String nullValueStrategy) { 188 this.nullValueStrategy = nullValueStrategy; 189 } 190 191 public String getRootLabels() { 192 if (nullValueStrategy == null) { 193 return "ROOT"; 194 } 195 return rootLabels; 196 } 197 198 public void setRootLabels(String rootLabels) { 199 this.rootLabels = rootLabels; 200 } 201 202 203 public String getTargetName() { 204 return targetName; 205 } 206 207 public void setTargetName(String targetName) { 208 this.targetName = targetName; 209 } 210 211 public SyntaxGraphReader getReader() { 212 return reader; 213 } 214 215 public DataFormatInstance getInputDataFormatInstance() { 216 return inputDataFormatInstance; 217 } 218 219 public void initInput(String nullValueStategy, String rootLabels) throws MaltChainedException { 220 ConfigurationDir configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName); 221 DataFormatManager dataFormatManager = configDir.getDataFormatManager(); 222 // DataFormatManager dataFormatManager = flowChartinstance.getDataFormatManager(); 223 SymbolTableHandler symbolTables = configDir.getSymbolTables(); 224 // SymbolTableHandler symbolTables = flowChartinstance.getSymbolTables(); 225 HashMap<String, DataFormatInstance> dataFormatInstances = configDir.getDataFormatInstances(); 226 // HashMap<String, DataFormatInstance> dataFormatInstances = flowChartinstance.getDataFormatInstances(); 227 228 inputDataFormatInstance = dataFormatManager.getInputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy, rootLabels); 229 if (!dataFormatInstances.containsKey(dataFormatManager.getInputDataFormatSpec().getDataFormatName())) { 230 dataFormatInstances.put(dataFormatManager.getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance); 231 } 232 } 233 234 public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException { 235 try { 236 reader = syntaxGraphReader.newInstance(); 237 if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) { 238 reader.open(System.in, inputCharSet); 239 } else if (new File(inputFile).exists()) { 240 reader.setNIterations(iterations); 241 reader.open(inputFile, inputCharSet); 242 } else { 243 reader.setNIterations(iterations); 244 reader.open(Util.findURL(inputFile), inputCharSet); 245 } 246 reader.setDataFormatInstance(inputDataFormatInstance); 247 reader.setOptions(readerOptions); 248 } catch (InstantiationException e) { 249 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e); 250 } catch (IllegalAccessException e) { 251 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e); 252 } 253 } 254 255 public boolean equals(Object obj) { 256 if (this == obj) 257 return true; 258 if (obj == null) 259 return false; 260 if (getClass() != obj.getClass()) 261 return false; 262 return obj.toString().equals(this.toString()); 263 } 264 265 public int hashCode() { 266 return 217 + (null == toString() ? 0 : toString().hashCode()); 267 } 268 269 public String toString() { 270 final StringBuilder sb = new StringBuilder(); 271 sb.append(" read "); 272 sb.append("id:");sb.append(idName); 273 sb.append(' '); 274 sb.append("target:"); 275 sb.append(targetName); 276 sb.append(' '); 277 sb.append("optiongroup:"); 278 sb.append(optiongroupName); 279 return sb.toString(); 280 } 281 }