001 package org.maltparser; 002 003 import java.net.MalformedURLException; 004 import java.net.URL; 005 import java.util.Iterator; 006 007 import org.maltparser.core.exception.MaltChainedException; 008 import org.maltparser.core.flow.FlowChartInstance; 009 import org.maltparser.core.helper.SystemInfo; 010 import org.maltparser.core.helper.Util; 011 import org.maltparser.core.io.dataformat.ColumnDescription; 012 import org.maltparser.core.io.dataformat.DataFormatInstance; 013 import org.maltparser.core.options.OptionManager; 014 import org.maltparser.core.syntaxgraph.DependencyGraph; 015 import org.maltparser.core.syntaxgraph.DependencyStructure; 016 import org.maltparser.core.syntaxgraph.node.DependencyNode; 017 import org.maltparser.parser.SingleMalt; 018 019 /** 020 * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser. 021 * 022 * There are two ways to call the MaltParserService: 023 * 1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser. 024 * 2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is 025 * done by the third-party program. 026 * 027 * How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex' 028 * 029 * @author Johan Hall 030 */ 031 public class MaltParserService { 032 private URL urlMaltJar; 033 private Engine engine; 034 private FlowChartInstance flowChartInstance; 035 private DataFormatInstance dataFormatInstance; 036 private SingleMalt singleMalt; 037 private int optionContainer; 038 private boolean initialized = false; 039 040 /** 041 * Creates a MaltParserService with the option container 0 042 * 043 * @throws MaltChainedException 044 */ 045 public MaltParserService() throws MaltChainedException { 046 this(0); 047 } 048 049 /** 050 * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program 051 * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only 052 * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers. 053 * 054 * @param optionContainer an integer from 0 to max value of data type Integer 055 * @throws MaltChainedException 056 */ 057 public MaltParserService(int optionContainer) throws MaltChainedException { 058 initialize(); 059 setOptionContainer(optionContainer); 060 } 061 062 /** 063 * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options. 064 * 065 * @param commandLine a commandLine string that controls the MaltParser. 066 * @throws MaltChainedException 067 */ 068 public void runExperiment(String commandLine) throws MaltChainedException { 069 OptionManager.instance().parseCommandLine(commandLine, optionContainer); 070 engine = new Engine(); 071 engine.initialize(optionContainer); 072 engine.process(optionContainer); 073 engine.terminate(optionContainer); 074 } 075 076 /** 077 * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options. 078 * 079 * @param commandLine a commandLine string that controls the MaltParser 080 * @throws MaltChainedException 081 */ 082 public void initializeParserModel(String commandLine) throws MaltChainedException { 083 OptionManager.instance().parseCommandLine(commandLine, optionContainer); 084 // Creates an engine 085 engine = new Engine(); 086 // Initialize the engine with option container and gets a flow chart instance 087 flowChartInstance = engine.initialize(optionContainer); 088 // Runs the preprocess chart items of the "parse" flow chart 089 if (flowChartInstance.hasPreProcessChartItems()) { 090 flowChartInstance.preprocess(); 091 } 092 singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt"); 093 singleMalt.getConfigurationDir().initDataFormat(); 094 dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance( 095 singleMalt.getSymbolTables(), 096 OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value"), 097 OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label")); 098 initialized = true; 099 } 100 101 /** 102 * Parses an array of tokens and returns a dependency structure. 103 * 104 * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel(). 105 * 106 * @param tokens an array of tokens 107 * @return a dependency structure 108 * @throws MaltChainedException 109 */ 110 public DependencyStructure parse(String[] tokens) throws MaltChainedException { 111 if (!initialized) { 112 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); 113 } 114 if (tokens == null || tokens.length == 0) { 115 throw new MaltChainedException("Nothing to parse. "); 116 } 117 118 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); 119 120 for (int i = 0; i < tokens.length; i++) { 121 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 122 DependencyNode node = outputGraph.addDependencyNode(i+1); 123 String[] items = tokens[i].split("\t"); 124 for (int j = 0; j < items.length; j++) { 125 if (columns.hasNext()) { 126 ColumnDescription column = columns.next(); 127 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 128 outputGraph.addLabel(node, column.getName(), items[j]); 129 } 130 } 131 } 132 } 133 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 134 // Invoke parse with the output graph 135 singleMalt.parse(outputGraph); 136 return outputGraph; 137 } 138 139 /** 140 * Terminates the parser model. 141 * 142 * @throws MaltChainedException 143 */ 144 public void terminateParserModel() throws MaltChainedException { 145 // Runs the postprocess chart items of the "parse" flow chart 146 if (flowChartInstance.hasPostProcessChartItems()) { 147 flowChartInstance.postprocess(); 148 } 149 150 // Terminate the flow chart with an option container 151 engine.terminate(optionContainer); 152 } 153 154 private void initialize() throws MaltChainedException { 155 if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) { 156 return; // OptionManager is already initialized 157 } 158 String maltpath = getMaltJarPath(); 159 if (maltpath == null) { 160 new MaltChainedException("malt.jar could not be found. "); 161 } 162 urlMaltJar = Util.findURL(maltpath); 163 try { 164 OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml")); 165 166 } catch (MalformedURLException e) { 167 throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e); 168 } 169 OptionManager.instance().generateMaps(); 170 } 171 172 173 /** 174 * Returns the option container index 175 * 176 * @return the option container index 177 */ 178 public int getOptionContainer() { 179 return optionContainer; 180 } 181 182 private void setOptionContainer(int optionContainer) { 183 this.optionContainer = optionContainer; 184 } 185 186 /** 187 * Returns the path of malt.jar file 188 * 189 * @return the path of malt.jar file 190 */ 191 public static String getMaltJarPath() { 192 if (SystemInfo.getMaltJarPath() != null) { 193 return SystemInfo.getMaltJarPath().toString(); 194 } 195 return null; 196 } 197 198 199 }