Opennlp-servlet

download Opennlp-servlet

of 3

Transcript of Opennlp-servlet

  • 8/11/2019 Opennlp-servlet

    1/3

    9/3/2014 Opennlp-servlet

    https://gist.github.com/spatzle/1104702 1/3

    Opennlp-servlet

    spatzle / OpennlpServlet.java — Created on Jul 25, 2011

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    11

    12

    13

    14

    1516

    17

    18

    19

    20

    21

    22

    23

    24

    25

    26

    27

    28

    29

    30

    31

    32

    3334

    35

    36

    37

    38

    39

    40

    41

    42

    43

    44

    45

    46

    47

    48

    49

    50

    51

    52

    53

    54

    55

    56

    57

    58

    59

    60

    61

    62

    63

    64

    65

    66

    67

    6869

    70

    71

    72

    73

    74

    packagecom.example.opennlp;

    importjava.io.*;

    importjava.util.ArrayList;

    importjava.util.Arrays;

    importjava.util.Map;

    importjava.util.TreeSet;

    importjavax.servlet.*;

    importjavax.servlet.http.*;

    importcom.google.gson.Gson;

    importopennlp.tools.dictionary.Dictionary;

    importopennlp.tools.namefind.DictionaryNameFinder;importopennlp.tools.namefind.NameFinderME;

    importopennlp.tools.namefind.TokenNameFinder;

    importopennlp.tools.namefind.TokenNameFinderModel;

    importopennlp.tools.util.Span;

    importopennlp.tools.util.StringList;

    importopennlp.tools.tokenize.TokenizerModel;

    importopennlp.tools.tokenize.TokenizerME;

    /**

    * This is a servlet interface to the OpenNLP maxent classifier.

    *

    * @author Joyce Chan 2011

    **/

    publicclassOpennlpServletextendsHttpServlet {

    privateTokenNameFinder f_nameFinder; privateTokenNameFinder r_nameFinder;

    privateTokenizerME tokenizer ;

    privateTokenizerME tokenizer_internal_use ;

    privateTreeSetstopwords =newTreeSet();

    privatefinalString ENGLISH_STOP_WORDS[]={

    "a","an","and","are","as","at","be","but","by",

    "for","if","in","into","is","it",

    "no","not","of","on","or","such",

    "that","the","their","then","there","these",

    "they","this","to","was","will","with"

    };

    privateDictionary r_dictionary;

    privatestaticfinallongserialVersionUID =1L;

    privateTokenizerME getTokenizer(String tokenizer_title)throwsServletException{

    String title;

    String location;

    TokenizerME tokenizer =null;

    // get default classifier

    title =getServletConfig().getInitParameter(tokenizer_title);

    if(title ==null||title.trim().equals(""))thrownewServletException("Default Tokenize

    location =getServletConfig().getInitParameter(title);

    if(location ==null||location.trim().equals(""))thrownewServletException("Tokenizer

    InputStream stream =getServletConfig().getServletContext().getResourceAsStream(location);

    if(stream ==null)thrownewServletException("File not found. Filename = "+location);

    try{

    tokenizer =newTokenizerME(newTokenizerModel(newBufferedInputStream(stream)));

    }catch(IOException e){

    try{

    stream =newFileInputStream(location);

    }catch(IOException e2){

    thrownewServletException("IO problem reading tokenizer A. "+location); }

    thrownewServletException("IO problem reading tokenizer B.");

    }

    finally{

    if(stream !=null){

    try{stream.close();}catch(IOException e){}

    OpennlpServlet.java

    https://gist.github.com/https://gist.github.com/spatzlehttps://gist.github.com/https://gist.github.com/spatzle/1104702https://gist.github.com/spatzlehttps://gist.github.com/
  • 8/11/2019 Opennlp-servlet

    2/3

    9/3/2014 Opennlp-servlet

    https://gist.github.com/spatzle/1104702 2/3

    75

    76

    77

    78

    79

    80

    81

    82

    83

    84

    85

    86

    8788

    89

    90

    91

    92

    93

    94

    95

    96

    97

    98

    99

    100

    101

    102

    103

    104

    105

    106

    107

    108

    109

    110

    111

    112

    113

    114

    115

    116

    117

    118

    119

    120

    121

    122123

    124

    125

    126

    127

    128

    129

    130

    131

    132

    133

    134

    135

    136

    137

    138

    139

    140

    141

    142

    143

    144

    145

    146

    147

    148

    149

    150

    151

    152

    153

    154

    155

    156

    157158

    159

    160

    161

    162

    163

    }

    }returntokenizer;

    }//end getTokenModel

    // for ner

    privateTokenNameFinder getNameFinder(String classifier_title)throwsServletException{

    String title;

    String location;

    TokenNameFinder nmFinder =null;

    // get default classifier

    title =getServletConfig().getInitParameter(classifier_title);

    if(title ==null||title.trim().equals(""))thrownewServletException("Default classifi

    location =getServletConfig().getInitParameter(title); if(location ==null||location.trim().equals(""))thrownewServletException("Classifier

    InputStream filestream =getServletConfig().getServletContext().getResourceAsStream(locati

    if(filestream ==null)thrownewServletException("File not found. Filename = "+location);

    try{

    nmFinder =newNameFinderME(newTokenNameFinderModel(newBufferedInputStream(filestream))

    }catch(IOException e){

    thrownewServletException("IO problem reading classifier.");

    }finally{

    if(filestream !=null){

    try{filestream.close();}catch(IOException e){}

    }

    }returnnmFinder;

    }//end getTokenNameFinder

    privateDictionaryNameFinder getDictionaryNameFinder(String filetitle)throwsServletException{

    r_dictionary =newDictionary();

    String title;

    String location;

    title =getServletConfig().getInitParameter(filetitle);

    if(title ==null||title.trim().equals(""))thrownewServletException("Default classifi

    location =getServletConfig().getInitParameter(title);

    if(location ==null||location.trim().equals(""))thrownewServletException("Classifier

    InputStream filestream = getServletConfig().getServletContext().getResourceAsStream(locat

    if(filestream ==null)thrownewServletException("File not found. Filename = "+location);

    try{

    BufferedReader br =newBufferedReader(newInputStreamReader(newDataInputStream(f

    String strLine;

    while((strLine =br.readLine())!=null){

    //change to lower case, remove commas, tokenize, remove stop words

    String s[]=removeStopWordsFromSentence(tokenizer.tokenize(strLine.toLowe

    putInDict(r_dictionary,s);

    for(inti =0;i

  • 8/11/2019 Opennlp-servlet

    3/3

    9/3/2014 Opennlp-servlet

    https://gist.github.com/spatzle/1104702 3/3

    164

    165

    166

    167

    168

    169

    170

    171

    172

    173

    174

    175

    176177

    178

    179

    180

    181

    182

    183

    184

    185

    186

    187

    188

    189

    190

    191

    192

    193

    194

    195

    196

    197

    198

    199

    200

    201

    202

    203

    204

    205

    206

    207

    208

    209

    210

    211212

    213

    214

    215

    216

    217

    218

    219

    220

    221

    222

    223

    224

    225

    226

    227

    228

    229

    230

    String sentence =request.getParameter("sentence");

    String tokens[]=removeStopWordsFromSentence(tokenizer.tokenize(sentence.toLowerC

    Span spannames[]=null;

    if(reqMap.containsKey("dict")&&request.getParameter("dict").equals("on")){

    spannames =r_nameFinder.find(tokens);

    }else{

    spannames =f_nameFinder.find(tokens);

    }

    names =Span.spansToStrings(spannames,tokens);

    if(reqMap.containsKey("wt")&&request.getParameter("wt").equals("json")){

    printAsJson(response,pw,names);

    }elseprintAsHtml(response,pw,names);

    }

    }//end doPost

    // private methods

    privatevoidaddToStopWords(){

    for(inti=0;i