Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
.project
.settings
target/
data/*
.gitignore
6 changes: 4 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>3.6.0</version>
<version>3.8.0</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
Expand All @@ -123,7 +123,7 @@
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>3.6.0</version>
<version>3.8.0</version>
<classifier>models</classifier>
<scope>runtime</scope>
<exclusions>
Expand Down Expand Up @@ -330,6 +330,8 @@
<artifact>edu.stanford.nlp:stanford-corenlp:jar:models:*</artifact>
<includes>
<include>**/pos-tagger/**/*</include>
<include>**/parser/**/*</include>
<include>**/lexparser/**/*</include>
</includes>
</filter>
</filters>
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/justhalf/nlp/depparser/StanfordDepParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructure.Extras;
import edu.stanford.nlp.trees.TypedDependency;

/**
Expand Down Expand Up @@ -53,7 +54,7 @@ public boolean isThreadSafe() {
public List<TypedDependency> parse(List<CoreLabel> sentence) {
check(sentence);
GrammaticalStructure structure = dependencyParser.predict(sentence);
return structure.typedDependenciesCCprocessed();
return structure.typedDependencies(Extras.NONE);
}

private void check(List<CoreLabel> sentence){
Expand Down
17 changes: 10 additions & 7 deletions src/main/java/justhalf/nlp/reader/acereader/ACEDocument.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ public class ACEDocument implements Serializable{
private static final boolean TEST_STRICT_PARSING = false;
private static final long serialVersionUID = -4698300709681532759L;

public String fileName;
public String text;
public String fullText;
public int offset;
Expand All @@ -103,16 +104,16 @@ public class ACEDocument implements Serializable{
public Map<String, ACEObject> objectsById;
public Map<String, ACEObjectMention<? extends ACEObject>> objectMentionsById;

public ACEDocument(String sgmFilename) throws IOException, SAXException {
this(sgmFilename, false);
public ACEDocument(String fileName, String sgmFilename) throws IOException, SAXException {
this(fileName, sgmFilename, false);
}

public ACEDocument(String sgmFilename, boolean excludeMetadata) throws IOException, SAXException {
this(sgmFilename, sgmFilename.replace(".sgm", ".apf.xml"), excludeMetadata);
public ACEDocument(String fileName, String sgmFilename, boolean excludeMetadata) throws IOException, SAXException {
this(fileName, sgmFilename, sgmFilename.replace(".sgm", ".apf.xml"), excludeMetadata);
}

public ACEDocument(String sgmFilename, String apfFilename, boolean excludeMetadata) throws IOException, SAXException {
this(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(sgmFilename),
public ACEDocument(String fileName, String sgmFilename, String apfFilename, boolean excludeMetadata) throws IOException, SAXException {
this(fileName, IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(sgmFilename),
IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(apfFilename),
excludeMetadata);
}
Expand All @@ -126,7 +127,7 @@ public ACEDocument(String sgmFilename, String apfFilename, boolean excludeMetada
* @throws IOException
* @throws SAXException
*/
public ACEDocument(InputStream sgmStream, InputStream apfStream, boolean excludeMetadata) throws IOException, SAXException{
public ACEDocument(String fileName, InputStream sgmStream, InputStream apfStream, boolean excludeMetadata) throws IOException, SAXException{
DOMParser parser = new DOMParser();
String sgmText = IOUtils.slurpInputStream(sgmStream, "UTF-8");
sgmText = sgmText.replaceAll("<(/)?BODY>", "<$1BODY_TEXT>");
Expand All @@ -143,6 +144,7 @@ public ACEDocument(InputStream sgmStream, InputStream apfStream, boolean exclude
} else {
this.text = this.fullText;
}
this.fileName = fileName;
this.textInLowercase = this.text.equals(this.text.toLowerCase());
this.offset = fullText.indexOf(text);

Expand Down Expand Up @@ -296,6 +298,7 @@ private ACEEntityMention getMention(Node entityMention, ACEEntity aceEntity){
Span span = getSpan(extentCharseq);
String aceText = extentCharseq.getTextContent();
Node head = ((Element)entityMention).getElementsByTagName("HEAD_EXTENT").item(0);
if (head == null) throw new RuntimeException("No head span?");
Node headCharseq = head == null ? null : ((Element)head).getElementsByTagName("CHARSEQ").item(0);
Span headSpan = headCharseq == null ? null : getSpan(headCharseq);
String aceHeadText = headCharseq == null ? "" : headCharseq.getTextContent();
Expand Down
Loading