Class: StanfordCoreNLP::Config

Inherits:

Object

Object
StanfordCoreNLP::Config

show all

Defined in: lib/stanford-core-nlp/config.rb

Constant Summary collapse

LanguageCodes = A hash mapping each language to its 2-letter and 3-letter ISO 639 codes and its humanized name.

{
  # Each key is a language; its value lists the codes listed for that
  # language, in the order: 2-letter code, 3-letter code, humanized name.
  :english => [:en, :eng, :english],
  :german => [:de, :ger, :german],
  :french => [:fr, :fre, :french]
}

ModelFolders = Folders inside the JAR path for the models.

{
  # Keyed by the same annotator names used as top-level keys in Models
  # (:pos, :parse, :ner, :dcoref). Every folder name ends with a trailing
  # slash.
  # NOTE(review): presumably a model file name is appended directly to the
  # folder name — confirm against the code that builds the paths.
  :pos => 'taggers/',
  :parse => 'grammar/',
  :ner => 'classifiers/',
  :dcoref => 'dcoref/'
}

TagSets = Tag sets used by Stanford for each language.

{
  # Maps each language to the symbol naming its tag set.
  :english => :penn,
  # NOTE(review): :stutgart looks like a misspelling of "Stuttgart". It is a
  # runtime symbol, so check every consumer before renaming it.
  :german => :stutgart,
  :french => :paris7
}

Models = Default models for all languages.

{
  # Default model files, keyed first by annotator (:pos, :parse, :ner,
  # :dcoref) and then by language.
  # NOTE(review): file names are presumably resolved relative to the matching
  # ModelFolders entry — confirm against the code that loads them.
  
  # :pos — one tagger file per language.
  :pos => {
    :english => 'english-left3words-distsim.tagger',
    :german => 'german-fast.tagger',
    :french  => 'french.tagger'
  },
  
  # :parse — one serialized grammar file per language.
  :parse => {
    :english => 'englishPCFG.ser.gz',
    :german => 'germanPCFG.ser.gz',
    :french  => 'frenchFactored.ser.gz'
  },
  
  # :ner — only English is configured; there are no :german or :french keys.
  :ner => {
    :english => 'english.all.3class.distsim.crf.ser.gz'
    # TODO: add a :german entry at some point (e.g. :german => {}).
  },
  
  # :dcoref — unlike the sections above, each language maps to a nested hash
  # of string resource name => file name. German and French are present but
  # empty.
  :dcoref => {
    :english => {
      'demonym' => 'demonyms.txt',
      'animate' => 'animate.unigrams.txt',
      'female' => 'female.unigrams.txt',
      'inanimate' => 'inanimate.unigrams.txt',
      'male' => 'male.unigrams.txt',
      'neutral' => 'neutral.unigrams.txt',
      'plural' => 'plural.unigrams.txt',
      'singular' => 'singular.unigrams.txt',
      'states' => 'state-abbreviations.txt',
      'countries' => 'countries', 
      'states.provinces' => 'statesandprovinces',
      'extra.gender' => 'namegender.combine.txt',
      'singleton.predictor' => 'singleton.predictor.ser'
    },
    :german => {},
    :french  => {}
  }
  
  # Models to add (not configured yet). Each line gives the property name,
  # what it points to, and the default value where one is listed.

  #"truecase.model" - path to the true-casing model; default: StanfordCoreNLPModels/truecase/noUN.ser.gz
  #"truecase.bias" - class bias of the true-case model; default: INIT_UPPER:-0.7,UPPER:-0.7,O:0
  #"truecase.mixedcasefile" - path to the mixed-case file; default: StanfordCoreNLPModels/truecase/MixDisambiguation.list
  #"nfl.gazetteer" - path to the gazetteer for the NFL domain
  #"nfl.relation.model" - path to the NFL relation extraction model
}

Annotations = Lists of annotation class names, grouped by the Java class path that contains them.

{
  # Each key is a Java class path (a string); its value is the array of
  # annotation names listed under that path.
  # NOTE(review): the keys start at 'nlp.' and so look relative to a package
  # prefix supplied elsewhere — confirm where the prefix is added.

  'nlp.dcoref.CoNLL2011DocumentReader' => [
    'CorefMentionAnnotation',
    'NamedEntityAnnotation'
  ],

  'nlp.ling.CoreAnnotations' => [

    'AbbrAnnotation',
    'AbgeneAnnotation',
    'AbstrAnnotation',
    'AfterAnnotation',
    'AnswerAnnotation',
    'AnswerObjectAnnotation',
    'AntecedentAnnotation',
    'ArgDescendentAnnotation',
    'ArgumentAnnotation',
    'BagOfWordsAnnotation',
    'BeAnnotation',
    'BeforeAnnotation',
    'BeginIndexAnnotation',
    'BestCliquesAnnotation',
    'BestFullAnnotation',
    'CalendarAnnotation',
    'CategoryAnnotation',
    'CategoryFunctionalTagAnnotation',
    'CharacterOffsetBeginAnnotation',
    'CharacterOffsetEndAnnotation',
    'CharAnnotation',
    'ChineseCharAnnotation',
    'ChineseIsSegmentedAnnotation',
    'ChineseOrigSegAnnotation',
    'ChineseSegAnnotation',
    'ChunkAnnotation',
    'CoarseTagAnnotation',
    'CommonWordsAnnotation',
    'CoNLLDepAnnotation',
    'CoNLLDepParentIndexAnnotation',
    'CoNLLDepTypeAnnotation',
    'CoNLLPredicateAnnotation',
    'CoNLLSRLAnnotation',
    'ContextsAnnotation',
    'CopyAnnotation',
    'CostMagnificationAnnotation',
    'CovertIDAnnotation',
    'D2_LBeginAnnotation',
    'D2_LEndAnnotation',
    'D2_LMiddleAnnotation',
    'DayAnnotation',
    'DependentsAnnotation',
    'DictAnnotation',
    'DistSimAnnotation',
    'DoAnnotation',
    'DocDateAnnotation',
    'DocIDAnnotation',
    'DomainAnnotation',
    'EndIndexAnnotation',
    'EntityClassAnnotation',
    'EntityRuleAnnotation',
    'EntityTypeAnnotation',
    'FeaturesAnnotation',
    'FemaleGazAnnotation',
    'FirstChildAnnotation',
    'ForcedSentenceEndAnnotation',
    'FreqAnnotation',
    'GazAnnotation',
    'GazetteerAnnotation',
    'GenericTokensAnnotation',
    'GeniaAnnotation',
    'GoldAnswerAnnotation',
    'GovernorAnnotation',
    'GrandparentAnnotation',
    'HaveAnnotation',
    'HeadWordStringAnnotation',
    'HeightAnnotation',
    'IDAnnotation',
    'IDFAnnotation',
    'INAnnotation',
    'IndexAnnotation',
    'InterpretationAnnotation',
    'IsDateRangeAnnotation',
    'IsURLAnnotation',
    'LabelAnnotation',
    'LastGazAnnotation',
    'LastTaggedAnnotation',
    'LBeginAnnotation',
    'LeftChildrenNodeAnnotation',
    'LeftTermAnnotation',
    'LemmaAnnotation',
    'LEndAnnotation',
    'LengthAnnotation',
    'LMiddleAnnotation',
    'MaleGazAnnotation',
    'MarkingAnnotation',
    'MonthAnnotation',
    'MorphoCaseAnnotation',
    'MorphoGenAnnotation',
    'MorphoNumAnnotation',
    'MorphoPersAnnotation',
    'NamedEntityTagAnnotation',
    'NeighborsAnnotation',
    'NERIDAnnotation',
    'NormalizedNamedEntityTagAnnotation',
    'NotAnnotation',
    'NumericCompositeObjectAnnotation',
    'NumericCompositeTypeAnnotation',
    'NumericCompositeValueAnnotation',
    'NumericObjectAnnotation',
    'NumericTypeAnnotation',
    'NumericValueAnnotation',
    'NumerizedTokensAnnotation',
    'NumTxtSentencesAnnotation',
    'OriginalAnswerAnnotation',
    'OriginalCharAnnotation',
    'OriginalTextAnnotation',
    'ParagraphAnnotation',
    'ParagraphsAnnotation',
    'ParaPositionAnnotation',
    'ParentAnnotation',
    'PartOfSpeechAnnotation',
    'PercentAnnotation',
    'PhraseWordsAnnotation',
    'PhraseWordsTagAnnotation',
    'PolarityAnnotation',
    'PositionAnnotation',
    'PossibleAnswersAnnotation',
    'PredictedAnswerAnnotation',
    'PrevChildAnnotation',
    'PriorAnnotation',
    'ProjectedCategoryAnnotation',
    'ProtoAnnotation',
    'RoleAnnotation',
    'SectionAnnotation',
    'SemanticHeadTagAnnotation',
    'SemanticHeadWordAnnotation',
    'SemanticTagAnnotation',
    'SemanticWordAnnotation',
    'SentenceIDAnnotation',
    'SentenceIndexAnnotation',
    'SentencePositionAnnotation',
    'SentencesAnnotation',
    'ShapeAnnotation',
    'SpaceBeforeAnnotation',
    'SpanAnnotation',
    'SpeakerAnnotation',
    # NOTE(review): 'SRL_ID' does not follow the *Annotation naming used by
    # the neighbouring entries — confirm it is intentional.
    'SRL_ID',
    'SRLIDAnnotation',
    'SRLInstancesAnnotation',
    'StackedNamedEntityTagAnnotation',
    'StateAnnotation',
    'StemAnnotation',
    'SubcategorizationAnnotation',
    'TagLabelAnnotation',
    'TextAnnotation',
    'TokenBeginAnnotation',
    'TokenEndAnnotation',
    'TokensAnnotation',
    'TopicAnnotation',
    'TrueCaseAnnotation',
    'TrueCaseTextAnnotation',
    'TrueTagAnnotation',
    'UBlockAnnotation',
    'UnaryAnnotation',
    'UnknownAnnotation',
    'UtteranceAnnotation',
    'UTypeAnnotation',
    'ValueAnnotation',
    'VerbSenseAnnotation',
    'WebAnnotation',
    'WordFormAnnotation',
    'WordnetSynAnnotation',
    'WordPositionAnnotation',
    'WordSenseAnnotation',
    'XmlContextAnnotation',
    'XmlElementAnnotation',
    'YearAnnotation'
  ],

  'nlp.dcoref.CorefCoreAnnotations' => [

    'CorefAnnotation',
    'CorefChainAnnotation',
    'CorefClusterAnnotation',
    'CorefClusterIdAnnotation',
    'CorefDestAnnotation',
    'CorefGraphAnnotation'
  ],

  'nlp.ling.CoreLabel' => [
    'GenericAnnotation'
  ],

  'nlp.trees.EnglishGrammaticalRelations' => [
    'AbbreviationModifierGRAnnotation',
    'AdjectivalComplementGRAnnotation',
    'AdjectivalModifierGRAnnotation',
    'AdvClauseModifierGRAnnotation',
    'AdverbialModifierGRAnnotation',
    'AgentGRAnnotation',
    'AppositionalModifierGRAnnotation',
    'ArgumentGRAnnotation',
    'AttributiveGRAnnotation',
    'AuxModifierGRAnnotation',
    'AuxPassiveGRAnnotation',
    'ClausalComplementGRAnnotation',
    'ClausalPassiveSubjectGRAnnotation',
    'ClausalSubjectGRAnnotation',
    'ComplementGRAnnotation',
    'ComplementizerGRAnnotation',
    'ConjunctGRAnnotation',
    'ControllingSubjectGRAnnotation',
    'CoordinationGRAnnotation',
    'CopulaGRAnnotation',
    'DeterminerGRAnnotation',
    'DirectObjectGRAnnotation',
    'ExpletiveGRAnnotation',
    'IndirectObjectGRAnnotation',
    'InfinitivalModifierGRAnnotation',
    'MarkerGRAnnotation',
    'ModifierGRAnnotation',
    'MultiWordExpressionGRAnnotation',
    'NegationModifierGRAnnotation',
    'NominalPassiveSubjectGRAnnotation',
    'NominalSubjectGRAnnotation',
    'NounCompoundModifierGRAnnotation',
    'NpAdverbialModifierGRAnnotation',
    'NumberModifierGRAnnotation',
    'NumericModifierGRAnnotation',
    'ObjectGRAnnotation',
    'ParataxisGRAnnotation',
    'ParticipialModifierGRAnnotation',
    'PhrasalVerbParticleGRAnnotation',
    'PossessionModifierGRAnnotation',
    'PossessiveModifierGRAnnotation',
    'PreconjunctGRAnnotation',
    'PredeterminerGRAnnotation',
    'PredicateGRAnnotation',
    'PrepositionalComplementGRAnnotation',
    'PrepositionalModifierGRAnnotation',
    'PrepositionalObjectGRAnnotation',
    'PunctuationGRAnnotation',
    'PurposeClauseModifierGRAnnotation',
    'QuantifierModifierGRAnnotation',
    'ReferentGRAnnotation',
    'RelativeClauseModifierGRAnnotation',
    'RelativeGRAnnotation',
    'SemanticDependentGRAnnotation',
    'SubjectGRAnnotation',
    'TemporalModifierGRAnnotation',
    'XClausalComplementGRAnnotation'
  ],

  'nlp.trees.GrammaticalRelation' => [
    'DependentGRAnnotation',
    'GovernorGRAnnotation',
    'GrammaticalRelationAnnotation',
    'KillGRAnnotation',
    # NOTE(review): 'Language' does not follow the *Annotation naming used by
    # the neighbouring entries — confirm it belongs in this list.
    'Language',
    'RootGRAnnotation'
  ],

  'nlp.ie.machinereading.structure.MachineReadingAnnotations' => [
    'DependencyAnnotation',
    'DocumentDirectoryAnnotation',
    'DocumentIdAnnotation',
    'EntityMentionsAnnotation',
    'EventMentionsAnnotation',
    'GenderAnnotation',
    'RelationMentionsAnnotation',
    'TriggerAnnotation'
  ],

  'nlp.parser.lexparser.ParserAnnotations' => [
    'ConstraintAnnotation'
  ],

  'nlp.semgraph.SemanticGraphCoreAnnotations' => [
    'BasicDependenciesAnnotation',
    'CollapsedCCProcessedDependenciesAnnotation',
    'CollapsedDependenciesAnnotation'
  ],

  'nlp.time.TimeAnnotations' => [
    'TimexAnnotation',
    # NOTE(review): plural 'TimexAnnotations' sits next to the singular
    # 'TimexAnnotation' — confirm both names are intended.
    'TimexAnnotations'
  ],

  'nlp.time.TimeExpression' => [
    # NOTE(review): the bare name 'Annotation' is unusually generic — confirm
    # it is the intended entry for this class path.
    'Annotation',
    'ChildrenAnnotation',
    'TimeIndexAnnotation'
  ],

  'nlp.trees.TreeCoreAnnotations' => [
    'TreeHeadTagAnnotation',
    'TreeHeadWordAnnotation',
    'TreeAnnotation'
  ]
}

AnnotationsByName = Hash of name => path.

annotations_by_name