Module: Nlpir

Defined in:
lib/nlpir.rb,
lib/nlpir/version.rb

Constant Summary collapse

NLPIR_FALSE =
0
NLPIR_TRUE =
1
POS_MAP_NUMBER =
4
ICT_POS_MAP_FIRST =

计算所一级标注集

1
ICT_POS_MAP_SECOND =

计算所二级标注集

0
PKU_POS_MAP_SECOND =

北大二级标注集

2
PKU_POS_MAP_FIRST =

北大一级标注集

3
POS_SIZE =
40
# C struct layout for one result record, built with the importer's `struct`
# helper. Members, in order: int start, int length, a char array sPOS of
# POS_SIZE bytes, int iPOS, int word_ID, int word_type, int weight.
# NOTE(review): the member names suggest start/length locate a word inside the
# analysed paragraph and sPOS holds its part-of-speech tag - confirm against
# the NLPIR SDK header.
Result_t =
struct ['int start','int length',"char  sPOS[#{POS_SIZE}]",'int iPOS',
'int word_ID','int word_type','int weight']
GBK_CODE =

默认支持GBK编码

0
UTF8_CODE =

UTF8编码

GBK_CODE + 1
BIG5_CODE =

BIG5编码

GBK_CODE + 2
GBK_FANTI_CODE =

GBK编码,里面包含繁体字

GBK_CODE + 3
# Fiddle bindings for the native NLPIR entry points. Each constant wraps the
# symbol of the same name looked up through `libm` (the loaded library handle,
# defined outside this listing) and records the argument/return types used
# when marshalling the call.

# Native `NLPIR_Init`: arguments (pointer, int), returns int.
NLPIR_Init_rb =
Fiddle::Function.new(
  libm['NLPIR_Init'],
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
  Fiddle::TYPE_INT
)
# Native `NLPIR_Exit`: no arguments, returns int.
NLPIR_Exit_rb =
Fiddle::Function.new(
  libm['NLPIR_Exit'],
  [],
  Fiddle::TYPE_INT
)
# Native `NLPIR_ImportUserDict`: argument (pointer), returns int.
NLPIR_ImportUserDict_rb =
Fiddle::Function.new(
  libm['NLPIR_ImportUserDict'],
  [Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_INT
)
# Native `NLPIR_ParagraphProcess`: arguments (pointer, int), returns pointer.
NLPIR_ParagraphProcess_rb =
Fiddle::Function.new(
  libm['NLPIR_ParagraphProcess'],
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
  Fiddle::TYPE_VOIDP
)
# Native `NLPIR_ParagraphProcessA`: arguments (pointer, pointer), returns pointer.
NLPIR_ParagraphProcessA_rb =
Fiddle::Function.new(
  libm['NLPIR_ParagraphProcessA'],
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_VOIDP
)
# Native `NLPIR_FileProcess`: arguments (pointer, pointer, int), returns double.
NLPIR_FileProcess_rb =
Fiddle::Function.new(
  libm['NLPIR_FileProcess'],
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
  Fiddle::TYPE_DOUBLE
)
# Native `NLPIR_GetParagraphProcessAWordCount`: argument (pointer), returns int.
NLPIR_GetParagraphProcessAWordCount_rb =
Fiddle::Function.new(
  libm['NLPIR_GetParagraphProcessAWordCount'],
  [Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_INT
)
# Native `NLPIR_ParagraphProcessAW`: arguments (int, pointer), returns int.
NLPIR_ParagraphProcessAW_rb =
Fiddle::Function.new(
  libm['NLPIR_ParagraphProcessAW'],
  [Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_INT
)
# Native `NLPIR_AddUserWord`: argument (pointer), returns int.
NLPIR_AddUserWord_rb =
Fiddle::Function.new(
  libm['NLPIR_AddUserWord'],
  [Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_INT
)
# Native `NLPIR_SaveTheUsrDic`: no arguments, returns int.
NLPIR_SaveTheUsrDic_rb =
Fiddle::Function.new(
  libm['NLPIR_SaveTheUsrDic'],
  [],
  Fiddle::TYPE_INT
)
# Native `NLPIR_DelUsrWord`: argument (pointer), returns int.
NLPIR_DelUsrWord_rb =
Fiddle::Function.new(
  libm['NLPIR_DelUsrWord'],
  [Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_INT
)
# Native `NLPIR_GetKeyWords`: arguments (pointer, int, int), returns pointer.
NLPIR_GetKeyWords_rb =
Fiddle::Function.new(
  libm['NLPIR_GetKeyWords'],
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
  Fiddle::TYPE_VOIDP
)
# Native `NLPIR_GetFileKeyWords`: arguments (pointer, int, int), returns pointer.
NLPIR_GetFileKeyWords_rb =
Fiddle::Function.new(
  libm['NLPIR_GetFileKeyWords'],
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
  Fiddle::TYPE_VOIDP
)
# Native `NLPIR_GetNewWords`: arguments (pointer, int, int), returns pointer.
NLPIR_GetNewWords_rb =
Fiddle::Function.new(
  libm['NLPIR_GetNewWords'],
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
  Fiddle::TYPE_VOIDP
)
# Native `NLPIR_GetFileNewWords`: arguments (pointer, int, int), returns pointer.
NLPIR_GetFileNewWords_rb =
Fiddle::Function.new(
  libm['NLPIR_GetFileNewWords'],
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
  Fiddle::TYPE_VOIDP
)
# Native `NLPIR_FingerPrint`: argument (pointer), returns long.
NLPIR_FingerPrint_rb =
Fiddle::Function.new(
  libm['NLPIR_FingerPrint'],
  [Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_LONG
)
# Native `NLPIR_SetPOSmap`: argument (int), returns int.
NLPIR_SetPOSmap_rb =
Fiddle::Function.new(
  libm['NLPIR_SetPOSmap'],
  [Fiddle::TYPE_INT],
  Fiddle::TYPE_INT
)
# Native `NLPIR_NWI_Start`: no arguments, returns int.
NLPIR_NWI_Start_rb =
Fiddle::Function.new(
  libm['NLPIR_NWI_Start'],
  [],
  Fiddle::TYPE_INT
)
# Native `NLPIR_NWI_AddFile`: argument (pointer), returns int.
NLPIR_NWI_AddFile_rb =
Fiddle::Function.new(
  libm['NLPIR_NWI_AddFile'],
  [Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_INT
)
# Native `NLPIR_NWI_AddMem`: argument (pointer), returns int.
NLPIR_NWI_AddMem_rb =
Fiddle::Function.new(
  libm['NLPIR_NWI_AddMem'],
  [Fiddle::TYPE_VOIDP],
  Fiddle::TYPE_INT
)
# Native `NLPIR_NWI_Complete`: no arguments, returns int.
NLPIR_NWI_Complete_rb =
Fiddle::Function.new(
  libm['NLPIR_NWI_Complete'],
  [],
  Fiddle::TYPE_INT
)
# Native `NLPIR_NWI_GetResult`: argument (int), returns pointer.
NLPIR_NWI_GetResult_rb =
Fiddle::Function.new(
  libm['NLPIR_NWI_GetResult'],
  [Fiddle::TYPE_INT],
  Fiddle::TYPE_VOIDP
)
# Native `NLPIR_NWI_Result2UserDict`: no arguments, declared here as returning
# a pointer.
# NOTE(review): every other argument-less binding above returns int - confirm
# the native return type against the NLPIR SDK header.
NLPIR_NWI_Result2UserDict_rb =
Fiddle::Function.new(
  libm['NLPIR_NWI_Result2UserDict'],
  [],
  Fiddle::TYPE_VOIDP
)
VERSION =
"1.1.0"

Instance Method Summary collapse

Instance Method Details

#NLPIR_AddUserWord(sWord) ⇒ Object Also known as: add_userword



222
223
224
# File 'lib/nlpir.rb', line 222

# Hands +sWord+ to the native NLPIR_AddUserWord binding.
#
# @param sWord [Object] forwarded unchanged (the binding declares a pointer argument)
# @return [Object] the binding's result (declared as an int)
def NLPIR_AddUserWord(sWord)
  NLPIR_AddUserWord_rb.(sWord)
end

#NLPIR_DelUsrWord(sWord) ⇒ Object Also known as: del_userword



232
233
234
# File 'lib/nlpir.rb', line 232

# Hands +sWord+ to the native NLPIR_DelUsrWord binding.
#
# @param sWord [Object] forwarded unchanged (the binding declares a pointer argument)
# @return [Object] the binding's result (declared as an int)
def NLPIR_DelUsrWord(sWord)
  native = NLPIR_DelUsrWord_rb
  native.call(sWord)
end

#NLPIR_Exit ⇒ Object Also known as: nlpir_exit



167
168
169
# File 'lib/nlpir.rb', line 167

# Invokes the native NLPIR_Exit binding with no arguments.
#
# @return [Object] the binding's result (declared as an int)
def NLPIR_Exit
  NLPIR_Exit_rb.call
end

#NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged = NLPIR_TRUE) ⇒ Object Also known as: file_proc



201
202
203
# File 'lib/nlpir.rb', line 201

# Forwards a source file name, a result file name and the POS-tagging flag to
# the native NLPIR_FileProcess binding.
#
# @param sSourceFilename [Object] forwarded unchanged (pointer argument)
# @param sResultFilename [Object] forwarded unchanged (pointer argument)
# @param bPOStagged [Integer] forwarded unchanged; defaults to NLPIR_TRUE
# @return [Object] the binding's result (declared as a double)
def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged = NLPIR_TRUE)
  native_args = [sSourceFilename, sResultFilename, bPOStagged]
  NLPIR_FileProcess_rb.call(*native_args)
end

#NLPIR_FingerPrint(sLine) ⇒ Object Also known as: text_fingerprint



259
260
261
# File 'lib/nlpir.rb', line 259

# Hands +sLine+ to the native NLPIR_FingerPrint binding.
#
# @param sLine [Object] forwarded unchanged (the binding declares a pointer argument)
# @return [Object] the binding's result (declared as a long)
def NLPIR_FingerPrint(sLine)
  NLPIR_FingerPrint_rb.(sLine)
end

#NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: file_keywords



242
243
244
245
246
# File 'lib/nlpir.rb', line 242

# Calls the native NLPIR_GetFileKeyWords binding and returns its result as a
# String transcoded from GBK into the charset selected by NLPIR_Init.
#
# NOTE(review): the other keyword/new-word wrappers only relabel the returned
# bytes with @charset, whereas this one treats them as GBK and transcodes -
# confirm that the difference is intentional.
#
# @param sTextFile [Object] forwarded unchanged (pointer argument)
# @param nMaxKeyLimit [Integer] forwarded unchanged; defaults to 50
# @param bWeightOut [Integer] forwarded unchanged; defaults to NLPIR_FALSE
# @return [String] the binding's result, converted with `to_s` and re-encoded
def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE)
  raw = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut)
  raw.to_s.force_encoding('gbk').encode!(@charset)
end

#NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: file_newwords



254
255
256
# File 'lib/nlpir.rb', line 254

# Calls the native NLPIR_GetFileNewWords binding and returns its result as a
# String tagged with the charset selected by NLPIR_Init (bytes are relabelled,
# not transcoded).
#
# @param sTextFile [Object] forwarded unchanged (pointer argument)
# @param nMaxKeyLimit [Integer] forwarded unchanged; defaults to 50
# @param bWeightOut [Integer] forwarded unchanged; defaults to NLPIR_FALSE
# @return [String] the binding's result after `to_s.force_encoding(@charset)`
def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE)
  raw = NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut)
  text = raw.to_s
  text.force_encoding(@charset)
end

#NLPIR_GetKeyWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: text_keywords



237
238
239
# File 'lib/nlpir.rb', line 237

# Calls the native NLPIR_GetKeyWords binding and returns its result as a
# String tagged with the charset selected by NLPIR_Init (bytes are relabelled,
# not transcoded).
#
# @param sLine [Object] forwarded unchanged (pointer argument)
# @param nMaxKeyLimit [Integer] forwarded unchanged; defaults to 50
# @param bWeightOut [Integer] forwarded unchanged; defaults to NLPIR_FALSE
# @return [String] the binding's result after `to_s.force_encoding(@charset)`
def NLPIR_GetKeyWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE)
  raw = NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut)
  raw.to_s.force_encoding(@charset)
end

#NLPIR_GetNewWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: text_newwords



249
250
251
# File 'lib/nlpir.rb', line 249

# Calls the native NLPIR_GetNewWords binding and returns its result as a
# String tagged with the charset selected by NLPIR_Init (bytes are relabelled,
# not transcoded).
#
# @param sLine [Object] forwarded unchanged (pointer argument)
# @param nMaxKeyLimit [Integer] forwarded unchanged; defaults to 50
# @param bWeightOut [Integer] forwarded unchanged; defaults to NLPIR_FALSE
# @return [String] the binding's result after `to_s.force_encoding(@charset)`
def NLPIR_GetNewWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE)
  raw = NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut)
  text = raw.to_s
  text.force_encoding(@charset)
end

#NLPIR_GetParagraphProcessAWordCount(sParagraph) ⇒ Object Also known as: text_wordcount



196
197
198
# File 'lib/nlpir.rb', line 196

# Hands +sParagraph+ to the native NLPIR_GetParagraphProcessAWordCount binding.
#
# @param sParagraph [Object] forwarded unchanged (the binding declares a pointer argument)
# @return [Object] the binding's result (declared as an int)
def NLPIR_GetParagraphProcessAWordCount(sParagraph)
  counter = NLPIR_GetParagraphProcessAWordCount_rb
  counter.call(sParagraph)
end

#NLPIR_ImportUserDict(sFilename) ⇒ Object Also known as: import_userdict



172
173
174
# File 'lib/nlpir.rb', line 172

# Hands +sFilename+ to the native NLPIR_ImportUserDict binding.
#
# @param sFilename [Object] forwarded unchanged (the binding declares a pointer argument)
# @return [Object] the binding's result (declared as an int)
def NLPIR_ImportUserDict(sFilename)
  NLPIR_ImportUserDict_rb.(sFilename)
end

#NLPIR_Init(sInitDirPath = nil, encoding = UTF8_CODE) ⇒ Object Also known as: nlpir_init

–函数



152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/nlpir.rb', line 152

# Makes sure a "Data" directory exists under the given directory (seeding it
# from the copy that sits next to this source file when it is missing),
# records the Ruby charset name matching +encoding+ in @charset, and calls the
# native NLPIR_Init binding.
#
# @param sInitDirPath [String, nil] directory that should contain "Data/".
#   When nil the current working directory is used; previously the documented
#   nil default raised NoMethodError on `nil + "/Data/"`.
# @param encoding [Integer] one of GBK_CODE, UTF8_CODE, BIG5_CODE or
#   GBK_FANTI_CODE; any other value leaves @charset untouched
# @return [Object] the binding's result (declared as an int)
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE)
  # NOTE(review): the native call below is always given nil rather than the
  # directory; presumably nil makes the library look in the current working
  # directory, which is why that is the fallback here - confirm against the SDK.
  base_dir = sInitDirPath.nil? ? Dir.pwd : sInitDirPath
  data_dir = base_dir + "/Data/"
  unless File.exist?(data_dir)
    FileUtils.mkdir(data_dir)
    bundled_data = File.expand_path("../Data/", __FILE__)
    FileUtils.copy_entry bundled_data, data_dir
  end

  charset =
    case encoding
    when GBK_CODE, GBK_FANTI_CODE then 'gbk'
    when UTF8_CODE then 'utf-8'
    when BIG5_CODE then 'big5'
    end
  # Unknown encodings keep whatever charset was set before, as the original did.
  @charset = charset if charset

  NLPIR_Init_rb.call(nil, encoding)
end

#NLPIR_NWI_AddFile(sFilename) ⇒ Object Also known as: NWI_addfile



274
275
276
# File 'lib/nlpir.rb', line 274

# Hands +sFilename+ to the native NLPIR_NWI_AddFile binding.
#
# @param sFilename [Object] forwarded unchanged (the binding declares a pointer argument)
# @return [Object] the binding's result (declared as an int)
def NLPIR_NWI_AddFile(sFilename)
  native = NLPIR_NWI_AddFile_rb
  native.call(sFilename)
end

#NLPIR_NWI_AddMem(sFilename) ⇒ Object Also known as: NWI_addmem



279
280
281
# File 'lib/nlpir.rb', line 279

# Hands its argument to the native NLPIR_NWI_AddMem binding.
#
# @param sFilename [Object] forwarded unchanged (the binding declares a pointer argument)
# @return [Object] the binding's result (declared as an int)
def NLPIR_NWI_AddMem(sFilename)
  NLPIR_NWI_AddMem_rb.(sFilename)
end

#NLPIR_NWI_Complete ⇒ Object Also known as: NWI_complete



284
285
286
# File 'lib/nlpir.rb', line 284

# Invokes the native NLPIR_NWI_Complete binding with no arguments.
#
# @return [Object] the binding's result (declared as an int)
def NLPIR_NWI_Complete
  NLPIR_NWI_Complete_rb.call
end

#NLPIR_NWI_GetResult(bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: NWI_result



289
290
291
# File 'lib/nlpir.rb', line 289

# Hands +bWeightOut+ to the native NLPIR_NWI_GetResult binding and returns the
# binding's result as-is (declared as a pointer; no String conversion or
# charset tagging is applied here).
#
# @param bWeightOut [Integer] forwarded unchanged; defaults to NLPIR_FALSE
# @return [Object] the binding's raw result
def NLPIR_NWI_GetResult(bWeightOut = NLPIR_FALSE)
  native = NLPIR_NWI_GetResult_rb
  native.call(bWeightOut)
end

#NLPIR_NWI_Result2UserDict ⇒ Object Also known as: NWI_result2userdict



294
295
296
# File 'lib/nlpir.rb', line 294

# Invokes the native NLPIR_NWI_Result2UserDict binding with no arguments.
#
# @return [Object] the binding's result (declared as a pointer)
def NLPIR_NWI_Result2UserDict
  NLPIR_NWI_Result2UserDict_rb.call
end

#NLPIR_NWI_Start ⇒ Object Also known as: NWI_start



269
270
271
# File 'lib/nlpir.rb', line 269

# Invokes the native NLPIR_NWI_Start binding with no arguments.
#
# @return [Object] the binding's result (declared as an int)
def NLPIR_NWI_Start
  NLPIR_NWI_Start_rb.call
end

#NLPIR_ParagraphProcess(sParagraph, bPOStagged = NLPIR_TRUE) ⇒ Object Also known as: text_proc



177
178
179
# File 'lib/nlpir.rb', line 177

# Calls the native NLPIR_ParagraphProcess binding and returns its result as a
# String tagged with the charset selected by NLPIR_Init (bytes are relabelled,
# not transcoded).
#
# @param sParagraph [Object] forwarded unchanged (pointer argument)
# @param bPOStagged [Integer] forwarded unchanged; defaults to NLPIR_TRUE
# @return [String] the binding's result after `to_s.force_encoding(@charset)`
def NLPIR_ParagraphProcess(sParagraph, bPOStagged = NLPIR_TRUE)
  raw = NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged)
  raw.to_s.force_encoding(@charset)
end

#NLPIR_ParagraphProcessA(sParagraph) ⇒ Object Also known as: text_procA



182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/nlpir.rb', line 182

# Segments +sParagraph+ through the native NLPIR_ParagraphProcessA binding and
# wraps the returned block of records as one Result_t per word.
#
# The word count is obtained up front from
# NLPIR_GetParagraphProcessAWordCount. When that count is not positive, or the
# native call hands back a null pointer, an empty array is returned; earlier
# versions always wrapped the returned address once, producing a bogus entry
# for empty input.
#
# @param sParagraph [Object] forwarded unchanged (pointer argument)
# @return [Array] Result_t structs laid over consecutive Result_t.size-byte
#   slots starting at the address returned by the binding
def NLPIR_ParagraphProcessA(sParagraph)
  word_count = NLPIR_GetParagraphProcessAWordCount(sParagraph)
  return [] unless word_count > 0

  # The native function wants a pointer to a count: wrap the count as a
  # pointer-sized value and pass the address of that storage.
  count_holder = Fiddle::Pointer.to_ptr(word_count)
  raw = NLPIR_ParagraphProcessA_rb.call(sParagraph, count_holder.ref.to_i)

  base = Fiddle::Pointer.new(raw.to_i)
  return [] if base.null?

  stride = Result_t.size
  (0...word_count).map { |index| Result_t.new(base + index * stride) }
end

#NLPIR_ParagraphProcessAW(sParagraph) ⇒ Object Also known as: text_procAW



207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/nlpir.rb', line 207

# Segments +sParagraph+ into a caller-allocated buffer through the native
# NLPIR_ParagraphProcessAW binding and wraps the buffer as one Result_t per word.
#
# Changes from the earlier version:
# * a non-positive word count returns [] instead of wrapping an empty
#   allocation as one bogus entry;
# * the allocated buffer (which is released by its free function once it is
#   garbage collected) is now referenced from every returned struct, so it
#   stays alive as long as any result is in use. Previously the only
#   reference was overwritten while walking the buffer;
# * Fiddle constants are fully qualified, matching the rest of the file.
#
# @param sParagraph [Object] passed to NLPIR_GetParagraphProcessAWordCount to
#   size the buffer (the native AW call itself receives only count and buffer)
# @return [Array] Result_t structs laid over consecutive Result_t.size-byte
#   slots of the buffer
def NLPIR_ParagraphProcessAW(sParagraph)
  word_count = NLPIR_GetParagraphProcessAWordCount(sParagraph)
  return [] unless word_count > 0

  stride = Result_t.size
  release = Fiddle::Function.new(Fiddle::RUBY_FREE, [Fiddle::TYPE_VOIDP], Fiddle::TYPE_VOID)
  buffer = Fiddle::Pointer.malloc(stride * word_count, release)
  NLPIR_ParagraphProcessAW_rb.call(word_count, buffer)

  (0...word_count).map do |index|
    entry = Result_t.new(buffer + index * stride)
    # Pin the owning allocation to the struct so GC cannot free it early.
    entry.instance_variable_set(:@nlpir_buffer, buffer)
    entry
  end
end

#NLPIR_SaveTheUsrDic ⇒ Object Also known as: save_userdict



227
228
229
# File 'lib/nlpir.rb', line 227

# Invokes the native NLPIR_SaveTheUsrDic binding with no arguments.
#
# @return [Object] the binding's result (declared as an int)
def NLPIR_SaveTheUsrDic
  NLPIR_SaveTheUsrDic_rb.call
end

#NLPIR_SetPOSmap(nPOSmap) ⇒ Object Also known as: setPOSmap



264
265
266
# File 'lib/nlpir.rb', line 264

# Hands +nPOSmap+ to the native NLPIR_SetPOSmap binding.
#
# @param nPOSmap [Integer] forwarded unchanged (the binding declares an int argument)
# @return [Object] the binding's result (declared as an int)
def NLPIR_SetPOSmap(nPOSmap)
  NLPIR_SetPOSmap_rb.(nPOSmap)
end