Module: Nlpir
- Defined in:
- lib/nlpir.rb,
lib/nlpir/version.rb
Constant Summary collapse
- NLPIR_FALSE =
0
- NLPIR_TRUE =
1
- POS_MAP_NUMBER =
4
- ICT_POS_MAP_FIRST =
计算所一级标注集
1
- ICT_POS_MAP_SECOND =
计算所二级标注集
0
- PKU_POS_MAP_SECOND =
北大二级标注集
2
- PKU_POS_MAP_FIRST =
北大一级标注集
3
- POS_SIZE =
40
- Result_t =
struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS', 'int word_ID','int word_type','int weight']
- GBK_CODE =
默认支持GBK编码
0
- UTF8_CODE =
UTF8编码
GBK_CODE + 1
- BIG5_CODE =
BIG5编码
GBK_CODE + 2
- GBK_FANTI_CODE =
GBK编码,里面包含繁体字
GBK_CODE + 3
- NLPIR_Init_rb =
Fiddle::Function.new( libm['NLPIR_Init'], [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT], Fiddle::TYPE_INT )
- NLPIR_Exit_rb =
Fiddle::Function.new( libm['NLPIR_Exit'], [], Fiddle::TYPE_INT )
- NLPIR_ImportUserDict_rb =
Fiddle::Function.new( libm['NLPIR_ImportUserDict'], [Fiddle::TYPE_VOIDP], Fiddle::TYPE_INT )
- NLPIR_ParagraphProcess_rb =
Fiddle::Function.new( libm['NLPIR_ParagraphProcess'], [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT], Fiddle::TYPE_VOIDP )
- NLPIR_ParagraphProcessA_rb =
Fiddle::Function.new( libm['NLPIR_ParagraphProcessA'], [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP], Fiddle::TYPE_VOIDP )
- NLPIR_FileProcess_rb =
Fiddle::Function.new( libm['NLPIR_FileProcess'], [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT], Fiddle::TYPE_DOUBLE )
- NLPIR_GetParagraphProcessAWordCount_rb =
Fiddle::Function.new( libm['NLPIR_GetParagraphProcessAWordCount'], [Fiddle::TYPE_VOIDP], Fiddle::TYPE_INT )
- NLPIR_ParagraphProcessAW_rb =
Fiddle::Function.new( libm['NLPIR_ParagraphProcessAW'], [Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP], Fiddle::TYPE_INT )
- NLPIR_AddUserWord_rb =
Fiddle::Function.new( libm['NLPIR_AddUserWord'], [Fiddle::TYPE_VOIDP], Fiddle::TYPE_INT )
- NLPIR_SaveTheUsrDic_rb =
Fiddle::Function.new( libm['NLPIR_SaveTheUsrDic'], [], Fiddle::TYPE_INT )
- NLPIR_DelUsrWord_rb =
Fiddle::Function.new( libm['NLPIR_DelUsrWord'], [Fiddle::TYPE_VOIDP], Fiddle::TYPE_INT )
- NLPIR_GetKeyWords_rb =
Fiddle::Function.new( libm['NLPIR_GetKeyWords'], [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT], Fiddle::TYPE_VOIDP )
- NLPIR_GetFileKeyWords_rb =
Fiddle::Function.new( libm['NLPIR_GetFileKeyWords'], [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT], Fiddle::TYPE_VOIDP )
- NLPIR_GetNewWords_rb =
Fiddle::Function.new( libm['NLPIR_GetNewWords'], [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT], Fiddle::TYPE_VOIDP )
- NLPIR_GetFileNewWords_rb =
Fiddle::Function.new( libm['NLPIR_GetFileNewWords'], [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT], Fiddle::TYPE_VOIDP )
- NLPIR_FingerPrint_rb =
Fiddle::Function.new( libm['NLPIR_FingerPrint'], [Fiddle::TYPE_VOIDP], Fiddle::TYPE_LONG )
- NLPIR_SetPOSmap_rb =
Fiddle::Function.new( libm['NLPIR_SetPOSmap'], [Fiddle::TYPE_INT], Fiddle::TYPE_INT )
- NLPIR_NWI_Start_rb =
Fiddle::Function.new( libm['NLPIR_NWI_Start'], [], Fiddle::TYPE_INT )
- NLPIR_NWI_AddFile_rb =
Fiddle::Function.new( libm['NLPIR_NWI_AddFile'], [Fiddle::TYPE_VOIDP], Fiddle::TYPE_INT )
- NLPIR_NWI_AddMem_rb =
Fiddle::Function.new( libm['NLPIR_NWI_AddMem'], [Fiddle::TYPE_VOIDP], Fiddle::TYPE_INT )
- NLPIR_NWI_Complete_rb =
Fiddle::Function.new( libm['NLPIR_NWI_Complete'], [], Fiddle::TYPE_INT )
- NLPIR_NWI_GetResult_rb =
Fiddle::Function.new( libm['NLPIR_NWI_GetResult'], [Fiddle::TYPE_INT], Fiddle::TYPE_VOIDP )
- NLPIR_NWI_Result2UserDict_rb =
Fiddle::Function.new( libm['NLPIR_NWI_Result2UserDict'], [], Fiddle::TYPE_VOIDP )
- VERSION =
"1.1.0"
Instance Method Summary collapse
- #NLPIR_AddUserWord(sWord) ⇒ Object (also: #add_userword)
- #NLPIR_DelUsrWord(sWord) ⇒ Object (also: #del_userword)
- #NLPIR_Exit ⇒ Object (also: #nlpir_exit)
- #NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged = NLPIR_TRUE) ⇒ Object (also: #file_proc)
- #NLPIR_FingerPrint(sLine) ⇒ Object (also: #text_fingerprint)
- #NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object (also: #file_keywords)
- #NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object (also: #file_newwords)
- #NLPIR_GetKeyWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object (also: #text_keywords)
- #NLPIR_GetNewWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object (also: #text_newwords)
- #NLPIR_GetParagraphProcessAWordCount(sParagraph) ⇒ Object (also: #text_wordcount)
- #NLPIR_ImportUserDict(sFilename) ⇒ Object (also: #import_userdict)
-
#NLPIR_Init(sInitDirPath = nil, encoding = UTF8_CODE) ⇒ Object
(also: #nlpir_init)
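–函数 (function): prepares the Data directory, records the result charset, and calls the native NLPIR_Init.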
- #NLPIR_NWI_AddFile(sFilename) ⇒ Object (also: #NWI_addfile)
- #NLPIR_NWI_AddMem(sFilename) ⇒ Object (also: #NWI_addmem)
- #NLPIR_NWI_Complete ⇒ Object (also: #NWI_complete)
- #NLPIR_NWI_GetResult(bWeightOut = NLPIR_FALSE) ⇒ Object (also: #NWI_result)
- #NLPIR_NWI_Result2UserDict ⇒ Object (also: #NWI_result2userdict)
- #NLPIR_NWI_Start ⇒ Object (also: #NWI_start)
- #NLPIR_ParagraphProcess(sParagraph, bPOStagged = NLPIR_TRUE) ⇒ Object (also: #text_proc)
- #NLPIR_ParagraphProcessA(sParagraph) ⇒ Object (also: #text_procA)
- #NLPIR_ParagraphProcessAW(sParagraph) ⇒ Object (also: #text_procAW)
- #NLPIR_SaveTheUsrDic ⇒ Object (also: #save_userdict)
- #NLPIR_SetPOSmap(nPOSmap) ⇒ Object (also: #setPOSmap)
Instance Method Details
#NLPIR_AddUserWord(sWord) ⇒ Object Also known as: add_userword
222 223 224 |
# File 'lib/nlpir.rb', line 222
# Forwards +sWord+ to the native NLPIR_AddUserWord binding.
#
# @param sWord [Object] value passed as the binding's single pointer
#   argument (presumably a String holding the user word)
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_AddUserWord(sWord)
  binding_fn = NLPIR_AddUserWord_rb
  binding_fn.call(sWord)
end
#NLPIR_DelUsrWord(sWord) ⇒ Object Also known as: del_userword
232 233 234 |
# File 'lib/nlpir.rb', line 232
# Forwards +sWord+ to the native NLPIR_DelUsrWord binding.
#
# @param sWord [Object] value passed as the binding's single pointer
#   argument (presumably a String holding the user word)
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_DelUsrWord(sWord)
  binding_fn = NLPIR_DelUsrWord_rb
  binding_fn.call(sWord)
end
#NLPIR_Exit ⇒ Object Also known as: nlpir_exit
167 168 169 |
# File 'lib/nlpir.rb', line 167
# Invokes the native NLPIR_Exit binding, which takes no arguments.
#
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_Exit
  NLPIR_Exit_rb.call
end
#NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged = NLPIR_TRUE) ⇒ Object Also known as: file_proc
201 202 203 |
# File 'lib/nlpir.rb', line 201
# Forwards a source path, a result path and a tagging flag to the native
# NLPIR_FileProcess binding.
#
# @param sSourceFilename [Object] first pointer argument (presumably the input file path)
# @param sResultFilename [Object] second pointer argument (presumably the output file path)
# @param bPOStagged [Integer] flag passed as the int argument; defaults to NLPIR_TRUE
# @return [Float] whatever the Fiddle call returns (binding declares TYPE_DOUBLE)
def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged = NLPIR_TRUE)
  call_args = [sSourceFilename, sResultFilename, bPOStagged]
  NLPIR_FileProcess_rb.call(*call_args)
end
#NLPIR_FingerPrint(sLine) ⇒ Object Also known as: text_fingerprint
259 260 261 |
# File 'lib/nlpir.rb', line 259
# Forwards +sLine+ to the native NLPIR_FingerPrint binding.
#
# @param sLine [Object] value passed as the binding's single pointer
#   argument (presumably the text to fingerprint)
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_LONG)
def NLPIR_FingerPrint(sLine)
  binding_fn = NLPIR_FingerPrint_rb
  binding_fn.call(sLine)
end
#NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: file_keywords
242 243 244 245 246 |
# File 'lib/nlpir.rb', line 242
# Calls the native NLPIR_GetFileKeyWords binding and returns its result as a
# String tagged with the charset recorded by NLPIR_Init.
#
# The previous implementation always labelled the bytes as GBK and then
# transcoded to @charset, unlike the other keyword/new-word helpers in this
# file, which only tag the bytes with @charset. When the library was
# initialized for UTF-8 or Big5 that mislabelled the bytes and produced
# garbage or an encoding error.
#
# @param sTextFile [Object] first pointer argument (presumably a file path)
# @param nMaxKeyLimit [Integer] int argument; defaults to 50
# @param bWeightOut [Integer] int flag; defaults to NLPIR_FALSE
# @return [String] the binding's result converted with #to_s and tagged with
#   @charset (no transcoding is performed)
def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE)
  raw = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut)
  raw.to_s.force_encoding(@charset)
end
#NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: file_newwords
254 255 256 |
# File 'lib/nlpir.rb', line 254
# Calls the native NLPIR_GetFileNewWords binding and returns its result as a
# String tagged with @charset.
#
# @param sTextFile [Object] first pointer argument (presumably a file path)
# @param nMaxKeyLimit [Integer] int argument; defaults to 50
# @param bWeightOut [Integer] int flag; defaults to NLPIR_FALSE
# @return [String] the binding's result converted with #to_s and tagged with
#   @charset via force_encoding (no transcoding)
def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE)
  raw = NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut)
  new_words = raw.to_s
  new_words.force_encoding(@charset)
end
#NLPIR_GetKeyWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: text_keywords
237 238 239 |
# File 'lib/nlpir.rb', line 237
# Calls the native NLPIR_GetKeyWords binding and returns its result as a
# String tagged with @charset.
#
# @param sLine [Object] first pointer argument (presumably the text to analyse)
# @param nMaxKeyLimit [Integer] int argument; defaults to 50
# @param bWeightOut [Integer] int flag; defaults to NLPIR_FALSE
# @return [String] the binding's result converted with #to_s and tagged with
#   @charset via force_encoding (no transcoding)
def NLPIR_GetKeyWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE)
  raw = NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut)
  keywords = raw.to_s
  keywords.force_encoding(@charset)
end
#NLPIR_GetNewWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: text_newwords
249 250 251 |
# File 'lib/nlpir.rb', line 249
# Calls the native NLPIR_GetNewWords binding and returns its result as a
# String tagged with @charset.
#
# @param sLine [Object] first pointer argument (presumably the text to analyse)
# @param nMaxKeyLimit [Integer] int argument; defaults to 50
# @param bWeightOut [Integer] int flag; defaults to NLPIR_FALSE
# @return [String] the binding's result converted with #to_s and tagged with
#   @charset via force_encoding (no transcoding)
def NLPIR_GetNewWords(sLine, nMaxKeyLimit = 50, bWeightOut = NLPIR_FALSE)
  raw = NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut)
  new_words = raw.to_s
  new_words.force_encoding(@charset)
end
#NLPIR_GetParagraphProcessAWordCount(sParagraph) ⇒ Object Also known as: text_wordcount
196 197 198 |
# File 'lib/nlpir.rb', line 196
# Forwards +sParagraph+ to the native NLPIR_GetParagraphProcessAWordCount
# binding.
#
# @param sParagraph [Object] value passed as the binding's single pointer
#   argument (presumably the text to segment)
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_GetParagraphProcessAWordCount(sParagraph)
  binding_fn = NLPIR_GetParagraphProcessAWordCount_rb
  binding_fn.call(sParagraph)
end
#NLPIR_ImportUserDict(sFilename) ⇒ Object Also known as: import_userdict
172 173 174 |
# File 'lib/nlpir.rb', line 172
# Forwards +sFilename+ to the native NLPIR_ImportUserDict binding.
#
# @param sFilename [Object] value passed as the binding's single pointer
#   argument (presumably the dictionary file path)
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_ImportUserDict(sFilename)
  binding_fn = NLPIR_ImportUserDict_rb
  binding_fn.call(sFilename)
end
#NLPIR_Init(sInitDirPath = nil, encoding = UTF8_CODE) ⇒ Object Also known as: nlpir_init
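–函数 (function): prepares the Data directory, records the result charset, and calls the native NLPIR_Init.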
152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/nlpir.rb', line 152
# Ensures a "Data" directory exists under the given directory, records the
# charset used to tag returned strings, and calls the native NLPIR_Init
# binding.
#
# When "<dir>/Data/" does not exist it is created and filled with a copy of
# the "Data" directory located next to this source file.
#
# @param sInitDirPath [String, nil] directory that should contain "Data";
#   nil falls back to the current working directory
# @param encoding [Integer] one of GBK_CODE, UTF8_CODE, BIG5_CODE or
#   GBK_FANTI_CODE; any other value leaves @charset untouched
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_Init(sInitDirPath = nil, encoding = UTF8_CODE)
  # The documented default (nil) used to raise NoMethodError on
  # `nil + "/Data/"`; use the working directory in that case.
  data_dir = "#{sInitDirPath || Dir.pwd}/Data/"

  unless File.exist?(data_dir)
    FileUtils.mkdir(data_dir)
    # The original read `File.("../Data/", __FILE__)`, i.e. File.call, which
    # does not exist; expand_path is the intended path resolution.
    bundled_data = File.expand_path("../Data/", __FILE__)
    FileUtils.copy_entry(bundled_data, data_dir)
  end

  @charset =
    case encoding
    when GBK_CODE, GBK_FANTI_CODE then 'gbk'
    when UTF8_CODE then 'utf-8'
    when BIG5_CODE then 'big5'
    else @charset
    end

  # NOTE(review): the native call is given nil rather than the directory
  # prepared above, as in the original code -- confirm this is intended.
  NLPIR_Init_rb.call(nil, encoding)
end
#NLPIR_NWI_AddFile(sFilename) ⇒ Object Also known as: NWI_addfile
274 275 276 |
# File 'lib/nlpir.rb', line 274
# Forwards +sFilename+ to the native NLPIR_NWI_AddFile binding.
#
# @param sFilename [Object] value passed as the binding's single pointer
#   argument (presumably a file path)
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_NWI_AddFile(sFilename)
  binding_fn = NLPIR_NWI_AddFile_rb
  binding_fn.call(sFilename)
end
#NLPIR_NWI_AddMem(sFilename) ⇒ Object Also known as: NWI_addmem
279 280 281 |
# File 'lib/nlpir.rb', line 279
# Forwards its argument to the native NLPIR_NWI_AddMem binding.
#
# @param sFilename [Object] value passed as the binding's single pointer
#   argument (NOTE(review): despite the name this is presumably in-memory
#   text rather than a path -- confirm against the native API)
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_NWI_AddMem(sFilename)
  binding_fn = NLPIR_NWI_AddMem_rb
  binding_fn.call(sFilename)
end
#NLPIR_NWI_Complete ⇒ Object Also known as: NWI_complete
284 285 286 |
# File 'lib/nlpir.rb', line 284
# Invokes the native NLPIR_NWI_Complete binding, which takes no arguments.
#
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_NWI_Complete
  NLPIR_NWI_Complete_rb.call
end
#NLPIR_NWI_GetResult(bWeightOut = NLPIR_FALSE) ⇒ Object Also known as: NWI_result
289 290 291 |
# File 'lib/nlpir.rb', line 289
# Forwards +bWeightOut+ to the native NLPIR_NWI_GetResult binding.
#
# @param bWeightOut [Integer] int flag; defaults to NLPIR_FALSE
# @return [Object] the raw value from the Fiddle call (binding declares
#   TYPE_VOIDP); it is returned as-is, without #to_s or charset tagging
def NLPIR_NWI_GetResult(bWeightOut = NLPIR_FALSE)
  binding_fn = NLPIR_NWI_GetResult_rb
  binding_fn.call(bWeightOut)
end
#NLPIR_NWI_Result2UserDict ⇒ Object Also known as: NWI_result2userdict
294 295 296 |
# File 'lib/nlpir.rb', line 294
# Invokes the native NLPIR_NWI_Result2UserDict binding, which takes no
# arguments.
#
# @return [Object] the raw value from the Fiddle call (binding declares
#   TYPE_VOIDP), returned unchanged
def NLPIR_NWI_Result2UserDict
  NLPIR_NWI_Result2UserDict_rb.call
end
#NLPIR_NWI_Start ⇒ Object Also known as: NWI_start
269 270 271 |
# File 'lib/nlpir.rb', line 269
# Invokes the native NLPIR_NWI_Start binding, which takes no arguments.
#
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_NWI_Start
  NLPIR_NWI_Start_rb.call
end
#NLPIR_ParagraphProcess(sParagraph, bPOStagged = NLPIR_TRUE) ⇒ Object Also known as: text_proc
177 178 179 |
# File 'lib/nlpir.rb', line 177
# Calls the native NLPIR_ParagraphProcess binding and returns its result as a
# String tagged with @charset.
#
# @param sParagraph [Object] pointer argument (presumably the text to segment)
# @param bPOStagged [Integer] int flag; defaults to NLPIR_TRUE
# @return [String] the binding's result converted with #to_s and tagged with
#   @charset via force_encoding (no transcoding)
def NLPIR_ParagraphProcess(sParagraph, bPOStagged = NLPIR_TRUE)
  raw = NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged)
  segmented = raw.to_s
  segmented.force_encoding(@charset)
end
#NLPIR_ParagraphProcessA(sParagraph) ⇒ Object Also known as: text_procA
182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/nlpir.rb', line 182
# Calls the native NLPIR_ParagraphProcessA binding and wraps the records it
# returns in Result_t structs.
#
# The number of records is taken from NLPIR_GetParagraphProcessAWordCount;
# the records are read from consecutive Result_t.size-byte slots starting at
# the address the native call returns.
#
# @param sParagraph [Object] pointer argument (presumably the text to segment)
# @return [Array<Result_t>] one struct per counted word; empty when the count
#   is zero (the original wrapped one bogus record in that case)
def NLPIR_ParagraphProcessA(sParagraph)
  word_count = NLPIR_GetParagraphProcessAWordCount(sParagraph)
  return [] unless word_count > 0

  # A pointer object is created only so that the address of its own storage
  # (#ref) can be handed to the native call -- presumably as the int*
  # result-count out-parameter. The local keeps that storage alive for the
  # duration of the call.
  count_holder = Fiddle::Pointer.to_ptr(word_count)
  raw = NLPIR_ParagraphProcessA_rb.call(sParagraph, count_holder.ref.to_i)
  base = Fiddle::Pointer.new(raw.to_i)

  Array.new(word_count) { |index| Result_t.new(base + index * Result_t.size) }
end
#NLPIR_ParagraphProcessAW(sParagraph) ⇒ Object Also known as: text_procAW
207 208 209 210 211 212 213 214 215 216 217 218 |
# File 'lib/nlpir.rb', line 207
# Allocates a buffer for the counted number of records, lets the native
# NLPIR_ParagraphProcessAW binding fill it, and wraps each record in a
# Result_t struct.
#
# @param sParagraph [Object] text passed to
#   NLPIR_GetParagraphProcessAWordCount to size the buffer
# @return [Array<Result_t>] one struct per counted word; empty when the count
#   is zero (the original wrapped one record over a zero-byte allocation)
def NLPIR_ParagraphProcessAW(sParagraph)
  word_count = NLPIR_GetParagraphProcessAWordCount(sParagraph)
  return [] unless word_count > 0

  # Constants are fully qualified so the method does not depend on Fiddle
  # being mixed into the surrounding scope.
  release = Fiddle::Function.new(Fiddle::RUBY_FREE, [Fiddle::TYPE_VOIDP], Fiddle::TYPE_VOID)
  buffer = Fiddle::Pointer.malloc(Result_t.size * word_count, release)
  NLPIR_ParagraphProcessAW_rb.call(word_count, buffer)

  Array.new(word_count) do |index|
    word = Result_t.new(buffer + index * Result_t.size)
    # Only +buffer+ owns the allocation; offset pointers do not. Pin it on
    # every struct so the memory cannot be released while a result is in use.
    word.instance_variable_set(:@nlpir_buffer, buffer)
    word
  end
end
#NLPIR_SaveTheUsrDic ⇒ Object Also known as: save_userdict
227 228 229 |
# File 'lib/nlpir.rb', line 227
# Invokes the native NLPIR_SaveTheUsrDic binding, which takes no arguments.
#
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_SaveTheUsrDic
  NLPIR_SaveTheUsrDic_rb.call
end
#NLPIR_SetPOSmap(nPOSmap) ⇒ Object Also known as: setPOSmap
264 265 266 |
# File 'lib/nlpir.rb', line 264
# Forwards +nPOSmap+ to the native NLPIR_SetPOSmap binding.
#
# @param nPOSmap [Integer] int argument (presumably one of the
#   ICT_POS_MAP_* / PKU_POS_MAP_* constants -- confirm against callers)
# @return [Integer] whatever the Fiddle call returns (binding declares TYPE_INT)
def NLPIR_SetPOSmap(nPOSmap)
  binding_fn = NLPIR_SetPOSmap_rb
  binding_fn.call(nPOSmap)
end