office 2003 vb ocr
OCR(光学字符识别)可以把图片上的文字扫描出来,转成文本。今天看到一些资料,随便试了一试,效果还可以:我的传真软件生成的 TIF 文件,上面的汉字和文字基本能够正确辨认。下次高兴就加上 OCR 功能,不过不知道打包文件会不会很大。
[hide] office 共享功能 校对工具 英文 中文简体 光学字符识别模块 microsoft office document imaging 扫描 OCR 和索引服务筛选器 [/hide]
Option Explicit
Private Sub Command1_Click()
    Dim mm As New MODI.Document
    mm.Create "c:\2.tif"
    mv.Document = mm
    'mv.Refresh
    mm.OCR miLANG_ENGLISH, True, True
    mv.SelectAll 0
    Debug.Print mv.TextSelection.Text
    Exit Sub
    'Dim strtr As String
    'Dim i As Integer
    'For i = 0 To mm.Images.Count
    '    Dim img As MODI.Image
    '    img = mm.Images(i)
    '    Dim lay As MODI.Layout
    '    lay = img.Layout
    '    Dim j As Integer
    '    For j = 0 To lay.Words.Count
    '        Dim wrd As MODI.Word
    '        wrd = lay.Words(j)
    '
    '        Dim k As Integer
    '        For k = 0 To wrd.Rects.Count
    '            Dim rct As MODI.MiRect
    '
    '        Next
    '    Next
    'Next
    '
    'www.shengfang.org
    ' mv.SelectAll 0
    ' MODI.MiRect rect = (MODI.MiRect) word.Rects[k];
    ' charactersHeights += rect.Bottom-rect.Top;
    ' numOfCharacters++;
    '
    ' }
    ' }
    ' float avHeight = (float )charactersHeights/numOfCharacters;
    ' statistic += "Page "+i+ ": Avarage character height is: "+
    ' "avHeight.ToString("0.00") +" pixel!"+ "\r\n";
    ' }
    '
End Sub
www.shengfang.org
http://msdn.microsoft.com/office/downloads/vba/default.aspx
http://www.codeproject.com/csharp/tableextractor.asp
http://www.codeproject.com/csharp/MODI.asp
Reference=*\G{A5EDEDF4-2BBC-45F3-822B-E60C278A1A79}#b.0#0#..\..\..\Program Files\Common Files\Microsoft Shared\MODI\11.0\MDIVWCTL.DLL#Microsoft Office Document Imaging 11.0 Type Library
Object = "{A5EDEDF4-2BBC-45F3-822B-E60C278A1A79}#11.0#0"; "MDIVWCTL.DLL"
字体:大 中 小 |