Class: Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig

Inherits:
Object
  • Object
show all
Extended by:
Protobuf::MessageExts::ClassMethods
Includes:
Protobuf::MessageExts
Defined in:
proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb

Overview

A singleton resource of DataStore. If it is empty when the DataStore is created and the DataStore is set to DataStore.ContentConfig.CONTENT_REQUIRED, the parser defaults to the digital parser.

Defined Under Namespace

Classes: ChunkingConfig, ParsingConfig, ParsingConfigOverridesEntry

Instance Attribute Summary collapse

Instance Attribute Details

#chunking_config::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ChunkingConfig



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb', line 61

# A singleton resource of DataStore. This page lists four attributes for it:
# `name`, `chunking_config`, `default_parsing_config`, and
# `parsing_config_overrides`.
class DocumentProcessingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Chunking configuration.
  # @!attribute [rw] layout_based_chunking_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ChunkingConfig::LayoutBasedChunkingConfig]
  #     Configuration for layout-based chunking.
  class ChunkingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Configuration for layout-based chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     The token size limit for each chunk.
    #
    #     Supported values: 100-500 (inclusive).
    #     Default value: 500.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Whether to append different levels of headings to chunks taken
    #     from the middle of the document, to prevent context loss.
    #
    #     Default value: False.
    class LayoutBasedChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # Related configurations applied to a specific type of document parser.
  # @!attribute [rw] digital_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::DigitalParsingConfig]
  #     Configurations applied to the digital parser.
  #
  #     Note: The following fields are mutually exclusive: `digital_parsing_config`, `ocr_parsing_config`, `layout_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  # @!attribute [rw] ocr_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::OcrParsingConfig]
  #     Configurations applied to the OCR parser. Currently it only applies
  #     to PDFs.
  #
  #     Note: The following fields are mutually exclusive: `ocr_parsing_config`, `digital_parsing_config`, `layout_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  # @!attribute [rw] layout_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::LayoutParsingConfig]
  #     Configurations applied to the layout parser.
  #
  #     Note: The following fields are mutually exclusive: `layout_parsing_config`, `digital_parsing_config`, `ocr_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  class ParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # The digital parsing configurations for documents.
    # No attributes are documented for this message in this listing.
    class DigitalParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # The OCR parsing configurations for documents.
    # @!attribute [rw] enhanced_document_elements
    #   @deprecated This field is deprecated and may be removed in the next major version update.
    #   @return [::Array<::String>]
    #     [DEPRECATED] This field is deprecated. To use the additional enhanced
    #     document elements processing, please switch to `layout_parsing_config`.
    # @!attribute [rw] use_native_text
    #   @return [::Boolean]
    #     If true, native text is used instead of OCR text on pages that
    #     contain native text.
    class OcrParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # The layout parsing configurations for documents.
    # No attributes are documented for this message in this listing.
    class LayoutParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # Key/value entry pairing a `::String` key with a `ParsingConfig` value.
  # NOTE(review): presumably the entry type behind the
  # `parsing_config_overrides` map (String => ParsingConfig) - confirm.
  # @!attribute [rw] key
  #   @return [::String]
  # @!attribute [rw] value
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig]
  class ParsingConfigOverridesEntry
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#default_parsing_config::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb', line 61

# A singleton resource of DataStore. This page lists four attributes for it:
# `name`, `chunking_config`, `default_parsing_config`, and
# `parsing_config_overrides`.
class DocumentProcessingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Chunking configuration.
  # @!attribute [rw] layout_based_chunking_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ChunkingConfig::LayoutBasedChunkingConfig]
  #     Configuration for layout-based chunking.
  class ChunkingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Configuration for layout-based chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     The token size limit for each chunk.
    #
    #     Supported values: 100-500 (inclusive).
    #     Default value: 500.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Whether to append different levels of headings to chunks taken
    #     from the middle of the document, to prevent context loss.
    #
    #     Default value: False.
    class LayoutBasedChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # Related configurations applied to a specific type of document parser.
  # @!attribute [rw] digital_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::DigitalParsingConfig]
  #     Configurations applied to the digital parser.
  #
  #     Note: The following fields are mutually exclusive: `digital_parsing_config`, `ocr_parsing_config`, `layout_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  # @!attribute [rw] ocr_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::OcrParsingConfig]
  #     Configurations applied to the OCR parser. Currently it only applies
  #     to PDFs.
  #
  #     Note: The following fields are mutually exclusive: `ocr_parsing_config`, `digital_parsing_config`, `layout_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  # @!attribute [rw] layout_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::LayoutParsingConfig]
  #     Configurations applied to the layout parser.
  #
  #     Note: The following fields are mutually exclusive: `layout_parsing_config`, `digital_parsing_config`, `ocr_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  class ParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # The digital parsing configurations for documents.
    # No attributes are documented for this message in this listing.
    class DigitalParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # The OCR parsing configurations for documents.
    # @!attribute [rw] enhanced_document_elements
    #   @deprecated This field is deprecated and may be removed in the next major version update.
    #   @return [::Array<::String>]
    #     [DEPRECATED] This field is deprecated. To use the additional enhanced
    #     document elements processing, please switch to `layout_parsing_config`.
    # @!attribute [rw] use_native_text
    #   @return [::Boolean]
    #     If true, native text is used instead of OCR text on pages that
    #     contain native text.
    class OcrParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # The layout parsing configurations for documents.
    # No attributes are documented for this message in this listing.
    class LayoutParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # Key/value entry pairing a `::String` key with a `ParsingConfig` value.
  # NOTE(review): presumably the entry type behind the
  # `parsing_config_overrides` map (String => ParsingConfig) - confirm.
  # @!attribute [rw] key
  #   @return [::String]
  # @!attribute [rw] value
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig]
  class ParsingConfigOverridesEntry
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#name::String



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb', line 61

# A singleton resource of DataStore. This page lists four attributes for it:
# `name`, `chunking_config`, `default_parsing_config`, and
# `parsing_config_overrides`.
class DocumentProcessingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Chunking configuration.
  # @!attribute [rw] layout_based_chunking_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ChunkingConfig::LayoutBasedChunkingConfig]
  #     Configuration for layout-based chunking.
  class ChunkingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Configuration for layout-based chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     The token size limit for each chunk.
    #
    #     Supported values: 100-500 (inclusive).
    #     Default value: 500.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Whether to append different levels of headings to chunks taken
    #     from the middle of the document, to prevent context loss.
    #
    #     Default value: False.
    class LayoutBasedChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # Related configurations applied to a specific type of document parser.
  # @!attribute [rw] digital_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::DigitalParsingConfig]
  #     Configurations applied to the digital parser.
  #
  #     Note: The following fields are mutually exclusive: `digital_parsing_config`, `ocr_parsing_config`, `layout_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  # @!attribute [rw] ocr_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::OcrParsingConfig]
  #     Configurations applied to the OCR parser. Currently it only applies
  #     to PDFs.
  #
  #     Note: The following fields are mutually exclusive: `ocr_parsing_config`, `digital_parsing_config`, `layout_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  # @!attribute [rw] layout_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::LayoutParsingConfig]
  #     Configurations applied to the layout parser.
  #
  #     Note: The following fields are mutually exclusive: `layout_parsing_config`, `digital_parsing_config`, `ocr_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  class ParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # The digital parsing configurations for documents.
    # No attributes are documented for this message in this listing.
    class DigitalParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # The OCR parsing configurations for documents.
    # @!attribute [rw] enhanced_document_elements
    #   @deprecated This field is deprecated and may be removed in the next major version update.
    #   @return [::Array<::String>]
    #     [DEPRECATED] This field is deprecated. To use the additional enhanced
    #     document elements processing, please switch to `layout_parsing_config`.
    # @!attribute [rw] use_native_text
    #   @return [::Boolean]
    #     If true, native text is used instead of OCR text on pages that
    #     contain native text.
    class OcrParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # The layout parsing configurations for documents.
    # No attributes are documented for this message in this listing.
    class LayoutParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # Key/value entry pairing a `::String` key with a `ParsingConfig` value.
  # NOTE(review): presumably the entry type behind the
  # `parsing_config_overrides` map (String => ParsingConfig) - confirm.
  # @!attribute [rw] key
  #   @return [::String]
  # @!attribute [rw] value
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig]
  class ParsingConfigOverridesEntry
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#parsing_config_overrides::Google::Protobuf::Map{::String => ::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig}



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb', line 61

# A singleton resource of DataStore. This page lists four attributes for it:
# `name`, `chunking_config`, `default_parsing_config`, and
# `parsing_config_overrides`.
class DocumentProcessingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Chunking configuration.
  # @!attribute [rw] layout_based_chunking_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ChunkingConfig::LayoutBasedChunkingConfig]
  #     Configuration for layout-based chunking.
  class ChunkingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Configuration for layout-based chunking.
    # @!attribute [rw] chunk_size
    #   @return [::Integer]
    #     The token size limit for each chunk.
    #
    #     Supported values: 100-500 (inclusive).
    #     Default value: 500.
    # @!attribute [rw] include_ancestor_headings
    #   @return [::Boolean]
    #     Whether to append different levels of headings to chunks taken
    #     from the middle of the document, to prevent context loss.
    #
    #     Default value: False.
    class LayoutBasedChunkingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # Related configurations applied to a specific type of document parser.
  # @!attribute [rw] digital_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::DigitalParsingConfig]
  #     Configurations applied to the digital parser.
  #
  #     Note: The following fields are mutually exclusive: `digital_parsing_config`, `ocr_parsing_config`, `layout_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  # @!attribute [rw] ocr_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::OcrParsingConfig]
  #     Configurations applied to the OCR parser. Currently it only applies
  #     to PDFs.
  #
  #     Note: The following fields are mutually exclusive: `ocr_parsing_config`, `digital_parsing_config`, `layout_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  # @!attribute [rw] layout_parsing_config
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::LayoutParsingConfig]
  #     Configurations applied to the layout parser.
  #
  #     Note: The following fields are mutually exclusive: `layout_parsing_config`, `digital_parsing_config`, `ocr_parsing_config`. If a field in that set is populated, all other fields in the set will automatically be cleared.
  class ParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # The digital parsing configurations for documents.
    # No attributes are documented for this message in this listing.
    class DigitalParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # The OCR parsing configurations for documents.
    # @!attribute [rw] enhanced_document_elements
    #   @deprecated This field is deprecated and may be removed in the next major version update.
    #   @return [::Array<::String>]
    #     [DEPRECATED] This field is deprecated. To use the additional enhanced
    #     document elements processing, please switch to `layout_parsing_config`.
    # @!attribute [rw] use_native_text
    #   @return [::Boolean]
    #     If true, native text is used instead of OCR text on pages that
    #     contain native text.
    class OcrParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # The layout parsing configurations for documents.
    # No attributes are documented for this message in this listing.
    class LayoutParsingConfig
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # Key/value entry pairing a `::String` key with a `ParsingConfig` value.
  # NOTE(review): presumably the entry type behind the
  # `parsing_config_overrides` map (String => ParsingConfig) - confirm.
  # @!attribute [rw] key
  #   @return [::String]
  # @!attribute [rw] value
  #   @return [::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig]
  class ParsingConfigOverridesEntry
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end