-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdatasets_documents.go
436 lines (364 loc) · 14.2 KB
/
datasets_documents.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
package coze
import (
"context"
"encoding/base64"
"net/http"
)
// Create POSTs req to /open_api/knowledge/document/create to add the
// documents described by req.DocumentBases to a knowledge base.
//
// On success it returns the decoded payload with the HTTP response attached.
// Any error reported by the underlying request is returned unchanged together
// with a nil response.
func (r *datasetsDocuments) Create(ctx context.Context, req *CreateDatasetsDocumentsReq) (*CreateDatasetsDocumentsResp, error) {
	resp := &createDatasetsDocumentsResp{}
	if err := r.client.Request(ctx, http.MethodPost, "/open_api/knowledge/document/create", req, resp, r.commonHeaderOpt...); err != nil {
		return nil, err
	}

	// The payload is an embedded pointer. If the decoder did not populate it
	// (for example, presumably, when the body carries none of its fields —
	// verify against core.Request), it is still nil here and the call below
	// would dereference a nil pointer. Substitute an empty payload instead.
	if resp.CreateDatasetsDocumentsResp == nil {
		resp.CreateDatasetsDocumentsResp = &CreateDatasetsDocumentsResp{}
	}

	resp.CreateDatasetsDocumentsResp.setHTTPResponse(resp.HTTPResponse)
	return resp.CreateDatasetsDocumentsResp, nil
}
// Update POSTs req to /open_api/knowledge/document/update.
//
// The endpoint's payload is not surfaced; the returned value only carries the
// HTTP response of the call. Errors from the underlying request are returned
// unchanged with a nil response.
func (r *datasetsDocuments) Update(ctx context.Context, req *UpdateDatasetsDocumentsReq) (*UpdateDatasetsDocumentsResp, error) {
	const endpoint = "/open_api/knowledge/document/update"

	var raw updateDatasetsDocumentsResp
	if err := r.client.Request(ctx, http.MethodPost, endpoint, req, &raw, r.commonHeaderOpt...); err != nil {
		return nil, err
	}

	out := new(UpdateDatasetsDocumentsResp)
	out.setHTTPResponse(raw.HTTPResponse)
	return out, nil
}
// Delete POSTs req to /open_api/knowledge/document/delete.
//
// The endpoint's payload is not surfaced; the returned value only carries the
// HTTP response of the call. Errors from the underlying request are returned
// unchanged with a nil response.
func (r *datasetsDocuments) Delete(ctx context.Context, req *DeleteDatasetsDocumentsReq) (*DeleteDatasetsDocumentsResp, error) {
	const endpoint = "/open_api/knowledge/document/delete"

	var raw deleteDatasetsDocumentsResp
	if err := r.client.Request(ctx, http.MethodPost, endpoint, req, &raw, r.commonHeaderOpt...); err != nil {
		return nil, err
	}

	out := new(DeleteDatasetsDocumentsResp)
	out.setHTTPResponse(raw.HTTPResponse)
	return out, nil
}
// List returns a number-based pager over the documents of the knowledge base
// identified by req.DatasetID. Each page is fetched by POSTing to
// /open_api/knowledge/document/list.
//
// A zero req.Page is treated as 1 and a zero req.Size as 20. The defaults are
// applied to local copies, so the caller's request is not modified.
func (r *datasetsDocuments) List(ctx context.Context, req *ListDatasetsDocumentsReq) (NumberPaged[Document], error) {
	// Snapshot the request so later changes by the caller cannot affect
	// pages fetched afterwards, and so defaults never leak back into req.
	datasetID, page, size := req.DatasetID, req.Page, req.Size
	if page == 0 {
		page = 1
	}
	if size == 0 {
		size = 20
	}

	fetch := func(request *pageRequest) (*pageResponse[Document], error) {
		resp := &listDatasetsDocumentsResp{}
		doReq := &ListDatasetsDocumentsReq{
			DatasetID: datasetID,
			Size:      request.PageSize,
			Page:      request.PageNum,
		}
		if err := r.client.Request(ctx, http.MethodPost, "/open_api/knowledge/document/list", doReq, resp, r.commonHeaderOpt...); err != nil {
			return nil, err
		}

		// The payload is an embedded pointer; if the decoder left it nil,
		// reading Total or DocumentInfos through it would panic. Treat that
		// case as an empty page.
		var (
			total int
			docs  []*Document
		)
		if resp.ListDatasetsDocumentsResp != nil {
			total = int(resp.Total)
			docs = resp.DocumentInfos
		}

		return &pageResponse[Document]{
			Total: total,
			// A page that came back full is taken as a sign that another
			// page may follow.
			HasMore: request.PageSize <= len(docs),
			Data:    docs,
			LogID:   resp.HTTPResponse.LogID(),
		}, nil
	}

	return NewNumberPaged[Document](fetch, size, page)
}
// datasetsDocuments groups the knowledge-base document operations
// (Create, Update, Delete, List) around a shared client.
type datasetsDocuments struct {
// client performs the actual requests.
client *core
// commonHeaderOpt holds the request options passed to every request issued
// by this type's methods.
commonHeaderOpt []RequestOption
}
// newDatasetsDocuments builds the document API wrapper around core. Every
// request made through the returned value carries the "Agw-Js-Conv: str"
// header option.
// NOTE(review): the header presumably asks the gateway to serialize 64-bit
// integers as strings — confirm against the API gateway documentation.
func newDatasetsDocuments(core *core) *datasetsDocuments {
	headers := []RequestOption{withHTTPHeader("Agw-Js-Conv", "str")}
	return &datasetsDocuments{
		client:          core,
		commonHeaderOpt: headers,
	}
}
// Document describes one file stored in a knowledge base (dataset). It is the
// element type of the document_infos list in the create and list responses.
type Document struct {
// The ID of the file. Note that it is a string here, while
// UpdateDatasetsDocumentsReq and DeleteDatasetsDocumentsReq take document
// IDs as int64.
DocumentID string `json:"document_id"`
// The total character count of the file content.
CharCount int `json:"char_count"`
// The chunking rules. For detailed instructions, refer to
// DocumentChunkStrategy.
ChunkStrategy *DocumentChunkStrategy `json:"chunk_strategy"`
// The upload time of the file, in the format of a 10-digit Unix timestamp.
CreateTime int `json:"create_time"`
// The last modified time of the file, in the format of a 10-digit Unix timestamp.
UpdateTime int `json:"update_time"`
// The type of file format. Values include:
// 0: Document type, such as txt, pdf, online web pages, etc.
// 1: Spreadsheet type, such as xls spreadsheets, etc.
// 2: Images type, such as png images, etc.
// See the DocumentFormatType constants.
FormatType DocumentFormatType `json:"format_type"`
// The number of times the file has been hit in conversations.
HitCount int `json:"hit_count"`
// The name of the file.
Name string `json:"name"`
// The size of the file in bytes.
Size int `json:"size"`
// The number of slices the file has been divided into.
SliceCount int `json:"slice_count"`
// The method of uploading the file. Values include:
// 0: Upload local files.
// 1: Upload online web pages.
// See the DocumentSourceType constants.
SourceType DocumentSourceType `json:"source_type"`
// The processing status of the file. Values include:
// 0: Processing
// 1: Completed
// 9: Processing failed, it is recommended to re-upload
// See the DocumentStatus constants.
Status DocumentStatus `json:"status"`
// The format of the local file, i.e., the file extension, such as "txt".
// Supported formats include PDF, TXT, DOC, DOCX.
Type string `json:"type"`
// The frequency of automatic updates for online web pages, in hours.
UpdateInterval int `json:"update_interval"`
// Whether the online web page is automatically updated. Values include:
// 0: Do not automatically update
// 1: Automatically update
// See the DocumentUpdateType constants.
UpdateType DocumentUpdateType `json:"update_type"`
}
// DocumentBase carries the per-file information needed to create a document.
// It is the element type of CreateDatasetsDocumentsReq.DocumentBases; the
// DocumentBaseBuild* helpers construct it for web pages, local files and
// images.
type DocumentBase struct {
// The name of the file.
Name string `json:"name"`
// The metadata information of the file (where its content comes from).
SourceInfo *DocumentSourceInfo `json:"source_info"`
// The update strategy for online web pages. Defaults to no automatic update.
// Omitted from the request body when nil.
UpdateRule *DocumentUpdateRule `json:"update_rule,omitempty"`
}
// DocumentChunkStrategy describes how a document's content is split into
// chunks. Every field except ChunkType is omitted from the JSON body when it
// holds its zero value.
type DocumentChunkStrategy struct {
// The chunking settings. Values include:
// 0: Automatic chunking and cleaning. Uses preset rules for data chunking and processing.
// 1: Custom. In this case, details need to be specified through separator, max_tokens,
// remove_extra_spaces, and remove_urls_emails.
ChunkType int `json:"chunk_type"`
// Maximum chunk length, ranging from 100 to 2000.
// Required when chunk_type=1.
MaxTokens int `json:"max_tokens,omitempty"`
// Whether to automatically filter consecutive spaces, line breaks, and tabs.
// Values include:
// true: Automatically filter
// false: (Default) Do not automatically filter
// Takes effect when chunk_type=1.
RemoveExtraSpaces bool `json:"remove_extra_spaces,omitempty"`
// Whether to automatically filter all URLs and email addresses.
// Values include:
// true: Automatically filter
// false: (Default) Do not automatically filter
// Takes effect when chunk_type=1.
RemoveUrlsEmails bool `json:"remove_urls_emails,omitempty"`
// The chunk identifier (the separator on which content is split).
// Required when chunk_type=1.
Separator string `json:"separator,omitempty"`
}
// DocumentSourceInfo describes where a document's content comes from. All
// fields are optional pointers that are omitted from the JSON body when nil;
// which of them are set depends on the upload method (see the
// DocumentSourceInfoBuild* helpers).
type DocumentSourceInfo struct {
// Base64 encoding of the local file.
// Required when uploading local files.
FileBase64 *string `json:"file_base64,omitempty"`
// The format of the local file, i.e., the file extension, such as "txt".
// Supported formats include PDF, TXT, DOC, DOCX.
// The uploaded file type should match the knowledge base type.
// Required when uploading local files.
FileType *string `json:"file_type,omitempty"`
// The URL of the webpage.
// Required when uploading webpages.
WebUrl *string `json:"web_url,omitempty"`
// The upload method of the file.
// 1 to indicate uploading online webpages.
// 5 to indicate uploading fileID.
// Required when uploading online webpages.
DocumentSource *int `json:"document_source,omitempty"`
// The ID of the source file. DocumentSourceInfoBuildImage sets it together
// with document_source=5.
// NOTE(review): presumably the ID of a file uploaded beforehand through the
// file upload API — confirm.
SourceFileID *int64 `json:"source_file_id,omitempty"`
}
// DocumentUpdateRule describes whether and how often an online web page
// document is refreshed automatically. Use DocumentUpdateRuleBuildNoAuto or
// DocumentUpdateRuleBuildAutoUpdate to construct one.
type DocumentUpdateRule struct {
// Whether the online webpage is automatically updated.
// Values include:
// 0: Do not automatically update
// 1: Automatically update
UpdateType DocumentUpdateType `json:"update_type"`
// The frequency of automatic updates for online webpages, in hours.
// Minimum value is 24.
// This field has no omitempty tag, so it is always serialized, including
// as 0.
UpdateInterval int `json:"update_interval"`
}
// DocumentFormatType identifies the format category of a document. The
// numeric values are the ones exchanged with the API, so the order of the
// constants below must not change.
type DocumentFormatType int

const (
	// DocumentFormatTypeDocument (0) is the document type, such as txt, pdf,
	// online web pages, etc.
	DocumentFormatTypeDocument DocumentFormatType = iota
	// DocumentFormatTypeSpreadsheet (1) is the spreadsheet type, such as xls
	// spreadsheets, etc.
	DocumentFormatTypeSpreadsheet
	// DocumentFormatTypeImage (2) is the images type, such as png images, etc.
	DocumentFormatTypeImage
)
// DocumentSourceType identifies how a document was uploaded. The numeric
// values are the ones exchanged with the API, so the order of the constants
// below must not change.
type DocumentSourceType int

const (
	// DocumentSourceTypeLocalFile (0) means the document was uploaded as a
	// local file.
	DocumentSourceTypeLocalFile DocumentSourceType = iota
	// DocumentSourceTypeOnlineWeb (1) means the document was uploaded as an
	// online web page.
	DocumentSourceTypeOnlineWeb
)
// DocumentStatus is the processing state of a document. The values are not
// contiguous, so each one is spelled out explicitly.
type DocumentStatus int

const (
	// DocumentStatusProcessing means the document is still being processed.
	DocumentStatusProcessing = DocumentStatus(0)
	// DocumentStatusCompleted means processing has completed.
	DocumentStatusCompleted = DocumentStatus(1)
	// DocumentStatusFailed means processing failed; re-uploading is
	// recommended.
	DocumentStatusFailed = DocumentStatus(9)
)
// DocumentUpdateType says whether an online web page document is refreshed
// automatically. The numeric values are the ones exchanged with the API, so
// the order of the constants below must not change.
type DocumentUpdateType int

const (
	// DocumentUpdateTypeNoAutoUpdate (0) disables automatic updates.
	DocumentUpdateTypeNoAutoUpdate DocumentUpdateType = iota
	// DocumentUpdateTypeAutoUpdate (1) enables automatic updates.
	DocumentUpdateTypeAutoUpdate
)
// CreateDatasetsDocumentsReq is the request body sent by Create to add
// documents to a knowledge base.
type CreateDatasetsDocumentsReq struct {
// The ID of the knowledge base.
DatasetID int64 `json:"dataset_id"`
// The metadata information of the files awaiting upload. The array has a maximum length of 10,
// meaning up to 10 files can be uploaded at a time. For detailed instructions, refer to
// DocumentBase.
DocumentBases []*DocumentBase `json:"document_bases"`
// Chunk strategy. These rules must be set only when uploading a file to new knowledge for the
// first time. For subsequent file uploads to this knowledge, it is not necessary to pass these
// rules; the default is to continue using the initial settings, and modifications are not
// supported. For detailed instructions, refer to DocumentChunkStrategy.
// Omitted from the request body when nil.
ChunkStrategy *DocumentChunkStrategy `json:"chunk_strategy,omitempty"`
// The type of file format. Values include:
// 0: Document type, such as txt, pdf, online web pages, etc.
// 2: Images type, such as png images, etc.
FormatType DocumentFormatType `json:"format_type"`
}
// DeleteDatasetsDocumentsReq is the request body sent by Delete.
type DeleteDatasetsDocumentsReq struct {
// The IDs of the knowledge base files to delete.
DocumentIDs []int64 `json:"document_ids"`
}
// ListDatasetsDocumentsReq is the request passed to List and also the body
// sent for each page that List fetches.
type ListDatasetsDocumentsReq struct {
// The ID of the knowledge base.
DatasetID int64 `json:"dataset_id"`
// The page number for paginated queries. When left at 0, List starts from
// page 1, meaning the data return starts from the first page.
Page int `json:"page,omitempty"`
// The size of pagination, i.e. the number of data entries returned per
// page. When left at 0, List uses a page size of 20.
Size int `json:"size,omitempty"`
}
// UpdateDatasetsDocumentsReq is the request body sent by Update to modify a
// knowledge base file.
type UpdateDatasetsDocumentsReq struct {
// The ID of the knowledge base file.
DocumentID int64 `json:"document_id"`
// The new name of the knowledge base file. Omitted from the request body
// when empty.
DocumentName string `json:"document_name,omitempty"`
// The update strategy for online web pages. Defaults to no automatic updates.
// For detailed information, refer to DocumentUpdateRule.
UpdateRule *DocumentUpdateRule `json:"update_rule,omitempty"`
}
// createDatasetsDocumentsResp is the decode target used by Create. It embeds
// the common baseResponse next to a pointer to the public payload, which
// Create hands back to the caller.
type createDatasetsDocumentsResp struct {
baseResponse
*CreateDatasetsDocumentsResp
}
// CreateDatasetsDocumentsResp is the result returned by Create.
type CreateDatasetsDocumentsResp struct {
baseModel
// The documents reported back by the create endpoint.
DocumentInfos []*Document `json:"document_infos"`
}
// listDatasetsDocumentsResp is the decode target used by List for each page.
// It embeds the common baseResponse next to a pointer to the payload from
// which List reads Total and DocumentInfos.
type listDatasetsDocumentsResp struct {
baseResponse
*ListDatasetsDocumentsResp
}
// ListDatasetsDocumentsResp is the payload of one page of the list endpoint.
type ListDatasetsDocumentsResp struct {
baseModel
// The total count reported by the endpoint; List copies it into the page's
// Total.
Total int64 `json:"total"`
// The documents on this page.
DocumentInfos []*Document `json:"document_infos"`
}
// deleteDatasetsDocumentsResp is the decode target used by Delete. It carries
// only the common baseResponse; Delete reads the HTTP response from it.
type deleteDatasetsDocumentsResp struct {
baseResponse
}
// DeleteDatasetsDocumentsResp is the result returned by Delete. It has no
// fields of its own beyond the embedded baseModel, on which Delete records
// the HTTP response.
type DeleteDatasetsDocumentsResp struct {
baseModel
}
// updateDatasetsDocumentsResp is the decode target used by Update. It carries
// only the common baseResponse; Update reads the HTTP response from it.
type updateDatasetsDocumentsResp struct {
baseResponse
}
// UpdateDatasetsDocumentsResp is the result returned by Update. It has no
// fields of its own beyond the embedded baseModel, on which Update records
// the HTTP response.
type UpdateDatasetsDocumentsResp struct {
baseModel
}
// DocumentBaseBuildWebPage returns the DocumentBase for an online web page
// named name and located at url.
//
// A nil interval yields a "no automatic update" rule; otherwise the page is
// set to update automatically every *interval hours.
func DocumentBaseBuildWebPage(name string, url string, interval *int) *DocumentBase {
	var rule *DocumentUpdateRule
	if interval == nil {
		rule = DocumentUpdateRuleBuildNoAuto()
	} else {
		rule = DocumentUpdateRuleBuildAutoUpdate(*interval)
	}

	base := &DocumentBase{Name: name}
	base.SourceInfo = DocumentSourceInfoBuildWebPage(url)
	base.UpdateRule = rule
	return base
}
// DocumentBaseBuildLocalFile returns the DocumentBase for a local file named
// name. content is the raw file content (it is base64-encoded by
// DocumentSourceInfoBuildLocalFile) and fileType is the file extension, such
// as "txt". No update rule is set.
func DocumentBaseBuildLocalFile(name string, content string, fileType string) *DocumentBase {
	source := DocumentSourceInfoBuildLocalFile(content, fileType)
	return &DocumentBase{Name: name, SourceInfo: source}
}
// DocumentBaseBuildImage returns the DocumentBase for an image named name
// whose content is referenced by fileID. No update rule is set.
func DocumentBaseBuildImage(name string, fileID int64) *DocumentBase {
	source := DocumentSourceInfoBuildImage(fileID)
	return &DocumentBase{Name: name, SourceInfo: source}
}
// DocumentSourceInfoBuildWebPage returns the source information for an online
// web page: web_url is set to url and document_source to 1.
func DocumentSourceInfoBuildWebPage(url string) *DocumentSourceInfo {
	info := new(DocumentSourceInfo)
	info.WebUrl = &url
	// 1 is the document_source value that indicates an online web page.
	info.DocumentSource = ptr(1)
	return info
}
// DocumentSourceInfoBuildImage returns the source information for an image
// referenced by file ID: source_file_id is set to fileID and document_source
// to 5.
func DocumentSourceInfoBuildImage(fileID int64) *DocumentSourceInfo {
	info := new(DocumentSourceInfo)
	info.SourceFileID = &fileID
	// 5 is the document_source value that indicates an upload by file ID.
	info.DocumentSource = ptr(5)
	return info
}
// DocumentSourceInfoBuildLocalFile returns the source information for a local
// file. content is encoded with standard base64 into file_base64 and fileType
// (the file extension, such as "txt") is stored as file_type.
func DocumentSourceInfoBuildLocalFile(content string, fileType string) *DocumentSourceInfo {
	info := &DocumentSourceInfo{FileType: &fileType}
	payload := base64.StdEncoding.EncodeToString([]byte(content))
	info.FileBase64 = &payload
	return info
}
// DocumentUpdateRuleBuildNoAuto returns an update rule that disables
// automatic updates. The interval is left at zero.
func DocumentUpdateRuleBuildNoAuto() *DocumentUpdateRule {
	rule := new(DocumentUpdateRule)
	rule.UpdateType = DocumentUpdateTypeNoAutoUpdate
	return rule
}
// DocumentUpdateRuleBuildAutoUpdate returns an update rule that enables
// automatic updates every interval hours. The interval is stored as given;
// no range check is performed here.
func DocumentUpdateRuleBuildAutoUpdate(interval int) *DocumentUpdateRule {
	rule := new(DocumentUpdateRule)
	rule.UpdateType = DocumentUpdateTypeAutoUpdate
	rule.UpdateInterval = interval
	return rule
}