diff --git a/core/block/import/html/converter.go b/core/block/import/html/converter.go index 641bd4b7d2..5ad5a6fd3e 100644 --- a/core/block/import/html/converter.go +++ b/core/block/import/html/converter.go @@ -189,7 +189,7 @@ func (h *HTML) updateFilesInLinks(block *model.Block, filesSource source.Source, if newFileName, createFileBlock, err = common.ProvideFileName(mark.Param, filesSource, path, h.tempDirProvider); err == nil { mark.Param = newFileName if createFileBlock { - anymark.ConvertTextToFile(block) + block.Content = anymark.ConvertTextToFile(mark.Param) break } continue diff --git a/core/block/import/markdown/anymark/anyblocks.go b/core/block/import/markdown/anymark/anyblocks.go index 7fe6c06265..eeea01d640 100644 --- a/core/block/import/markdown/anymark/anyblocks.go +++ b/core/block/import/markdown/anymark/anyblocks.go @@ -92,10 +92,10 @@ func provideCodeBlock(textArr []string, language string, id string) *model.Block } } -func ConvertTextToFile(block *model.Block) { +func ConvertTextToFile(filePath string) *model.BlockContentOfFile { // "svg" excluded - if block.GetText().GetMarks().Marks[0].Param == "" { - return + if filePath == "" { + return nil } imageFormats := []string{"jpg", "jpeg", "png", "gif", "webp"} @@ -104,7 +104,7 @@ func ConvertTextToFile(block *model.Block) { pdfFormat := "pdf" fileType := model.BlockContentFile_File - fileExt := filepath.Ext(block.GetText().GetMarks().Marks[0].Param) + fileExt := filepath.Ext(filePath) if fileExt != "" { fileExt = fileExt[1:] for _, ext := range imageFormats { @@ -131,14 +131,13 @@ func ConvertTextToFile(block *model.Block) { if strings.EqualFold(fileExt, pdfFormat) { fileType = model.BlockContentFile_PDF } - - block.Content = &model.BlockContentOfFile{ - File: &model.BlockContentFile{ - Name: block.GetText().GetMarks().Marks[0].Param, - State: model.BlockContentFile_Empty, - Type: fileType, - }, - } + } + return &model.BlockContentOfFile{ + File: &model.BlockContentFile{ + Name: filePath, + State: model.BlockContentFile_Empty, + Type: fileType, + }, } } diff --git a/core/block/import/markdown/blockconverter.go b/core/block/import/markdown/blockconverter.go index 9bbe405b6c..388c217357 100644 --- a/core/block/import/markdown/blockconverter.go +++ b/core/block/import/markdown/blockconverter.go @@ -98,35 +98,73 @@ func (m *mdConverter) processBlocks(shortPath string, file *FileInfo, files map[ func (m *mdConverter) processTextBlock(block *model.Block, files map[string]*FileInfo) { txt := block.GetText() - if txt != nil && txt.Marks != nil && len(txt.Marks.Marks) == 1 && - txt.Marks.Marks[0].Type == model.BlockContentTextMark_Link { - link := txt.Marks.Marks[0].Param - wholeLineLink := m.isWholeLineLink(txt) - ext := filepath.Ext(link) - - // todo: bug with multiple markup links in arow when the first is external - if file := files[link]; file != nil { - if strings.EqualFold(ext, ".csv") { - m.processCSVFileLink(block, files, link, wholeLineLink) + if txt != nil && txt.Marks != nil { + if len(txt.Marks.Marks) == 1 && txt.Marks.Marks[0].Type == model.BlockContentTextMark_Link { + m.handleSingleMark(block, files) + } else { + m.handleMultipleMarks(block, files) + } + } +} + +func (m *mdConverter) handleSingleMark(block *model.Block, files map[string]*FileInfo) { + txt := block.GetText() + link := txt.Marks.Marks[0].Param + wholeLineLink := m.isWholeLineLink(txt.Text, txt.Marks.Marks[0]) + ext := filepath.Ext(link) + // todo: bug with multiple markup links in arow when the first is external + if file := files[link]; file != nil { + if strings.EqualFold(ext, ".csv") { + m.processCSVFileLink(block, files, link, wholeLineLink) + return + } + if strings.EqualFold(ext, ".md") { + // only convert if this is the only link in the row + m.convertToAnytypeLinkBlock(block, wholeLineLink) + } else { + block.Content = anymark.ConvertTextToFile(txt.Marks.Marks[0].Param) + } + file.HasInboundLinks = true + } else if wholeLineLink { + block.Content = m.convertTextToBookmark(txt.Marks.Marks[0].Param) + } +} + +func (m *mdConverter) handleMultipleMarks(block *model.Block, files map[string]*FileInfo) { + txt := block.GetText() + for _, mark := range txt.Marks.Marks { + if mark.Type == model.BlockContentTextMark_Link { + if stop := m.handleSingleLinkMark(block, files, mark, txt); stop { return } - if strings.EqualFold(ext, ".md") { - // only convert if this is the only link in the row - m.convertToAnytypeLinkBlock(block, wholeLineLink) - } else { - anymark.ConvertTextToFile(block) - } - file.HasInboundLinks = true - } else if wholeLineLink { - m.convertTextToBookmark(block) } } } -func (m *mdConverter) isWholeLineLink(txt *model.BlockContentText) bool { +func (m *mdConverter) handleSingleLinkMark(block *model.Block, files map[string]*FileInfo, mark *model.BlockContentTextMark, txt *model.BlockContentText) bool { + link := mark.Param + ext := filepath.Ext(link) + if file := files[link]; file != nil { + file.HasInboundLinks = true + if strings.EqualFold(ext, ".md") || strings.EqualFold(ext, ".csv") { + mark.Type = model.BlockContentTextMark_Object + return false + } + if m.isWholeLineLink(txt.Text, mark) { + block.Content = anymark.ConvertTextToFile(mark.Param) + return true + } + } else if m.isWholeLineLink(txt.Text, mark) { + block.Content = m.convertTextToBookmark(mark.Param) + return true + } + return false +} + +func (m *mdConverter) isWholeLineLink(text string, marks *model.BlockContentTextMark) bool { var wholeLineLink bool - textRunes := []rune(txt.Text) - var from, to = int(txt.Marks.Marks[0].Range.From), int(txt.Marks.Marks[0].Range.To) + textRunes := []rune(text) + var from, to = int(marks.Range.From), int(marks.Range.To) if from == 0 || (from < len(textRunes) && len(strings.TrimSpace(string(textRunes[0:from]))) == 0) { if to >= len(textRunes) || len(strings.TrimSpace(string(textRunes[to:]))) == 0 { wholeLineLink = true @@ -201,14 +239,14 @@ func (m *mdConverter) convertTextToPageLink(block *model.Block) { } } -func (m *mdConverter) convertTextToBookmark(block *model.Block) { - if err := uri.ValidateURI(block.GetText().Marks.Marks[0].Param); err != nil { - return +func (m *mdConverter) convertTextToBookmark(url string) *model.BlockContentOfBookmark { + if err := uri.ValidateURI(url); err != nil { + return nil } - block.Content = &model.BlockContentOfBookmark{ + return &model.BlockContentOfBookmark{ Bookmark: &model.BlockContentBookmark{ - Url: block.GetText().Marks.Marks[0].Param, + Url: url, }, } } diff --git a/core/block/import/markdown/blockconverter_test.go b/core/block/import/markdown/blockconverter_test.go index 874efa0633..f5e8c1df65 100644 --- a/core/block/import/markdown/blockconverter_test.go +++ b/core/block/import/markdown/blockconverter_test.go @@ -39,7 +39,7 @@ func Test_processFiles(t *testing.T) { files := converter.processFiles(absolutePath, common.NewError(pb.RpcObjectImportRequest_IGNORE_ERRORS), source) // then - assert.Len(t, files, 3) + assert.Len(t, files, 6) pdfFilePath := filepath.Join(absolutePath, "test.pdf") assert.Contains(t, files, pdfFilePath) @@ -71,7 +71,7 @@ func Test_processFiles(t *testing.T) { files := converter.processFiles(absolutePath, common.NewError(pb.RpcObjectImportRequest_IGNORE_ERRORS), source) // then - assert.Len(t, files, 1) + assert.Len(t, files, 4) pdfFilePath := filepath.Join(absolutePath, "test.pdf") assert.NotContains(t, files, pdfFilePath) diff --git a/core/block/import/markdown/import.go b/core/block/import/markdown/import.go index bc5e0386f7..45e3c48483 100644 --- a/core/block/import/markdown/import.go +++ b/core/block/import/markdown/import.go @@ -140,8 +140,8 @@ func (m *Markdown) getSnapshotsAndRootObjectsIds( m.processImportStep(pathsCount, files, progress, allErrors, details, m.setNewID) || m.processImportStep(pathsCount, files, progress, allErrors, details, m.addLinkToObjectBlocks) || m.processImportStep(pathsCount, files, progress, allErrors, details, m.linkPagesWithRootFile) || - m.processImportStep(pathsCount, files, progress, allErrors, details, m.fillEmptyBlocks) || m.processImportStep(pathsCount, files, progress, allErrors, details, m.addLinkBlocks) || + m.processImportStep(pathsCount, files, progress, allErrors, details, m.fillEmptyBlocks) || m.processImportStep(pathsCount, files, progress, allErrors, details, m.addChildBlocks) { return nil, nil } diff --git a/core/block/import/markdown/import_test.go b/core/block/import/markdown/import_test.go index 62381f2ecc..b44fa3eadc 100644 --- a/core/block/import/markdown/import_test.go +++ b/core/block/import/markdown/import_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/assert" + "github.com/anyproto/anytype-heart/core/block/import/common" "github.com/anyproto/anytype-heart/core/block/process" "github.com/anyproto/anytype-heart/pb" "github.com/anyproto/anytype-heart/pkg/lib/pb/model" @@ -75,6 +76,89 @@ func TestMarkdown_GetSnapshots(t *testing.T) { assert.Nil(t, sn) assert.True(t, err.IsNoObjectToImportError(1)) }) + t.Run("import file with links", func(t *testing.T) { + // given + converter := newMDConverter(&MockTempDir{}) + h := &Markdown{blockConverter: converter} + p := process.NewProgress(pb.ModelProcess_Import) + + // when + sn, err := h.GetSnapshots(context.Background(), &pb.RpcObjectImportRequest{ + Params: &pb.RpcObjectImportRequestParamsOfMarkdownParams{ + MarkdownParams: &pb.RpcObjectImportRequestMarkdownParams{Path: []string{"testdata"}}, + }, + Type: model.Import_Markdown, + Mode: pb.RpcObjectImportRequest_IGNORE_ERRORS, + }, p) + + // then + assert.Nil(t, err) + assert.NotNil(t, sn) + assert.Len(t, sn.Snapshots, 4) + + var found bool + for _, snapshot := range sn.Snapshots { + if snapshot.FileName == "testdata/links.md" { + found = true + assert.Len(t, snapshot.Snapshot.Data.Blocks, 14) + assertLinkBlocks(t, snapshot) + } + } + assert.True(t, found) + }) +} + +func assertLinkBlocks(t *testing.T, snapshot *common.Snapshot) { + assert.Equal(t, "File does not exist test1", snapshot.Snapshot.Data.Blocks[0].GetText().GetText()) + assert.Len(t, snapshot.Snapshot.Data.Blocks[0].GetText().GetMarks().GetMarks(), 1) + assert.Equal(t, model.BlockContentTextMark_Link, snapshot.Snapshot.Data.Blocks[0].GetText().GetMarks().GetMarks()[0].GetType()) + + assert.Equal(t, snapshot.Snapshot.Data.Blocks[1].GetText().GetText(), "Test link to page test2") + assert.Len(t, snapshot.Snapshot.Data.Blocks[1].GetText().GetMarks().GetMarks(), 1) + assert.Equal(t, model.BlockContentTextMark_Mention, snapshot.Snapshot.Data.Blocks[1].GetText().GetMarks().GetMarks()[0].GetType()) + + assert.NotNil(t, snapshot.Snapshot.Data.Blocks[2].GetFile()) + assert.Contains(t, snapshot.Snapshot.Data.Blocks[2].GetFile().GetName(), "test.txt") + + assert.Equal(t, snapshot.Snapshot.Data.Blocks[3].GetText().GetText(), "Test link to csv test4") + assert.Len(t, snapshot.Snapshot.Data.Blocks[3].GetText().GetMarks().GetMarks(), 1) + assert.Equal(t, model.BlockContentTextMark_Mention, snapshot.Snapshot.Data.Blocks[3].GetText().GetMarks().GetMarks()[0].GetType()) + + assert.Equal(t, snapshot.Snapshot.Data.Blocks[4].GetText().GetText(), "File does not exist with bold mark test1") + assert.Len(t, snapshot.Snapshot.Data.Blocks[4].GetText().GetMarks().GetMarks(), 2) + assert.Equal(t, model.BlockContentTextMark_Link, snapshot.Snapshot.Data.Blocks[4].GetText().GetMarks().GetMarks()[0].GetType()) + assert.Equal(t, model.BlockContentTextMark_Bold, snapshot.Snapshot.Data.Blocks[4].GetText().GetMarks().GetMarks()[1].GetType()) + + assert.Equal(t, snapshot.Snapshot.Data.Blocks[5].GetText().GetText(), "Test link to page with bold mark test2") + assert.Len(t, snapshot.Snapshot.Data.Blocks[5].GetText().GetMarks().GetMarks(), 2) + assert.Equal(t, model.BlockContentTextMark_Object, snapshot.Snapshot.Data.Blocks[5].GetText().GetMarks().GetMarks()[0].GetType()) + assert.Equal(t, model.BlockContentTextMark_Bold, snapshot.Snapshot.Data.Blocks[5].GetText().GetMarks().GetMarks()[1].GetType()) + + assert.Equal(t, snapshot.Snapshot.Data.Blocks[6].GetText().GetText(), "Test file block with bold mark test3") + assert.Len(t, snapshot.Snapshot.Data.Blocks[6].GetText().GetMarks().GetMarks(), 2) + assert.Equal(t, model.BlockContentTextMark_Link, snapshot.Snapshot.Data.Blocks[6].GetText().GetMarks().GetMarks()[0].GetType()) + assert.Equal(t, model.BlockContentTextMark_Bold, snapshot.Snapshot.Data.Blocks[6].GetText().GetMarks().GetMarks()[1].GetType()) + + assert.Equal(t, snapshot.Snapshot.Data.Blocks[7].GetText().GetText(), "Test link to csv with bold mark test4") + assert.Len(t, snapshot.Snapshot.Data.Blocks[7].GetText().GetMarks().GetMarks(), 2) + assert.Equal(t, model.BlockContentTextMark_Object, snapshot.Snapshot.Data.Blocks[7].GetText().GetMarks().GetMarks()[0].GetType()) + assert.Equal(t, model.BlockContentTextMark_Bold, snapshot.Snapshot.Data.Blocks[7].GetText().GetMarks().GetMarks()[1].GetType()) + + assert.NotNil(t, snapshot.Snapshot.Data.Blocks[8].GetBookmark()) + assert.Equal(t, "testdata/file.md", snapshot.Snapshot.Data.Blocks[8].GetBookmark().GetUrl()) + + assert.Equal(t, snapshot.Snapshot.Data.Blocks[9].GetText().GetText(), "test2") + assert.Len(t, snapshot.Snapshot.Data.Blocks[9].GetText().GetMarks().GetMarks(), 2) + assert.Equal(t, model.BlockContentTextMark_Object, snapshot.Snapshot.Data.Blocks[9].GetText().GetMarks().GetMarks()[0].GetType()) + assert.Equal(t, model.BlockContentTextMark_Bold, snapshot.Snapshot.Data.Blocks[9].GetText().GetMarks().GetMarks()[1].GetType()) + + assert.NotNil(t, snapshot.Snapshot.Data.Blocks[10].GetFile()) + assert.Contains(t, snapshot.Snapshot.Data.Blocks[10].GetFile().GetName(), "test.txt") + + assert.Equal(t, snapshot.Snapshot.Data.Blocks[11].GetText().GetText(), "test4") + assert.Len(t, snapshot.Snapshot.Data.Blocks[11].GetText().GetMarks().GetMarks(), 2) + assert.Equal(t, model.BlockContentTextMark_Object, snapshot.Snapshot.Data.Blocks[11].GetText().GetMarks().GetMarks()[0].GetType()) + assert.Equal(t, model.BlockContentTextMark_Bold, snapshot.Snapshot.Data.Blocks[11].GetText().GetMarks().GetMarks()[1].GetType()) } func setupTestDirectory(t *testing.T) string { diff --git a/core/block/import/markdown/testdata/links.md b/core/block/import/markdown/testdata/links.md new file mode 100644 index 0000000000..4de86546ea --- /dev/null +++ b/core/block/import/markdown/testdata/links.md @@ -0,0 +1,23 @@ +File does not exist [test1](file.md) + +Test link to page [test2](test.md) + +Test file block [test3](test.txt) + +Test link to csv [test4](test.csv) + +File does not exist with bold mark **[test1](file.md)** + +Test link to page with bold mark **[test2](test.md)** + +Test file block with bold mark **[test3](test.txt)** + +Test link to csv with bold mark **[test4](test.csv)** + +**[test1](file.md)** + +**[test2](test.md)** + +**[test3](test.txt)** + +**[test4](test.csv)** diff --git a/core/block/import/markdown/testdata/test.csv b/core/block/import/markdown/testdata/test.csv new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/block/import/markdown/testdata/test.txt b/core/block/import/markdown/testdata/test.txt new file mode 100644 index 0000000000..e69de29bb2