Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/include/mol_descriptors.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#pragma once
#include "common.hpp"
#include "duckdb/main/connection.hpp"

namespace duckdb_rdkit {
// Registers the molecule descriptor scalar functions on the given database
// instance.
void RegisterDescriptorFunctions(DatabaseInstance &instance);
// Scalar function body for the SQL function `mol_registration_hash`.
// Expects exactly one input column in `args` and writes one string per row
// into `result`, made of labelled lines: "tautomer_smiles:",
// "canonical_smiles:" and "mol_formula:".
void mol_registration_hash(DataChunk &args, ExpressionState &state, Vector &result);
}
27 changes: 27 additions & 0 deletions src/mol_descriptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "qed.hpp"
#include "types.hpp"
#include "umbra_mol.hpp"
#include <GraphMol/MolHash/nmmolhash.h>
#include <GraphMol/RWMol.h>
#include <sstream>

namespace duckdb_rdkit {

Expand Down Expand Up @@ -131,6 +133,26 @@
});
}

// Scalar function body for the SQL function `mol_registration_hash`.
//
// For every input molecule, produces a multi-line string of the form
//
//   tautomer_smiles:<HetAtomTautomer hash>
//   canonical_smiles:<CanonicalSmiles hash>
//   mol_formula:<MolFormula hash>
//
// @param args    input chunk; must contain exactly one column holding the
//                serialized molecule values
// @param state   expression state (unused)
// @param result  output vector receiving one string per input row
void mol_registration_hash(DataChunk &args, ExpressionState &state, Vector &result) {
  D_ASSERT(args.data.size() == 1);
  auto &binary_umbra_mol = args.data[0];
  auto count = args.size();

  UnaryExecutor::Execute<string_t, string_t>(
      binary_umbra_mol, result, count, [&](string_t b_umbra_mol) {
        auto umbra_mol = umbra_mol_t(b_umbra_mol);
        auto bmol = umbra_mol.GetBinaryMol();
        auto mol = rdkit_binary_mol_to_mol(bmol);

        // RDKit::MolHash::MolHash takes a raw `RWMol *`, whereas `mol` is a
        // std::unique_ptr<RDKit::ROMol>; passing it directly does not
        // compile. RDKit also notes that hashing may modify the molecule it
        // is handed, so each hash is computed on its own editable copy to
        // keep the three results independent of one another.
        auto hash_of = [&mol](RDKit::MolHash::HashFunction func) {
          RDKit::RWMol scratch(*mol);
          return RDKit::MolHash::MolHash(&scratch, func);
        };

        std::string registration_hash;
        registration_hash += "tautomer_smiles:";
        registration_hash += hash_of(RDKit::MolHash::HashFunction::HetAtomTautomer);
        registration_hash += "\n";
        registration_hash += "canonical_smiles:";
        registration_hash += hash_of(RDKit::MolHash::HashFunction::CanonicalSmiles);
        registration_hash += "\n";
        registration_hash += "mol_formula:";
        registration_hash += hash_of(RDKit::MolHash::HashFunction::MolFormula);

        // AddString copies the bytes into storage owned by the result vector.
        return StringVector::AddString(result, registration_hash);
      });
}

void RegisterDescriptorFunctions(DatabaseInstance &instance) {
ScalarFunctionSet set_mol_amw("mol_amw");
set_mol_amw.AddFunction(
Expand Down Expand Up @@ -171,5 +193,10 @@
set_mol_num_rotatable_bonds.AddFunction(
ScalarFunction({duckdb_rdkit::Mol()}, LogicalType::INTEGER, mol_num_rotatable_bonds));
ExtensionUtil::RegisterFunction(instance, set_mol_num_rotatable_bonds);

ScalarFunctionSet set_mol_registration_hash("mol_registration_hash");
set_mol_registration_hash.AddFunction(
ScalarFunction({duckdb_rdkit::Mol()}, LogicalType::VARCHAR, mol_registration_hash));
ExtensionUtil::RegisterFunction(instance, set_mol_registration_hash);
}
} // namespace duckdb_rdkit
27 changes: 27 additions & 0 deletions test/sql/mol_registration_hash.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Checks that mol_registration_hash(m) returns the labelled, newline-separated
# hash string for molecules stored in a table.
# Require statement will ensure this test is run with this extension loaded
require duckdb_rdkit

# Table with a Mol column and a VARCHAR column that will hold the hash.
statement ok
CREATE TABLE molecules (m Mol, registration_hash VARCHAR);

# Two sample molecules given as SMILES; the hash column starts out as NULL.
statement ok
INSERT INTO molecules VALUES
(mol_from_smiles('CCO'), null),
(mol_from_smiles('C1=CC=CC=C1'), null);

# Populate the hash column for every row.
statement ok
UPDATE molecules SET registration_hash=mol_registration_hash(m);

# Expected hash for the 'CCO' row.
# NOTE(review): the tautomer_smiles value should be confirmed against real
# RDKit HetAtomTautomer output, which typically carries extra suffix fields.
query I
SELECT registration_hash FROM molecules WHERE m = mol_from_smiles('CCO');
----
tautomer_smiles:CCO
canonical_smiles:CCO
mol_formula:C2H6O

# Expected hash for the 'C1=CC=CC=C1' row.
# NOTE(review): same caveat for the tautomer_smiles value here - confirm.
query I
SELECT registration_hash FROM molecules WHERE m = mol_from_smiles('C1=CC=CC=C1');
----
tautomer_smiles:c1ccccc1
canonical_smiles:c1ccccc1
mol_formula:C6H6
Loading